In [None]:
# Install Java
!apt-get install openjdk-8-jdk-headless -qq > /dev/null

# Download and install Spark
!wget -q https://archive.apache.org/dist/spark/spark-3.0.1/spark-3.0.1-bin-hadoop2.7.tgz
!tar xf spark-3.0.1-bin-hadoop2.7.tgz

# Install findspark, a Python library that makes it easier to find Spark
!pip install findspark

# Install SystemML
!pip install systemml

# Downgrade pandas to a compatible version
!pip install pandas==1.3.3


In [None]:
import os

# Point the JVM and Spark tooling at the locations installed by the setup cell.
os.environ["JAVA_HOME"] = "/usr/lib/jvm/java-8-openjdk-amd64"
os.environ["SPARK_HOME"] = "/content/spark-3.0.1-bin-hadoop2.7"

# Expose Spark's command-line tools on PATH. The entry is appended only when it
# is missing, so re-running this cell does not keep growing PATH, and a missing
# PATH variable no longer raises KeyError.
spark_bin = f"{os.environ['SPARK_HOME']}/bin"
current_path = os.environ.get("PATH", "")
if spark_bin not in current_path.split(os.pathsep):
    os.environ["PATH"] = (
        f"{current_path}{os.pathsep}{spark_bin}" if current_path else spark_bin
    )



In [None]:
import findspark

# findspark.init() is called ahead of the pyspark/systemml imports below;
# keep that ordering when editing this cell.
findspark.init()

from pyspark.sql import SparkSession
from systemml import MLContext, dml

# Get (or reuse) a Spark session with master "local[*]" for this notebook.
spark = (
    SparkSession.builder
    .master("local[*]")
    .appName("DML Script Example")
    .getOrCreate()
)

# MLContext wraps the session and is the handle used to execute DML scripts.
ml = MLContext(spark)




In [None]:
# DML source for a sine-wave generator.
#   gen_wave(freq, amp, T, Hz, pi_value) builds `time` with seq() from 0 to
#   T - T/Hz in steps of T/Hz and returns amp * sin(2 * pi_value * freq * time).
# The script calls it with freq=2, amp=1, T=1, Hz=50, then prints the result
# via toString() followed by its row count.
# The string is not a raw literal, so Python turns each "\\t" into the
# two characters backslash + t before the text is handed to DML.
dml_script = """
gen_wave = function(double freq, double amp, integer T, integer Hz, double pi_value) return (matrix[double] x) {
    time = seq(0, T-T/Hz, T/Hz)
    x = amp * sin(2 * pi_value * freq * time)
}

PI = 3.141592654
x = gen_wave(2, 1, 1, 50, PI)
N = nrow(x)
print(toString(x, sep="\\t\\t", decimal=1))
print(N)
"""

# Wrap the source in a Script object, then run it through the MLContext.
wave_script = dml(dml_script)
ml.execute(wave_script)

