In [0]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import udf
from pyspark.sql.types import StringType

# Start (or reuse) a Spark session in local mode; "local[*]" is the master URL
# passed to the builder and "Calculator" is the application name.
spark = (
    SparkSession.builder
    .master("local[*]")
    .appName("Calculator")
    .getOrCreate()
)

def calculator(a, b, operation):
    """Apply the arithmetic operation named by ``operation`` to ``a`` and ``b``.

    Args:
        a: Left-hand operand.
        b: Right-hand operand.
        operation: One of "add", "subtract", "multiply" or "divide".

    Returns:
        The arithmetic result for a recognised operation. Instead of raising,
        the string "Division by zero error" is returned when dividing by
        zero, and "Invalid operation" is returned for any other operation
        name.
    """
    # Guard clauses: each recognised operation returns immediately.
    if operation == "add":
        return a + b
    if operation == "subtract":
        return a - b
    if operation == "multiply":
        return a * b
    if operation != "divide":
        return "Invalid operation"
    # Only "divide" reaches this point; a zero divisor yields a message.
    return a / b if b != 0 else "Division by zero error"
    
# Sample rows: one (a, b, operation) triple for each supported operation
data = [
    (50, 5, "add"),
    (20, 5, "subtract"),
    (10, 5, "multiply"),
    (10, 2, "divide"),
]

# Build the input DataFrame and preview it
df = spark.createDataFrame(data, schema=["a", "b", "operation"])
df.show()

# Wrap calculator as a Spark UDF whose declared return type is a string
calculator_udf = udf(
    lambda a, b, operation: calculator(a, b, operation),
    returnType=StringType(),
)

# Compute the per-row result as a new "result" column
result_df = df.withColumn(
    "result",
    calculator_udf(df["a"], df["b"], df["operation"]),
)

# Display the DataFrame including the computed column
result_df.show()


+---+---+---------+
|  a|  b|operation|
+---+---+---------+
| 50|  5|      add|
| 20|  5| subtract|
| 10|  5| multiply|
| 10|  2|   divide|
+---+---+---------+

+---+---+---------+------+
|  a|  b|operation|result|
+---+---+---------+------+
| 50|  5|      add|    55|
| 20|  5| subtract|    15|
| 10|  5| multiply|    50|
| 10|  2|   divide|   5.0|
+---+---+---------+------+



In [0]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import udf
from pyspark.sql.types import DoubleType
# Obtain a Spark session named "Calculator" (getOrCreate returns the active
# session when one already exists).
spark = (
    SparkSession.builder
    .appName("Calculator")
    .getOrCreate()
)
def add(x, y):
    """Return ``x + y`` as a float, or ``None`` when either input is missing.

    This function is wrapped as a Spark UDF with a ``DoubleType`` return
    type. Spark hands SQL NULL values to Python UDFs as ``None``; returning
    ``None`` for them keeps the result NULL instead of raising ``TypeError``
    from ``float(None)`` and failing the whole job.

    Args:
        x: First operand; any value ``float()`` accepts, or ``None``.
        y: Second operand; any value ``float()`` accepts, or ``None``.

    Returns:
        The sum as a ``float``, or ``None`` if ``x`` or ``y`` is ``None``.
    """
    # Propagate missing values rather than crashing on float(None).
    if x is None or y is None:
        return None
    return float(x) + float(y)

def subtract(x, y):
    """Return ``x - y`` as a float, or ``None`` when either input is missing.

    This function is wrapped as a Spark UDF with a ``DoubleType`` return
    type. Spark hands SQL NULL values to Python UDFs as ``None``; returning
    ``None`` for them keeps the result NULL instead of raising ``TypeError``
    from ``float(None)`` and failing the whole job.

    Args:
        x: Minuend; any value ``float()`` accepts, or ``None``.
        y: Subtrahend; any value ``float()`` accepts, or ``None``.

    Returns:
        The difference as a ``float``, or ``None`` if ``x`` or ``y`` is
        ``None``.
    """
    # Propagate missing values rather than crashing on float(None).
    if x is None or y is None:
        return None
    return float(x) - float(y)

def multiply(x, y):
    """Return ``x * y`` as a float, or ``None`` when either input is missing.

    This function is wrapped as a Spark UDF with a ``DoubleType`` return
    type. Spark hands SQL NULL values to Python UDFs as ``None``; returning
    ``None`` for them keeps the result NULL instead of raising ``TypeError``
    from ``float(None)`` and failing the whole job.

    Args:
        x: First factor; any value ``float()`` accepts, or ``None``.
        y: Second factor; any value ``float()`` accepts, or ``None``.

    Returns:
        The product as a ``float``, or ``None`` if ``x`` or ``y`` is ``None``.
    """
    # Propagate missing values rather than crashing on float(None).
    if x is None or y is None:
        return None
    return float(x) * float(y)

def divide(x, y):
    """Return ``x / y`` as a float, or ``None`` when no result is defined.

    This function is wrapped as a Spark UDF with a ``DoubleType`` return
    type. ``None`` is returned (a NULL cell in Spark) in two cases:

    * either operand is ``None`` -- Spark hands SQL NULL values to Python
      UDFs as ``None``, and dividing with ``None`` would raise ``TypeError``;
    * the divisor equals zero.

    Both operands are coerced with ``float()``, as the add/subtract/multiply
    helpers in this file do, so the returned value is always a Python float.

    Args:
        x: Dividend; any value ``float()`` accepts, or ``None``.
        y: Divisor; any value ``float()`` accepts, or ``None``.

    Returns:
        The quotient as a ``float``, or ``None`` if an operand is ``None`` or
        the divisor is zero.
    """
    # Propagate missing values rather than crashing on None arithmetic.
    if x is None or y is None:
        return None
    divisor = float(y)
    if divisor == 0:
        return None  # Handle division by zero
    return float(x) / divisor

# Sample input with two columns, 'x' and 'y'
data = [(10, 5), (20, 0), (15, 3), (50, 25)]
columns = ["x", "y"]
df = spark.createDataFrame(data, schema=columns)

# Preview the input before any derived columns are added
df.show()

# Register each arithmetic helper as a UDF with a double return type
add_udf = udf(add, returnType=DoubleType())
subtract_udf = udf(subtract, returnType=DoubleType())
multiply_udf = udf(multiply, returnType=DoubleType())
divide_udf = udf(divide, returnType=DoubleType())

# Chain the derived columns: each DataFrame extends the previous one
df_with_addition = df.withColumn(
    "addition", add_udf(df["x"], df["y"])
)
df_with_subtraction = df_with_addition.withColumn(
    "subtraction", subtract_udf(df["x"], df["y"])
)
df_with_multiplication = df_with_subtraction.withColumn(
    "multiplication", multiply_udf(df["x"], df["y"])
)
df_with_division = df_with_multiplication.withColumn(
    "division", divide_udf(df["x"], df["y"])
)

# Display every intermediate result, from fewest to most derived columns
df_with_addition.show()
df_with_subtraction.show()
df_with_multiplication.show()
df_with_division.show()

+---+---+
|  x|  y|
+---+---+
| 10|  5|
| 20|  0|
| 15|  3|
| 50| 25|
+---+---+

+---+---+--------+
|  x|  y|addition|
+---+---+--------+
| 10|  5|    15.0|
| 20|  0|    20.0|
| 15|  3|    18.0|
| 50| 25|    75.0|
+---+---+--------+

+---+---+--------+-----------+
|  x|  y|addition|subtraction|
+---+---+--------+-----------+
| 10|  5|    15.0|        5.0|
| 20|  0|    20.0|       20.0|
| 15|  3|    18.0|       12.0|
| 50| 25|    75.0|       25.0|
+---+---+--------+-----------+

+---+---+--------+-----------+--------------+
|  x|  y|addition|subtraction|multiplication|
+---+---+--------+-----------+--------------+
| 10|  5|    15.0|        5.0|          50.0|
| 20|  0|    20.0|       20.0|           0.0|
| 15|  3|    18.0|       12.0|          45.0|
| 50| 25|    75.0|       25.0|        1250.0|
+---+---+--------+-----------+--------------+

+---+---+--------+-----------+--------------+--------+
|  x|  y|addition|subtraction|multiplication|division|
+---+---+--------+-----------+--------