In [0]:
import pyspark
from pyspark.sql import SparkSession

# Obtain the notebook's SparkSession: getOrCreate() hands back an already
# running session if there is one, otherwise it starts a new session
# registered under the application name "CodingAssessment".
spark = (
    SparkSession.builder
    .appName("CodingAssessment")
    .getOrCreate()
)

In [0]:
# Build two small sample DataFrames. Both carry an integer column 'A';
# df1 pairs it with string column 'B', df2 with string column 'C'.
data1 = [(1, 'a'), (2, 'b'), (3, 'c'), (4, 'd')]
data2 = [(3, 'x'), (4, 'y'), (5, 'z'), (6, 'w')]

df1 = spark.createDataFrame(data1, schema=['A', 'B'])
df2 = spark.createDataFrame(data2, schema=['A', 'C'])

# Print df1 first, then df2, so the starting data is visible.
for frame in (df1, df2):
    frame.show()

+---+---+
|  A|  B|
+---+---+
|  1|  a|
|  2|  b|
|  3|  c|
|  4|  d|
+---+---+

+---+---+
|  A|  C|
+---+---+
|  3|  x|
|  4|  y|
|  5|  z|
|  6|  w|
+---+---+



In [0]:
# Manipulating DataFrame: replace column 'A' of df1 with twice its value.
# NOTE: df1 is rebound to the result of transforming itself, so running this
# cell again doubles 'A' a second time.
from pyspark.sql.functions import col

doubled_a = col('A') * 2
df1 = df1.withColumn('A', doubled_a)

print("Modified DataFrame 1:")
df1.show()

Modified DataFrame 1:
+---+---+
|  A|  B|
+---+---+
|  2|  a|
|  4|  b|
|  6|  c|
|  8|  d|
+---+---+



In [0]:
# Dropping columns: remove 'C' from df2 and rebind df2 to the result.
dropped_column = 'C'
df2 = df2.drop(dropped_column)

print("\nDataFrame 2 after dropping column 'C':")
df2.show()


DataFrame 2 after dropping column 'C':
+---+
|  A|
+---+
|  3|
|  4|
|  5|
|  6|
+---+



In [0]:
# Sorting DataFrame: order df1 by column 'A', smallest value first.
# (DataFrame.orderBy is an alias of DataFrame.sort.)
df1 = df1.sort('A', ascending=True)

print("\nDataFrame 1 sorted by column 'A':")
df1.show()


DataFrame 1 sorted by column 'A':
+---+---+
|  A|  B|
+---+---+
|  2|  a|
|  4|  b|
|  6|  c|
|  8|  d|
+---+---+



In [0]:
# Sorting DataFrame in descending order: largest 'A' first.
df1 = df1.sort('A', ascending=False)
df1.show()

+---+---+
|  A|  B|
+---+---+
|  8|  d|
|  6|  c|
|  4|  b|
|  2|  a|
+---+---+



In [0]:
from pyspark.sql.functions import avg

# Aggregations: average column 'A' over the whole of df1. The aggregate
# comes back as a one-row, one-column result, so take the first row and
# read its only field to get a plain Python value.
mean_row = df1.agg(avg('A')).first()
mean_A = mean_row[0]
print("\nMean of column 'A' in DataFrame 1:", mean_A)


Mean of column 'A' in DataFrame 1: 5.0


In [0]:
# Joining DataFrames: inner join of df1 and df2 on the shared column 'A'.
# Joining by column name (rather than by an expression) yields a single 'A'
# column in the result; only rows whose 'A' appears in both frames are kept.
join_key = 'A'
df_merged = df1.join(df2, on=[join_key], how='inner')

print("\nMerged DataFrame:")
df_merged.show()


Merged DataFrame:
+---+---+
|  A|  B|
+---+---+
|  4|  b|
|  6|  c|
+---+---+



In [0]:
# Grouping by DataFrame: one output row per distinct 'B', holding the total
# of 'A' within that group. The aggregate column is named 'sum(A)'.
rows_by_b = df_merged.groupBy('B')
grouped = rows_by_b.agg({'A': 'sum'})

print("\nGrouped DataFrame:")
grouped.show()


Grouped DataFrame:
+---+------+
|  B|sum(A)|
+---+------+
|  c|     6|
|  b|     4|
+---+------+

