# Loading data 

In [2]:
from pyspark.sql import SparkSession
# Path of the raw IPL summary CSV that the rest of the notebook works from.
file_path = 'Files/data/ipl_summary_raw.csv'

# Treat the first CSV row as the header and let Spark infer each column's type.
# NOTE(review): `spark` is not created in this notebook; presumably the runtime
# provides the session — confirm before running elsewhere.
csv_reader = (
    spark.read
    .option("header", True)
    .option("inferSchema", True)
)
df = csv_reader.csv(file_path)
display(df)

StatementMeta(, 9c8a3f2a-5ccd-42d1-8852-09c7f69e86bf, 4, Finished, Available, Finished)

SynapseWidget(Synapse.DataFrame, 93f01bf7-590f-4072-bfe0-01021cfad652)

# Data partitioning

In [3]:
# Shuffle the rows so that all records sharing an `info_season` value end up in
# the same Spark partition. Note that repartition() only changes how rows are
# distributed across tasks/output files; it does not produce a season-keyed
# directory layout on disk (that would require write.partitionBy).
df_partitioned = df.repartition("info_season")

# Explicit overwrite keeps the cell re-runnable: with the default save mode the
# write raises an error as soon as the target path already exists.
df_partitioned.write.mode("overwrite").parquet("Files/data/ipl_partitioned")

StatementMeta(, 9c8a3f2a-5ccd-42d1-8852-09c7f69e86bf, 5, Finished, Available, Finished)

In [5]:
# Redistribute the rows by `info_season` before persisting them as a table.
df_partitioned = df.repartition("info_season")

# Explicit overwrite keeps the cell re-runnable: with the default save mode
# saveAsTable raises an error when the table already exists.
df_partitioned.write.mode("overwrite").saveAsTable("ipl_partitioned_table")

StatementMeta(, 9c8a3f2a-5ccd-42d1-8852-09c7f69e86bf, 7, Finished, Available, Finished)

In [6]:
# Redistribute the rows by `info_season` before persisting them in Delta format.
df_partitioned = df.repartition("info_season")

# Explicit overwrite keeps the cell re-runnable: with the default save mode
# saveAsTable raises an error when the table already exists.
(
    df_partitioned.write
    .format("delta")
    .mode("overwrite")
    .saveAsTable("ipl_partitioned_delta_table")
)

StatementMeta(, 9c8a3f2a-5ccd-42d1-8852-09c7f69e86bf, 8, Finished, Available, Finished)


# Saving & Updating DataFrames

In [8]:
from pyspark.sql import Row
from pyspark.sql.functions import when

# Hand-written city dimension keyed by `info_city`.
city_dimension_data = [
    Row(info_city="Hyderabad", city_population=10000000, city_state="Telangana", city_country="India"),
    Row(info_city="Bengaluru", city_population=12000000, city_state="Karnataka", city_country="India"),
    Row(info_city="Chennai", city_population=7000000, city_state="Tamil Nadu", city_country="India"),
]
city_dimension_data_df = spark.createDataFrame(city_dimension_data)
display(city_dimension_data_df)

# The dimension spells the city "Bengaluru", so rows recorded as "Bangalore"
# are renamed first; otherwise they would not match in the join below.
df_replaced = df.withColumn(
    "info_city",
    when(df.info_city == "Bangalore", "Bengaluru").otherwise(df.info_city),
)

# Left join keeps every row of the match data; cities that are missing from the
# dimension come out with a null `city_state`.
left_joined_df = (
    df_replaced
    .join(city_dimension_data_df, on="info_city", how="left")
    .select("info_city", "info_outcome_winner", "city_state")
)
display(left_joined_df)

# Explicit overwrite keeps the cell re-runnable: with the default save mode
# saveAsTable raises an error when the table already exists.
city_dimension_data_df.write.mode("overwrite").saveAsTable("city_dimension")
left_joined_df.write.mode("overwrite").saveAsTable("winner_by_state")

StatementMeta(, 9c8a3f2a-5ccd-42d1-8852-09c7f69e86bf, 10, Finished, Available, Finished)

SynapseWidget(Synapse.DataFrame, a62defca-9c12-4a0c-b88c-8c10f744f1da)

SynapseWidget(Synapse.DataFrame, a82969d5-6630-4b59-836b-f70ac5b1fb97)

In [12]:
# Read the saved dimension table back through SQL and print it.
(
    spark
    .sql("SELECT * FROM city_dimension")
    .show()
)


StatementMeta(, 9c8a3f2a-5ccd-42d1-8852-09c7f69e86bf, 14, Finished, Available, Finished)

+---------+---------------+----------+------------+
|info_city|city_population|city_state|city_country|
+---------+---------------+----------+------------+
|Hyderabad|       10000000| Telangana|       India|
|Bengaluru|       12000000| Karnataka|       India|
|  Chennai|        7000000|Tamil Nadu|       India|
+---------+---------------+----------+------------+



In [16]:
# Inspect the Mumbai rows of the joined table; `city_state` shows whether the
# city was matched by the dimension.
(
    spark
    .sql("SELECT * FROM winner_by_state WHERE info_city = 'Mumbai'")
    .show()
)

StatementMeta(, 9c8a3f2a-5ccd-42d1-8852-09c7f69e86bf, 18, Finished, Available, Finished)

+---------+--------------------+----------+
|info_city| info_outcome_winner|city_state|
+---------+--------------------+----------+
|   Mumbai|Rising Pune Super...|      null|
|   Mumbai|     Kings XI Punjab|      null|
|   Mumbai|Rising Pune Super...|      null|
|   Mumbai|    Delhi Daredevils|      null|
|   Mumbai|                null|      null|
|   Mumbai|        Punjab Kings|      null|
|   Mumbai|Kolkata Knight Ri...|      null|
|   Mumbai|        Punjab Kings|      null|
|   Mumbai|      Gujarat Titans|      null|
|   Mumbai|Lucknow Super Giants|      null|
|   Mumbai|Kolkata Knight Ri...|      null|
|   Mumbai|        Punjab Kings|      null|
|   Mumbai|Lucknow Super Giants|      null|
|   Mumbai|Lucknow Super Giants|      null|
|   Mumbai|      Gujarat Titans|      null|
|   Mumbai|      Gujarat Titans|      null|
|   Mumbai|Lucknow Super Giants|      null|
|   Mumbai|Lucknow Super Giants|      null|
|   Mumbai|        Punjab Kings|      null|
|   Mumbai|      Gujarat Titans|

# Overwriting existing tables

In [17]:
from pyspark.sql import Row

# Row factory that fixes the field names (and their order) for every city record.
CityRow = Row("info_city", "city_population", "city_state", "city_country")

# City dimension rows, now also covering Mumbai and Chandigarh.
# NOTE(review): Chennai, Mumbai and Chandigarh all carry the same population
# value — looks like placeholder data; confirm the figures.
city_dimension_data = [
    CityRow("Hyderabad", 10000000, "Telangana", "India"),
    CityRow("Bengaluru", 12000000, "Karnataka", "India"),
    CityRow("Chennai", 7000000, "Tamil Nadu", "India"),
    CityRow("Mumbai", 7000000, "Maharashtra", "India"),
    CityRow("Chandigarh", 7000000, "Punjab", "India"),
]

city_dimension_data_df = spark.createDataFrame(city_dimension_data)
display(city_dimension_data_df)

StatementMeta(, 9c8a3f2a-5ccd-42d1-8852-09c7f69e86bf, 19, Finished, Available, Finished)

SynapseWidget(Synapse.DataFrame, 7569b4ba-47ef-484b-9748-666b1a752904)

In [18]:
# Replace the contents of the existing `city_dimension` table with the current
# dimension DataFrame.
dimension_writer = city_dimension_data_df.write
dimension_writer.saveAsTable("city_dimension", mode="overwrite")

StatementMeta(, 9c8a3f2a-5ccd-42d1-8852-09c7f69e86bf, 20, Finished, Available, Finished)

In [20]:

from pyspark.sql.functions import when

# The dimension spells the city "Bengaluru", so rows recorded as "Bangalore"
# are renamed first; otherwise they would not match in the join below.
df_replaced = df.withColumn(
    "info_city",
    when(df.info_city == "Bangalore", "Bengaluru").otherwise(df.info_city),
)

# Left join keeps every row of the match data; cities that are missing from the
# dimension come out with a null `city_state`.
left_joined_df = (
    df_replaced
    .join(city_dimension_data_df, on="info_city", how="left")
    .select("info_city", "info_outcome_winner", "city_state")
)
display(left_joined_df)

# Replace the previously saved join result with the re-computed one.
left_joined_df.write.mode("overwrite").saveAsTable("winner_by_state")

StatementMeta(, 9c8a3f2a-5ccd-42d1-8852-09c7f69e86bf, 22, Finished, Available, Finished)

SynapseWidget(Synapse.DataFrame, cecb8678-5c8b-4bb2-abaf-3ded386ed134)

In [21]:
# Re-check the Mumbai rows after the table was overwritten; `city_state` shows
# whether the city is now matched by the dimension.
(
    spark
    .sql("SELECT * FROM winner_by_state WHERE info_city = 'Mumbai'")
    .show()
)

StatementMeta(, 9c8a3f2a-5ccd-42d1-8852-09c7f69e86bf, 23, Finished, Available, Finished)

+---------+--------------------+-----------+
|info_city| info_outcome_winner| city_state|
+---------+--------------------+-----------+
|   Mumbai|      Mumbai Indians|Maharashtra|
|   Mumbai|      Mumbai Indians|Maharashtra|
|   Mumbai|      Mumbai Indians|Maharashtra|
|   Mumbai|      Mumbai Indians|Maharashtra|
|   Mumbai|Rising Pune Super...|Maharashtra|
|   Mumbai|      Mumbai Indians|Maharashtra|
|   Mumbai|     Kings XI Punjab|Maharashtra|
|   Mumbai|Rising Pune Super...|Maharashtra|
|   Mumbai| Chennai Super Kings|Maharashtra|
|   Mumbai|    Delhi Daredevils|Maharashtra|
|   Mumbai|      Mumbai Indians|Maharashtra|
|   Mumbai| Sunrisers Hyderabad|Maharashtra|
|   Mumbai|      Mumbai Indians|Maharashtra|
|   Mumbai|    Rajasthan Royals|Maharashtra|
|   Mumbai|      Mumbai Indians|Maharashtra|
|   Mumbai| Chennai Super Kings|Maharashtra|
|   Mumbai| Chennai Super Kings|Maharashtra|
|   Mumbai|      Delhi Capitals|Maharashtra|
|   Mumbai|      Mumbai Indians|Maharashtra|
|   Mumbai