In [0]:
from pyspark.sql.functions import explode

for i in range(0, 8):
    filePath_0 = f"dbfs:/FileStore/movie_{i}.json"
    movie_i = spark.read.option("multiline", "true").option("inferSchema", "true").json(filePath_0)
    movie_i_e = movie_i.select(explode(movie_i['movie']).alias("movie_column"))
    movie_i_e = movie_i_e.selectExpr("movie_column.*")
    movie_i_e.write.format("delta").mode("overwrite").saveAsTable(f"bronze_movie_{i}")

In [0]:
%sql

SELECT * FROM hive_metastore.default.bronze_movie_0 LIMIT 1

BackdropUrl,Budget,CreatedBy,CreatedDate,Id,ImdbUrl,OriginalLanguage,Overview,PosterUrl,Price,ReleaseDate,Revenue,RunTime,Tagline,Title,TmdbUrl,UpdatedBy,UpdatedDate,genres
https://image.tmdb.org/t/p/original//s3TBrRGB1iav7gFOCNx3H31MoES.jpg,160000000.0,,2021-04-03T16:51:30.1633333,1,https://www.imdb.com/title/tt1375666,en,"Cobb, a skilled thief who commits corporate espionage by infiltrating the subconscious of his targets is offered a chance to regain his old life as payment for a task considered to be impossible: ""inception"", the implantation of another person's idea into a target's subconscious.",https://image.tmdb.org/t/p/w342//9gk7adHYeDvHkCSEqAvQNLV5Uge.jpg,9.9,2010-07-15T00:00:00,825532764.0,148,Your mind is the scene of the crime.,Inception,https://www.themoviedb.org/movie/27205,,,"List(List(1, Adventure), List(6, Action), List(13, Science Fiction))"


In [0]:
# Move bronze tables to silver tables with multiple columns
for i in range(0, 8):
    column_names = spark.sql(f"DESCRIBE bronze_movie_{i}").select("col_name").rdd.flatMap(lambda x: x).collect()
    column_names_expr = ", ".join(column_names)
    spark.sql(f"CREATE TABLE IF NOT EXISTS silver_movie_{i} USING DELTA AS SELECT {column_names_expr} FROM bronze_movie_{i}")

In [0]:
# Show one of the silver tables
spark.sql("SELECT * FROM silver_movie_0").show()

+--------------------+
|        movie_column|
+--------------------+
|{https://image.tm...|
|{https://image.tm...|
|{https://image.tm...|
|{https://image.tm...|
|{https://image.tm...|
|{https://image.tm...|
|{https://image.tm...|
|{https://image.tm...|
|{https://image.tm...|
|{https://image.tm...|
|{https://image.tm...|
|{https://image.tm...|
|{https://image.tm...|
|{https://image.tm...|
|{https://image.tm...|
|{https://image.tm...|
|{https://image.tm...|
|{https://image.tm...|
|{https://image.tm...|
|{https://image.tm...|
+--------------------+
only showing top 20 rows



In [0]:
%sql

UPDATE hive_metastore.default.silver_movie_0
SET movie_column.RunTime = 0
WHERE movie_column.RunTime < 0;

UPDATE hive_metastore.default.silver_movie_1
SET movie_column.RunTime = 0
WHERE movie_column.RunTime < 0;

UPDATE hive_metastore.default.silver_movie_2
SET movie_column.RunTime = 0
WHERE movie_column.RunTime < 0;

UPDATE hive_metastore.default.silver_movie_3
SET movie_column.RunTime = 0
WHERE movie_column.RunTime < 0;

UPDATE hive_metastore.default.silver_movie_4
SET movie_column.RunTime = 0
WHERE movie_column.RunTime < 0;

UPDATE hive_metastore.default.silver_movie_5
SET movie_column.RunTime = 0
WHERE movie_column.RunTime < 0;

UPDATE hive_metastore.default.silver_movie_6
SET movie_column.RunTime = 0
WHERE movie_column.RunTime < 0;

UPDATE hive_metastore.default.silver_movie_7
SET movie_column.RunTime = 0
WHERE movie_column.RunTime < 0;

num_affected_rows
2


In [0]:
# Show silver table silver_movie_0 with column names
spark.sql("DESCRIBE silver_movie_0").show()

+------------+--------------------+-------+
|    col_name|           data_type|comment|
+------------+--------------------+-------+
|movie_column|struct<BackdropUr...|   NULL|
+------------+--------------------+-------+

