In [0]:
%run ./includes/configuration

In [0]:
import json
movies = {"movie": []}
for i in range(8):
    with open(moviePipelinePath + f"movie_{i}.json") as f:
        data = json.load(f)
        movies["movie"].append(data["movie"])

dbutils.fs.put(rawPath, json.dumps(movies, indent=2), True)

In [0]:
display(dbutils.fs.ls(rawPath))

path,name,size,modificationTime
dbfs:/dbfs/FileStore/movie/raw,raw,12795544,1661448323000


In [0]:
from pyspark.sql.types import *
from pyspark.sql.functions import *
from pyspark.sql.functions import explode

movie_path = "/FileStore/movie/*.json"     
movie_schema = StructType().add('movie', ArrayType(
    StructType([
    StructField('BackdropUrl', StringType(), True),
    StructField('Budget', FloatType(), True),
    StructField('CreatedBy', StringType(), True),
    StructField('CreatedDate', TimestampType(), True),
    StructField('Id', LongType(), True),
    StructField('ImdbUrl', StringType(), True),
    StructField('OriginalLanguage', StringType(), True),
    StructField('Overview', StringType(), True),
    StructField('PosterUrl', StringType(), True),
    StructField('Price', FloatType(), True),
    StructField('ReleaseDate', TimestampType(), True),
    StructField('Revenue', FloatType(), True),
    StructField('RunTime', IntegerType(), True),
    StructField('Tagline', StringType(), True),
    StructField('Title', StringType(), True),
    StructField('TmdbUrl', StringType(), True),
    StructField('UpdatedBy', StringType(), True),
    StructField('UpdatedDate', TimestampType(), True),
    StructField('genres', ArrayType(StructType([
        StructField('id', LongType(), True),
        StructField('name', StringType(), True),
    ])), True),
])
), True)
movie_data_df = (
  spark.read.format("json").option("multiline","true").schema(movie_schema).load(path = movie_path)
)
movie_data_df = (movie_data_df.select(explode(movie_data_df.movie)))

In [0]:
dbutils.fs.rm(bronzePath, recurse=True)

In [0]:

# movie_schema = "value STRING"

# movie_data_df = (
# spark.read.format("text").schema(movie_schema).load(movie_path)
  
#  )

In [0]:
from pyspark.sql.functions import current_timestamp, lit

movie_data_df = (
 movie_data_df.select(
   "col",
    lit("www.imdb.com").alias("datasource"),
    current_timestamp().alias ("ingesttime"),
    lit("new").alias("status"),
    current_timestamp().cast("date").alias("ingestdate")
  )
)

In [0]:
display(movie_data_df.limit(5) )


col,datasource,ingesttime,status,ingestdate
"List(https://image.tmdb.org/t/p/original//s3TBrRGB1iav7gFOCNx3H31MoES.jpg, 1.6E8, null, 2021-04-03T16:51:30.163+0000, 1, https://www.imdb.com/title/tt1375666, en, Cobb, a skilled thief who commits corporate espionage by infiltrating the subconscious of his targets is offered a chance to regain his old life as payment for a task considered to be impossible: ""inception"", the implantation of another person's idea into a target's subconscious., https://image.tmdb.org/t/p/w342//9gk7adHYeDvHkCSEqAvQNLV5Uge.jpg, 9.9, 2010-07-15T00:00:00.000+0000, 8.2553274E8, 148, Your mind is the scene of the crime., Inception, https://www.themoviedb.org/movie/27205, null, null, List(List(1, Adventure), List(6, Action), List(13, Science Fiction)))",www.imdb.com,2022-08-25T17:31:09.313+0000,new,2022-08-25
"List(https://image.tmdb.org/t/p/original//xJHokMbljvjADYdit5fK5VQsXEG.jpg, 1.65E8, null, 2021-04-03T16:51:30.163+0000, 2, https://www.imdb.com/title/tt0816692, en, The adventures of a group of explorers who make use of a newly discovered wormhole to surpass the limitations on human space travel and conquer the vast distances involved in an interstellar voyage., https://image.tmdb.org/t/p/w342//gEU2QniE6E77NI6lCU6MxlNBvIx.jpg, 9.9, 2014-11-05T00:00:00.000+0000, 6.7512E8, 169, Mankind was born on Earth. It was never meant to die here., Interstellar, https://www.themoviedb.org/movie/157336, null, null, List(List(1, Adventure), List(4, Drama), List(13, Science Fiction)))",www.imdb.com,2022-08-25T17:31:09.313+0000,new,2022-08-25
"List(https://image.tmdb.org/t/p/original//hkBaDkMWbLaf8B1lsWsKX7Ew3Xq.jpg, 1.85E8, null, 2021-04-03T16:51:30.163+0000, 3, https://www.imdb.com/title/tt0468569, en, Batman raises the stakes in his war on crime. With the help of Lt. Jim Gordon and District Attorney Harvey Dent, Batman sets out to dismantle the remaining criminal organizations that plague the streets. The partnership proves to be effective, but they soon find themselves prey to a reign of chaos unleashed by a rising criminal mastermind known to the terrified citizens of Gotham as the Joker., https://image.tmdb.org/t/p/w342//qJ2tW6WMUDux911r6m7haRef0WH.jpg, 9.9, 2008-07-16T00:00:00.000+0000, 1.00455846E9, 152, Why So Serious?, The Dark Knight, https://www.themoviedb.org/movie/155, null, null, List(List(4, Drama), List(6, Action), List(10, Thriller), List(11, Crime)))",www.imdb.com,2022-08-25T17:31:09.313+0000,new,2022-08-25
"List(https://image.tmdb.org/t/p/original//en971MEXui9diirXlogOrPKmsEn.jpg, 5.8E7, null, 2021-04-03T16:51:30.163+0000, 4, https://www.imdb.com/title/tt1431045, en, Deadpool tells the origin story of former Special Forces operative turned mercenary Wade Wilson, who after being subjected to a rogue experiment that leaves him with accelerated healing powers, adopts the alter ego Deadpool. Armed with his new abilities and a dark, twisted sense of humor, Deadpool hunts down the man who nearly destroyed his life., https://image.tmdb.org/t/p/w342//yGSxMiF0cYuAiyuve5DA6bnWEOI.jpg, 9.9, 2016-02-09T00:00:00.000+0000, 7.8310003E8, 108, Witness the beginning of a happy ending, Deadpool, https://www.themoviedb.org/movie/293660, null, null, List(List(1, Adventure), List(6, Action), List(7, Comedy)))",www.imdb.com,2022-08-25T17:31:09.313+0000,new,2022-08-25
"List(https://image.tmdb.org/t/p/original//kwUQFeFXOOpgloMgZaadhzkbTI4.jpg, 2.2E8, null, 2021-04-03T16:51:30.166+0000, 5, https://www.imdb.com/title/tt0848228, en, When an unexpected enemy emerges and threatens global safety and security, Nick Fury, director of the international peacekeeping agency known as S.H.I.E.L.D., finds himself in need of a team to pull the world back from the brink of disaster. Spanning the globe, a daring recruitment effort begins!, https://image.tmdb.org/t/p/w342//RYMX2wcKCBAr24UyPD7xwmjaTn.jpg, 9.9, 2012-04-25T00:00:00.000+0000, 1.51955789E9, 143, Some assembly required., The Avengers, https://www.themoviedb.org/movie/24428, null, null, List(List(1, Adventure), List(6, Action), List(13, Science Fiction)))",www.imdb.com,2022-08-25T17:31:09.313+0000,new,2022-08-25


In [0]:

from pyspark.sql.functions import col

(
  movie_data_df.select(
  "datasource",
  "ingesttime",
  "col",
  "status",
  col("ingestdate").alias("p_ingestdate"),
  )
  .write.format("delta")
  .mode("append")
  .partitionBy("p_ingestdate")
  .save(bronzePath)
)

In [0]:
display(dbutils.fs.ls(bronzePath))

path,name,size,modificationTime
dbfs:/dbfs/FileStore/movie/bronze/_delta_log/,_delta_log/,0,1661448350000
dbfs:/dbfs/FileStore/movie/bronze/p_ingestdate=2022-08-25/,p_ingestdate=2022-08-25/,0,1661448348000


In [0]:
spark.sql("""
DROP TABLE IF EXISTS movie_bronze;
""")

spark.sql(f"""
CREATE TABLE movie_bronze
USING DELTA
LOCATION "{bronzePath}"
""")

In [0]:
%sql

SELECT * FROM movie_bronze 
LIMIT 5


datasource,ingesttime,col,status,p_ingestdate
www.imdb.com,2022-08-25T17:25:46.753+0000,"List(https://image.tmdb.org/t/p/original//s3TBrRGB1iav7gFOCNx3H31MoES.jpg, 1.6E8, null, 2021-04-03T16:51:30.163+0000, 1, https://www.imdb.com/title/tt1375666, en, Cobb, a skilled thief who commits corporate espionage by infiltrating the subconscious of his targets is offered a chance to regain his old life as payment for a task considered to be impossible: ""inception"", the implantation of another person's idea into a target's subconscious., https://image.tmdb.org/t/p/w342//9gk7adHYeDvHkCSEqAvQNLV5Uge.jpg, 9.9, 2010-07-15T00:00:00.000+0000, 8.2553274E8, 148, Your mind is the scene of the crime., Inception, https://www.themoviedb.org/movie/27205, null, null, List(List(1, Adventure), List(6, Action), List(13, Science Fiction)))",new,2022-08-25
www.imdb.com,2022-08-25T17:25:46.753+0000,"List(https://image.tmdb.org/t/p/original//xJHokMbljvjADYdit5fK5VQsXEG.jpg, 1.65E8, null, 2021-04-03T16:51:30.163+0000, 2, https://www.imdb.com/title/tt0816692, en, The adventures of a group of explorers who make use of a newly discovered wormhole to surpass the limitations on human space travel and conquer the vast distances involved in an interstellar voyage., https://image.tmdb.org/t/p/w342//gEU2QniE6E77NI6lCU6MxlNBvIx.jpg, 9.9, 2014-11-05T00:00:00.000+0000, 6.7512E8, 169, Mankind was born on Earth. It was never meant to die here., Interstellar, https://www.themoviedb.org/movie/157336, null, null, List(List(1, Adventure), List(4, Drama), List(13, Science Fiction)))",new,2022-08-25
www.imdb.com,2022-08-25T17:25:46.753+0000,"List(https://image.tmdb.org/t/p/original//hkBaDkMWbLaf8B1lsWsKX7Ew3Xq.jpg, 1.85E8, null, 2021-04-03T16:51:30.163+0000, 3, https://www.imdb.com/title/tt0468569, en, Batman raises the stakes in his war on crime. With the help of Lt. Jim Gordon and District Attorney Harvey Dent, Batman sets out to dismantle the remaining criminal organizations that plague the streets. The partnership proves to be effective, but they soon find themselves prey to a reign of chaos unleashed by a rising criminal mastermind known to the terrified citizens of Gotham as the Joker., https://image.tmdb.org/t/p/w342//qJ2tW6WMUDux911r6m7haRef0WH.jpg, 9.9, 2008-07-16T00:00:00.000+0000, 1.00455846E9, 152, Why So Serious?, The Dark Knight, https://www.themoviedb.org/movie/155, null, null, List(List(4, Drama), List(6, Action), List(10, Thriller), List(11, Crime)))",new,2022-08-25
www.imdb.com,2022-08-25T17:25:46.753+0000,"List(https://image.tmdb.org/t/p/original//en971MEXui9diirXlogOrPKmsEn.jpg, 5.8E7, null, 2021-04-03T16:51:30.163+0000, 4, https://www.imdb.com/title/tt1431045, en, Deadpool tells the origin story of former Special Forces operative turned mercenary Wade Wilson, who after being subjected to a rogue experiment that leaves him with accelerated healing powers, adopts the alter ego Deadpool. Armed with his new abilities and a dark, twisted sense of humor, Deadpool hunts down the man who nearly destroyed his life., https://image.tmdb.org/t/p/w342//yGSxMiF0cYuAiyuve5DA6bnWEOI.jpg, 9.9, 2016-02-09T00:00:00.000+0000, 7.8310003E8, 108, Witness the beginning of a happy ending, Deadpool, https://www.themoviedb.org/movie/293660, null, null, List(List(1, Adventure), List(6, Action), List(7, Comedy)))",new,2022-08-25
www.imdb.com,2022-08-25T17:25:46.753+0000,"List(https://image.tmdb.org/t/p/original//kwUQFeFXOOpgloMgZaadhzkbTI4.jpg, 2.2E8, null, 2021-04-03T16:51:30.166+0000, 5, https://www.imdb.com/title/tt0848228, en, When an unexpected enemy emerges and threatens global safety and security, Nick Fury, director of the international peacekeeping agency known as S.H.I.E.L.D., finds himself in need of a team to pull the world back from the brink of disaster. Spanning the globe, a daring recruitment effort begins!, https://image.tmdb.org/t/p/w342//RYMX2wcKCBAr24UyPD7xwmjaTn.jpg, 9.9, 2012-04-25T00:00:00.000+0000, 1.51955789E9, 143, Some assembly required., The Avengers, https://www.themoviedb.org/movie/24428, null, null, List(List(1, Adventure), List(6, Action), List(13, Science Fiction)))",new,2022-08-25
