#### **Ingesta del archivo "movie_cast.json"**


In [0]:
# Declare the "environment" notebook parameter (blank unless the caller
# supplies a value) and capture whatever value is currently set.
dbutils.widgets.text(name="environment", defaultValue="")
var_environment = dbutils.widgets.get("environment")

In [0]:
# Declare the "file_date" notebook parameter, defaulting to 2024-12-30.
# The value is used below both as a folder name in the input path and as
# the literal stored in the file_date column.
dbutils.widgets.text(name="file_date", defaultValue="2024-12-30")
var_file_date = dbutils.widgets.get("file_date")

In [0]:
%run "../includes/configuration"

In [0]:
%run "../includes/common_functions"

##### Librerías

In [0]:
from pyspark.sql.types import StructType,StructField,IntegerType,StringType
from pyspark.sql.functions import current_timestamp,lit

##### Esquema

In [0]:
# Explicit schema applied when reading movie_cast.json.
# Every field is declared nullable (third argument True).
movie_cast_schema = (
    StructType()
    .add("movieId", IntegerType(), True)
    .add("personId", IntegerType(), True)
    .add("characterName", StringType(), True)
    .add("genderId", IntegerType(), True)
    .add("castOrder", IntegerType(), True)
)

##### Leer fichero JSON (multilínea)

In [0]:
# Read movie_cast.json from the folder named after the requested file date
# under bronze_folder_path, applying the explicit schema instead of inferring one.
# The "multiline" option lets a single JSON record span several lines.
df_movie_cast = (
    spark.read
    .schema(movie_cast_schema)
    .option("multiline", True)
    .json(f"{bronze_folder_path}/{var_file_date}/movie_cast.json")
)

##### Renombrar, añadir y eliminar columnas

In [0]:
# Build the frame that is written to Silver:
#   1. add_ingestion_date(...) is a helper defined outside this notebook
#      (presumably appends an ingestion timestamp column; confirm in the
#      included notebooks).
#   2. Rename the kept camelCase columns to snake_case.
#   3. Drop the columns that are not carried into Silver.
#   4. Tag every row with the environment and file_date run parameters.
df_final = (
    add_ingestion_date(df_movie_cast)
    .withColumnsRenamed(
        {
            "movieId": "movie_id",
            "personId": "person_id",
            "characterName": "character_name",
        }
    )
    .drop("genderId", "castOrder")
    .withColumn("environment", lit(var_environment))
    .withColumn("file_date", lit(var_file_date))
)

##### Escribir en Silver

In [0]:
# A source row matches a target row when movie_id, person_id and file_date
# are all equal; the condition is assembled from that list of key columns.
merge_condition = " AND ".join(
    f"target.{key} = source.{key}" for key in ("movie_id", "person_id", "file_date")
)

# NOTE(review): merge_delta_lake is defined outside this notebook. The
# arguments look like (frame, database, table, base path, merge condition,
# partition column); confirm against its definition.
merge_delta_lake(
    df_final,
    "movie_silver",
    "movie_casts",
    silver_folder_path,
    merge_condition,
    "file_date",
)

In [0]:
%sql
-- Post-load check: number of rows in movie_silver.movie_casts per file_date.
SELECT COUNT(1), file_date
FROM movie_silver.movie_casts
GROUP BY file_date

In [0]:
# End the notebook run and hand the string "Success" back as its exit value.
dbutils.notebook.exit("Success")