In [0]:
from pyspark.sql import functions as F
from pyspark.sql.types import IntegerType, ArrayType, StringType

# Root folder of the bronze-layer Delta tables.
bronze_base = "/Volumes/workspace/default/imdb_bronze"

# Load both bronze inputs from their Delta locations under bronze_base.
df_basics = spark.read.format("delta").load(f"{bronze_base}/title_basics")
df_ratings = spark.read.format("delta").load(f"{bronze_base}/title_ratings")

# Preview the first five rows of each input, labelled, in the same order
# they were loaded.
for preview_label, preview_frame in (("BASICS:", df_basics), ("RATINGS:", df_ratings)):
    print(preview_label)
    display(preview_frame.limit(5))

# 1) Keep only the title types of interest
allowed_types = ["movie", "tvSeries", "tvMovie", "short", "tvMiniSeries"]
df_basics = df_basics.where(F.col("titleType").isin(allowed_types))

# 2) Cast the numeric columns to integers, one column at a time
for numeric_col in ("startYear", "endYear", "runtimeMinutes"):
    df_basics = df_basics.withColumn(numeric_col, F.col(numeric_col).cast(IntegerType()))

# 3) Turn the comma-separated genres string into an array column.
#    split() yields null for a null input, so coalesce substitutes an empty
#    array<string> in that case; non-null strings are split on ",".
empty_genres = F.array().cast("array<string>")
df_basics = df_basics.withColumn(
    "genres_array",
    F.coalesce(F.split(F.col("genres"), ","), empty_genres),
)

# Column expressions used to build the silver table.
rating_as_double = F.col("averageRating").cast("double")
votes_as_long = F.col("numVotes").cast("long")
# True only when isAdult equals "1"; any other value, including null, is False.
adult_flag = F.coalesce(F.col("isAdult") == "1", F.lit(False))
# Start year truncated to its decade, e.g. 1894 -> 1890.
decade_start = (F.col("startYear") / 10).cast("int") * 10

# 4) Join basics with ratings on tconst (left join: every row of df_basics is
#    kept, with null rating columns when there is no match), then
# 5) add the helper columns is_adult, year_key (copy of startYear) and decade.
df_silver = (
    df_basics.join(df_ratings, on="tconst", how="left")
    .withColumn("averageRating", rating_as_double)
    .withColumn("numVotes", votes_as_long)
    .withColumn("is_adult", adult_flag)
    .withColumn("year_key", F.col("startYear"))
    .withColumn("decade", decade_start)
)

# Destination folder of the silver-layer Delta table.
silver_base = "/Volumes/workspace/default/imdb_silver"

# Write the silver table, partitioned by year_key, replacing whatever the
# table held before.
#
# The target directory is deliberately NOT deleted first: mode("overwrite")
# already replaces the table contents as a Delta commit, whereas removing the
# folder by hand would discard the Delta transaction log (table history) and
# leave no table at all if the subsequent write failed. "overwriteSchema"
# lets the overwrite also replace the stored schema/partitioning, which is the
# only thing the manual delete was still needed for.
(
    df_silver.write.format("delta")
    .mode("overwrite")
    .option("overwriteSchema", "true")
    .partitionBy("year_key")
    .save(silver_base)
)

# Confirm the write by listing the top-level contents of the table folder.
print("Silver salvo em:", silver_base)
display(dbutils.fs.ls(silver_base))


BASICS:


tconst,titleType,primaryTitle,originalTitle,isAdult,startYear,endYear,runtimeMinutes,genres
tt0000001,short,Carmencita,Carmencita,0,1894,,1,"Documentary,Short"
tt0000002,short,Le clown et ses chiens,Le clown et ses chiens,0,1892,,5,"Animation,Short"
tt0000003,short,Poor Pierrot,Pauvre Pierrot,0,1892,,5,"Animation,Comedy,Romance"
tt0000004,short,Un bon bock,Un bon bock,0,1892,,12,"Animation,Short"
tt0000005,short,Blacksmith Scene,Blacksmith Scene,0,1893,,1,Short


RATINGS:


tconst,averageRating,numVotes
tt0000001,5.7,2186
tt0000002,5.5,306
tt0000003,6.4,2271
tt0000004,5.2,196
tt0000005,6.2,3012


Silver salvo em: /Volumes/workspace/default/imdb_silver


path,name,size,modificationTime
dbfs:/Volumes/workspace/default/imdb_silver/_delta_log/,_delta_log/,0,1764615524820
dbfs:/Volumes/workspace/default/imdb_silver/year_key=1874/,year_key=1874/,0,1764615524820
dbfs:/Volumes/workspace/default/imdb_silver/year_key=1878/,year_key=1878/,0,1764615524820
dbfs:/Volumes/workspace/default/imdb_silver/year_key=1881/,year_key=1881/,0,1764615524820
dbfs:/Volumes/workspace/default/imdb_silver/year_key=1882/,year_key=1882/,0,1764615524820
dbfs:/Volumes/workspace/default/imdb_silver/year_key=1883/,year_key=1883/,0,1764615524820
dbfs:/Volumes/workspace/default/imdb_silver/year_key=1885/,year_key=1885/,0,1764615524820
dbfs:/Volumes/workspace/default/imdb_silver/year_key=1887/,year_key=1887/,0,1764615524820
dbfs:/Volumes/workspace/default/imdb_silver/year_key=1888/,year_key=1888/,0,1764615524820
dbfs:/Volumes/workspace/default/imdb_silver/year_key=1889/,year_key=1889/,0,1764615524820
