In [0]:
from pyspark.sql.functions import udf, when, split, col, explode
from pyspark.sql.types import StringType
import uuid
                           
def generate_uuid():
    """Return a newly generated random (version 4) UUID as a string."""
    return str(uuid.uuid4())


# Every call returns a different value, so the UDF is flagged as
# non-deterministic. Spark assumes UDFs are deterministic by default, and its
# optimizer may then eliminate duplicate invocations or invoke the function
# more times than it appears in the query.
uuid_udf = udf(generate_uuid, StringType()).asNondeterministic()

# Load the alternative-title rows from the bronze layer.
df = spark.read.table("imdb_dev.bronze.title_akas")
# 99.4% attributes, 69.3% types, 33.1% languages, 22.5% region
# NOTE(review): presumably the share of "\N" placeholder values per column — confirm.

# Replace the "\N" placeholder in `types` with the label "Unknown".
types_or_unknown = when(col("types") == "\\N", "Unknown").otherwise(col("types"))

# Base frame shared by the later cells: rows with a real region, a generated
# titleAKASId per row, and `types` split on the U+0002 separator into an array.
# The frame is cached; the later cells read it twice, and the IDs come from a
# random UUID generator.
df_title_AKAS = (
    df.select("titleId", "title", "region", "types")
    .filter(col("region") != "\\N")
    .withColumn("titleAKASId", uuid_udf())
    .withColumn("types", split(types_or_unknown, "\u0002"))
    .cache()
)

In [0]:
# Build the Region dimension: one row per distinct region value, each keyed
# by a generated UUID.
df_region = (
    df.select("region")
    # Exclude the "\N" placeholder so the dimension holds only real regions,
    # matching the filter used when building df_title_AKAS.
    .filter(col("region") != "\\N")
    .distinct()
    .withColumn("regionId", uuid_udf())
    # insertInto resolves columns by position, so fix the order explicitly.
    .select("regionId", "region")
)
df_region.write.format("delta").mode("overwrite").insertInto("imdb_dev.silver.Region")

In [0]:
# Build the TypeAKAS dimension: explode the `types` arrays, keep each distinct
# value once, and attach a generated UUID as its key.
df_type_AKAS = (
    df_title_AKAS
    .select(explode("types").alias("typeAKAS"))
    .distinct()
    .withColumn("typeAKASId", uuid_udf())
    .select("typeAKASId", "typeAKAS")
)

# Overwrite the silver table with the freshly built dimension.
(
    df_type_AKAS.write
    .format("delta")
    .mode("overwrite")
    .insertInto("imdb_dev.silver.TypeAKAS")
)

In [0]:
# Read the TypeAKAS dimension back from the silver table so the join below
# uses the IDs stored there.
df_type_AKAS = spark.read.table("imdb_dev.silver.TypeAKAS")

# One row per (alternative title, type) pair.
title_type_pairs = df_title_AKAS.select(
    "titleAKASId",
    explode("types").alias("typeAKAS"),
)

# Swap the type label for its ID from the dimension table.
# NOTE(review): insertInto is position-based; the remaining columns are
# presumably (titleAKASId, typeAKASId) — confirm against the target table.
df_type_of_alternative = (
    title_type_pairs
    .join(df_type_AKAS, on="typeAKAS", how="left")
    .drop("typeAKAS")
)

(
    df_type_of_alternative.write
    .format("delta")
    .mode("overwrite")
    .insertInto("imdb_dev.silver.TypeOfAlternative")
)

In [0]:
# Read the Region dimension back from the silver table so the join below
# uses the IDs stored there.
df_region = spark.read.table("imdb_dev.silver.Region")

# Swap each title's region value for its regionId and keep only the columns
# written to the target table, in the order they are inserted.
# Note: this rebinds df_title_AKAS, so the cell expects the frame that still
# has the `region` column as its input.
df_title_AKAS = (
    df_title_AKAS
    .join(df_region, on="region", how="left")
    .select("titleAKASId", "titleId", "title", "regionId")
)

(
    df_title_AKAS.write
    .format("delta")
    .mode("overwrite")
    .insertInto("imdb_dev.silver.TitleAKAS")
)
