In [0]:

from pyspark.sql.types import StructType, StructField, StringType, IntegerType, DoubleType, DateType, BooleanType


# Base path of the directory that holds all the CSV files.
# create_bronze_table appends "<csv_name>.csv" directly to this value, so the
# trailing slash is required.
base_path = "/Volumes/football/football-data/football-csv/"
    
# Read a CSV file and store it as a bronze table
def create_bronze_table(csv_name: str, schema: StructType):
    """Load ``<base_path><csv_name>.csv`` and save it as a bronze Delta table.

    Args:
        csv_name: File name without the ``.csv`` extension. It is also the
            prefix of the target table name, ``<csv_name>_bronze``.
        schema: Schema applied to the CSV; nothing is inferred from the data.

    The table is saved under the ``football`` catalog, in the
    ``bronze-football-data`` schema, using overwrite mode with
    ``overwriteSchema`` enabled.

    Reads the module-level ``base_path`` and a ``spark`` session that is not
    defined in this file (presumably supplied by the notebook runtime).
    """
    source_file = f"{base_path}{csv_name}.csv"
    target_table = f"football.`bronze-football-data`.{csv_name}_bronze"

    # The first line of the file is a header; column types come from `schema`.
    csv_reader = spark.read.format("csv").option("header", "true").schema(schema)
    raw_df = csv_reader.load(source_file)

    # Replace whatever the table held before, including its schema.
    delta_writer = raw_df.write.format("delta").mode("overwrite")
    delta_writer = delta_writer.option("overwriteSchema", "true")
    delta_writer.saveAsTable(target_table)
    

# Schema definitions, one per CSV file.
# players_schema is intentionally not defined here: it lives further down,
# between player_valuations_schema and transfers_schema, and a second identical
# definition at this spot would only be overwritten before its first use.

# Schema for appearances.csv; add() keeps its default nullable=True on every column.
appearances_schema = (
    StructType()
    # NOTE(review): confirm the CSV values of appearance_id are plain integers;
    # non-integer text would not fit IntegerType.
    .add("appearance_id", IntegerType())
    .add("game_id", IntegerType())
    .add("player_id", IntegerType())
    .add("player_club_id", IntegerType())
    .add("player_current_club_id", IntegerType())
    .add("date", DateType())
    .add("player_name", StringType())
    .add("competition_id", StringType())
    .add("yellow_cards", IntegerType())
    .add("red_cards", IntegerType())
    .add("goals", IntegerType())
    .add("assists", IntegerType())
    .add("minutes_played", IntegerType())
)

# Schema for club_games.csv; add() keeps its default nullable=True on every column.
club_games_schema = (
    StructType()
    .add("game_id", IntegerType())
    .add("club_id", IntegerType())
    .add("own_goals", IntegerType())
    .add("own_position", IntegerType())
    .add("own_manager_name", StringType())
    .add("opponent_id", IntegerType())
    .add("opponent_goals", IntegerType())
    .add("opponent_position", IntegerType())
    .add("opponent_manager_name", StringType())
    .add("hosting", StringType())
    .add("is_win", IntegerType())
)

# Schema for clubs.csv; add() keeps its default nullable=True on every column.
clubs_schema = (
    StructType()
    .add("club_id", IntegerType())
    .add("club_code", StringType())
    .add("name", StringType())
    .add("domestic_competition_id", StringType())
    # Kept as text, not parsed into a number.
    .add("total_market_value", StringType())
    .add("squad_size", IntegerType())
    .add("average_age", DoubleType())
    .add("foreigners_number", IntegerType())
    .add("foreigners_percentage", DoubleType())
    .add("national_team_players", IntegerType())
    .add("stadium_name", StringType())
    .add("stadium_seats", IntegerType())
    # Kept as text, not parsed into a number.
    .add("net_transfer_record", StringType())
    .add("coach_name", StringType())
    .add("last_season", IntegerType())
    .add("filename", StringType())
    .add("url", StringType())
)

# Schema for competitions.csv; add() keeps its default nullable=True on every column.
competitions_schema = (
    StructType()
    .add("competition_id", StringType())
    .add("competition_code", StringType())
    .add("name", StringType())
    .add("sub_type", StringType())
    .add("type", StringType())
    .add("country_id", IntegerType())
    .add("country_name", StringType())
    .add("domestic_league_code", StringType())
    .add("confederation", StringType())
    .add("url", StringType())
    .add("is_major_national_league", BooleanType())
)

# Schema for game_events.csv; add() keeps its default nullable=True on every column.
game_events_schema = (
    StructType()
    # NOTE(review): confirm the CSV values of game_event_id are plain integers;
    # non-integer text would not fit IntegerType.
    .add("game_event_id", IntegerType())
    .add("date", DateType())
    .add("game_id", IntegerType())
    .add("minute", IntegerType())
    .add("type", StringType())
    .add("club_id", IntegerType())
    .add("player_id", IntegerType())
    .add("description", StringType())
    .add("player_in_id", IntegerType())
    .add("player_assist_id", IntegerType())
)

# Schema for game_lineups.csv; add() keeps its default nullable=True on every column.
game_lineups_schema = (
    StructType()
    # Unlike the other *_id columns in this schema, this identifier is a string.
    .add("game_lineups_id", StringType())
    .add("date", DateType())
    .add("game_id", IntegerType())
    .add("player_id", IntegerType())
    .add("club_id", IntegerType())
    .add("player_name", StringType())
    .add("type", StringType())
    .add("position", StringType())
    .add("number", IntegerType())
    # NOTE(review): confirm the CSV stores team_captain as true/false text and
    # check how any other encoding (e.g. 0/1) is read as a boolean.
    .add("team_captain", BooleanType())
)

# Schema for games.csv; add() keeps its default nullable=True on every column.
games_schema = (
    StructType()
    .add("game_id", IntegerType())
    .add("competition_id", StringType())
    .add("season", IntegerType())
    .add("round", StringType())
    .add("date", DateType())
    .add("home_club_id", IntegerType())
    .add("away_club_id", IntegerType())
    .add("home_club_goals", IntegerType())
    .add("away_club_goals", IntegerType())
    .add("home_club_position", IntegerType())
    .add("away_club_position", IntegerType())
    .add("home_club_manager_name", StringType())
    .add("away_club_manager_name", StringType())
    .add("stadium", StringType())
    .add("attendance", IntegerType())
    .add("referee", StringType())
    .add("url", StringType())
    .add("home_club_formation", StringType())
    .add("away_club_formation", StringType())
    .add("home_club_name", StringType())
    .add("away_club_name", StringType())
    .add("aggregate", StringType())
    .add("competition_type", StringType())
)
# Schema for player_valuations.csv; add() keeps its default nullable=True on every column.
player_valuations_schema = (
    StructType()
    .add("player_id", IntegerType())
    .add("date", DateType())
    .add("market_value_in_eur", DoubleType())
    .add("current_club_id", IntegerType())
    .add("current_club_name", StringType())
    .add("player_club_domestic_competition_id", StringType())
)

# Schema for players.csv; add() keeps its default nullable=True on every column.
players_schema = (
    StructType()
    .add("player_id", IntegerType())
    .add("first_name", StringType())
    .add("last_name", StringType())
    .add("name", StringType())
    .add("last_season", IntegerType())
    .add("current_club_id", IntegerType())
    .add("player_code", StringType())
    .add("country_of_birth", StringType())
    .add("city_of_birth", StringType())
    .add("country_of_citizenship", StringType())
    .add("date_of_birth", DateType())
    .add("sub_position", StringType())
    .add("position", StringType())
    .add("foot", StringType())
    .add("height_in_cm", IntegerType())
    .add("contract_expiration_date", DateType())
    .add("agent_name", StringType())
    .add("image_url", StringType())
    .add("url", StringType())
    .add("current_club_domestic_competition_id", StringType())
    .add("current_club_name", StringType())
    .add("market_value_in_eur", DoubleType())
    .add("highest_market_value_in_eur", DoubleType())
)

# Schema for transfers.csv; add() keeps its default nullable=True on every column.
transfers_schema = (
    StructType()
    .add("player_id", IntegerType())
    .add("transfer_date", DateType())
    # The season is kept as text rather than a number.
    .add("transfer_season", StringType())
    .add("from_club_id", IntegerType())
    .add("to_club_id", IntegerType())
    .add("from_club_name", StringType())
    .add("to_club_name", StringType())
    .add("transfer_fee", DoubleType())
    .add("market_value_in_eur", DoubleType())
    .add("player_name", StringType())
)

# Build one bronze table per CSV file. Each entry pairs the CSV base name with
# the schema used to read it; the tables are created in the order listed.
_bronze_sources = (
    ("appearances", appearances_schema),
    ("club_games", club_games_schema),
    ("clubs", clubs_schema),
    ("competitions", competitions_schema),
    ("game_events", game_events_schema),
    ("game_lineups", game_lineups_schema),
    ("games", games_schema),
    ("player_valuations", player_valuations_schema),
    ("players", players_schema),
    ("transfers", transfers_schema),
)
for _source_name, _source_schema in _bronze_sources:
    create_bronze_table(_source_name, _source_schema)




