In [243]:
import polars as pl
import re

In [244]:
def create_ref_table(full_table, column_name):
    """Split one column of ``full_table`` out into a reference (lookup) table.

    The distinct values of ``column_name`` are numbered 1..n in a new column
    named ``column_name + 'Id'``. That id is then joined back onto
    ``full_table`` and the original column is dropped.

    Args:
        full_table: polars DataFrame that contains ``column_name``.
        column_name: name of the column to replace with an id column.

    Returns:
        A tuple ``(ref_table, full_table)`` where ``ref_table`` holds the
        distinct values with their ids, and ``full_table`` is the input with
        ``column_name`` replaced by the id column.
    """
    id_column = column_name + 'Id'

    # maintain_order=True keeps the values in first-appearance order. Without
    # it unique() gives no ordering guarantee, so the ids assigned below could
    # change from one run to the next.
    ref_table = full_table.select(column_name).unique(maintain_order=True)

    # Ids start at 1; the dtype is pinned so an empty table still gets an
    # integer id column.
    ref_table = ref_table.with_columns(
        pl.Series(id_column, list(range(1, len(ref_table) + 1)), dtype=pl.Int64)
    )

    # NOTE(review): rows whose value is null presumably end up with a null id,
    # since polars joins do not match nulls by default - confirm if relevant.
    full_table = (
        full_table
        .join(ref_table, on=column_name, how='left')
        .drop(column_name)
    )

    return ref_table, full_table

# Define a function to convert the monetary values to integers
def convert_to_int(value):
    """Convert a monetary string such as '€1.5M', '€250K' or '€500' to an int.

    Args:
        value: string made of an optional '€' symbol, a number, and an
            optional 'M' (millions) or 'K' (thousands) suffix.

    Returns:
        The amount as an integer number of euros.

    Raises:
        ValueError: if the numeric part cannot be parsed as a number.
    """
    multipliers = {'M': 1_000_000, 'K': 1_000}

    amount = value.replace('€', '').strip()
    multiplier = 1
    if amount and amount[-1] in multipliers:
        multiplier = multipliers[amount[-1]]
        amount = amount[:-1]

    # round() rather than int(): binary floats cannot represent most decimal
    # fractions exactly, so e.g. 4.1 * 1_000_000 evaluates to
    # 4099999.9999999995 and truncation would lose one euro.
    return round(float(amount) * multiplier)

#### Read data

In [245]:
# Load the raw ';'-separated CSV exports.
# Paths use forward slashes: they resolve on every OS, whereas a backslash
# inside a normal string literal is an escape character and is only a path
# separator on Windows.
player_df = pl.read_csv('raw/Player.csv', separator=';')
player_team_df = pl.read_csv('raw/PlayerTeam.csv', separator=';')
team_df = pl.read_csv('raw/Team.csv', separator=';')

#### Process dataframes

In [246]:
# Replace the league name on each team with an id into league_df.
league_df, team_df = create_ref_table(team_df, 'League')
team_df = team_df.drop('Link')

# Distinct rows of what is left once the team name is removed
# (presumably the TeamId / LeagueId pairs).
team_league_df = team_df.drop('Team').unique()
team_df = team_df.drop('LeagueId')

player_team_df = player_team_df.select(['PlayerId', 'TeamId'])

# Keep the player names in their own table, apart from the other player columns.
player_name_df = player_df.select(['PlayerId', 'PlayerName'])
player_df = player_df.drop('PlayerName')

# Swap each of these player columns for an id into its own reference table.
nationality_df, player_df = create_ref_table(player_df, 'Nationality')
position_df, player_df = create_ref_table(player_df, 'Position')
foot_df, player_df = create_ref_table(player_df, 'Foot')

In [247]:
## TODO: rerun everything to extract each player's best position.
## TODO: keep the information of whether a player is a sub or not.

In [248]:
# Apply convert_to_int to the "Value" and "Wage" columns.
# return_dtype is stated explicitly so polars does not have to infer the
# output type from the values the Python function happens to return.
player_df = player_df.with_columns(
    pl.col("Value").map_elements(convert_to_int, return_dtype=pl.Int64),
    pl.col("Wage").map_elements(convert_to_int, return_dtype=pl.Int64),
)

#### Save dataframes into silver directory

In [249]:
# Every (path, frame) pair below is written out as a ';'-separated CSV,
# in the order listed.
outputs = [
    ('silver/Player.csv', player_df),
    ('silver/Team.csv', team_df),
    ('silver/League.csv', league_df),
    ('silver/Nationality.csv', nationality_df),
    ('silver/Foot.csv', foot_df),
    ('silver/Position.csv', position_df),
    ('silver/PlayerTeam.csv', player_team_df),
    ('silver/TeamLeague.csv', team_league_df),
    ('silver/PlayerName.csv', player_name_df),
]

for path, frame in outputs:
    frame.write_csv(path, separator=";")