In [79]:
import polars as pl
import re

In [80]:
def create_ref_table(full_table, column_name):
    """Split one column of a table out into a reference (lookup) table.

    The distinct values of ``column_name`` are numbered 1..n in a new
    ``<column_name>Id`` column, and the original column in ``full_table`` is
    replaced by that id.

    Args:
        full_table: polars DataFrame containing ``column_name``.
        column_name: name of the column to move into the reference table.

    Returns:
        A tuple ``(ref_table, full_table)`` where ``ref_table`` has the columns
        ``column_name`` and ``<column_name>Id``, and ``full_table`` is the input
        with ``column_name`` dropped and ``<column_name>Id`` added.
    """
    id_column = column_name + 'Id'

    # maintain_order=True keeps values in order of first appearance. Without it
    # the row order of unique() is not guaranteed, so the same input could get
    # different ids from one run to the next.
    ref_table = full_table \
        .select(column_name) \
        .unique(maintain_order=True)

    # Explicit dtype so the id column is an integer column even when the
    # table is empty and there are no values to infer a type from.
    ref_table = ref_table \
        .with_columns(
            pl.Series(id_column, list(range(1, len(ref_table) + 1)), dtype=pl.Int64)
        )

    # NOTE(review): polars joins do not match null keys by default, so rows
    # whose value is null would end up with a null id - confirm that is acceptable.
    full_table = full_table \
        .join(ref_table, on=column_name, how='left') \
        .drop(column_name)

    return ref_table, full_table

# Define a function to convert the monetary values to integers
def convert_to_int(value):
    """Convert a euro amount string such as '€1.5M', '€500K' or '€0' to an int.

    Args:
        value: amount string; may carry a '€' symbol and an 'M' (millions) or
            'K' (thousands) suffix.

    Returns:
        The amount in euros as an ``int``.

    Raises:
        ValueError: if the numeric part cannot be parsed.
    """
    amount = value.replace('€', '')

    # 'M' is checked before 'K', matching the original precedence.
    for suffix, multiplier in (('M', 1000000), ('K', 1000)):
        if suffix in amount:
            # round() instead of int(): a binary floating-point product can land
            # just below the intended integer (e.g. 4.35 * 100 evaluates to
            # 434.99999999999994), and int() would truncate that down by one.
            return round(float(amount.replace(suffix, '')) * multiplier)

    # No suffix: the value is expected to be a plain integer amount.
    return int(amount)

def transform_position(df):
    """Collapse detailed position labels into six broad position groups.

    The "Position" column may hold short codes (e.g. 'GK', 'CDM') or French
    labels (e.g. 'Gardien de but'). Each known label is replaced by its group
    name; labels that are not listed are left unchanged.

    Args:
        df: polars DataFrame with a "Position" column.

    Returns:
        The DataFrame with "Position" rewritten in place of the original column.
    """
    # Group name -> raw labels. Declaration order is the match priority.
    # NOTE(review): 'LCB'/'RCB' are grouped under 'Wing Back' and the empty
    # string under 'Forward' - kept as-is, confirm both are intended.
    position_groups = {
        'Goal Keeper': ('GK', 'Gardien de but'),
        'Center Back': ('CB', 'Défenseur central', 'Défense'),
        'Wing Back': ('LCB', 'RCB', 'RB', 'LB', 'LWB', 'RWB',
                      'Arrière gauche', 'Arrière droit'),
        'Defensive Midfield': ('CDM', 'LDM', 'RDM', 'Milieu défensif'),
        'Attacking Midfield': ('RM', 'LM', 'CM', 'RCM', 'LCM', 'LAM', 'RAM', 'CAM',
                               'Milieu offensif', 'Milieu droit', 'Milieu gauche',
                               'Milieu de terrain', 'Milieu central'),
        'Forward': ('LW', 'RW', 'RF', 'LF', 'CF', 'LS', 'RS', 'ST', 'Ailier droit',
                    'Ailier gauche', 'Avant-centre', 'Deuxième attaquant', ''),
    }

    # Flatten to raw label -> group name; setdefault keeps the first group
    # that claims a label, so priority follows the declaration order above.
    group_by_label = {}
    for group_name, raw_labels in position_groups.items():
        for raw_label in raw_labels:
            group_by_label.setdefault(raw_label, group_name)

    def to_group(value):
        # Unknown labels pass through untouched.
        return group_by_label.get(value, value)

    return df.with_columns(
        pl.col("Position").map_elements(to_group).alias("Position")
    )

#### Read data

In [81]:
# Forward slashes resolve on every OS. The previous 'raw\...' literals relied on
# '\P' and '\T' being invalid (hence untouched) escape sequences and only
# pointed at the right files on Windows.
player_df = pl.read_csv('raw/Player.csv', separator=';').drop('PlayerName_right')
player_team_df = pl.read_csv('raw/PlayerTeam.csv', separator=';')
team_df = pl.read_csv('raw/Team.csv', separator=';')

#### Process dataframes

In [83]:
# Move the league names into their own reference table; team_df keeps a
# LeagueId column in place of the League column.
league_df, team_df = create_ref_table(team_df, 'League')
team_df = team_df.drop('Link')

# Link table: everything left in team_df except the team name, de-duplicated.
team_league_df = team_df.drop('Team').unique()

# The league reference is carried by team_league_df, so drop it from team_df.
team_df = team_df.drop('LeagueId')

# Only the two key columns are kept for the player/team link table.
player_team_df = player_team_df.select(['PlayerId', 'TeamId'])

# Keep player names in a separate table keyed by PlayerId.
player_name_df = player_df.select(['PlayerId', 'PlayerName'])
player_df = player_df.drop('PlayerName')

# Replace categorical columns with ids. Positions are grouped first so the
# Position reference table contains the grouped labels.
nationality_df, player_df = create_ref_table(player_df, 'Nationality')
player_df = transform_position(player_df)
position_df, player_df = create_ref_table(player_df, 'Position')
foot_df, player_df = create_ref_table(player_df, 'Foot')

In [84]:
# Parse the "Value" and "Wage" amount strings into integer euro amounts.
# return_dtype is set explicitly so the result is always Int64 instead of
# being inferred by polars from the values the function happens to return.
player_df = player_df.with_columns(
    pl.col("Value", "Wage").map_elements(convert_to_int, return_dtype=pl.Int64)
)

#### Save dataframes into silver directory

In [85]:
# Output path -> frame, written in the order listed.
silver_outputs = {
    # Main entity table
    'silver/Player.csv': player_df,
    # Team table and reference tables
    'silver/Team.csv': team_df,
    'silver/League.csv': league_df,
    'silver/Nationality.csv': nationality_df,
    'silver/Foot.csv': foot_df,
    'silver/Position.csv': position_df,
    # Link / side tables
    'silver/PlayerTeam.csv': player_team_df,
    'silver/TeamLeague.csv': team_league_df,
    'silver/PlayerName.csv': player_name_df,
}

for output_path, frame in silver_outputs.items():
    frame.write_csv(output_path)