In [1]:
from pathlib import Path

import polars as pl

In [2]:
def compute_numerical_feature_from_categorical(dataset, y, categorical_feature_name, numerical_feature_name):
    """Replace a categorical column with the per-category mean of ``y``.

    The mean of column ``y`` is computed for every distinct value of
    ``categorical_feature_name`` and attached to each row of ``dataset``
    through a left join, under the name ``numerical_feature_name``. The
    categorical column itself is then removed.

    Parameters
    ----------
    dataset
        Polars frame holding both ``y`` and ``categorical_feature_name``.
    y : str
        Name of the column whose mean is computed per category.
    categorical_feature_name : str
        Name of the column to group by; it is dropped from the result.
    numerical_feature_name : str
        Name given to the column of per-category means.

    Returns
    -------
    A new frame: ``dataset`` without ``categorical_feature_name`` and with
    the per-category means in ``numerical_feature_name``.

    Notes
    -----
    If ``dataset`` already has a column named ``numerical_feature_name``,
    the join stores the means under ``numerical_feature_name + '_right'``
    and that suffixed column is discarded, so the pre-existing column is
    returned unchanged.
    """
    category_means = dataset.group_by(categorical_feature_name).agg(
        pl.col(y).mean().alias(numerical_feature_name)
    )
    joined = dataset.join(category_means, on=categorical_feature_name, how='left')

    # The '_right' column only exists when the join hit a name collision.
    # Dropping it unconditionally raises on polars versions whose `drop` is
    # strict about missing columns, so only ask for it when it is present.
    collision_column = numerical_feature_name + '_right'
    if collision_column in joined.columns:
        return joined.drop(categorical_feature_name, collision_column)
    return joined.drop(categorical_feature_name)

### Read data

In [3]:
# Load the silver-layer CSV extracts. Every path uses forward slashes: they
# resolve on Windows as well as on POSIX systems, whereas a backslash inside an
# ordinary string literal starts an escape sequence ('\P', '\T' and '\F' are
# invalid escapes that Python warns about, and '\N' is a syntax error).
player_df = pl.read_csv('silver/Player.csv')
player_team_df = pl.read_csv('silver/PlayerTeam.csv')
team_df = pl.read_csv('silver/Team.csv')
team_league_df = pl.read_csv('silver/TeamLeague.csv')
nationality_df = pl.read_csv('silver/Nationality.csv')
foot_df = pl.read_csv('silver/Foot.csv')
position_df = pl.read_csv('silver/Position.csv')

### Join data

In [4]:
# Build one wide table per player by left-joining every lookup frame onto
# player_df; left joins keep players that have no match in a lookup frame.
dataset = (
    player_df
    .join(player_team_df, on='PlayerId', how='left')
    .join(team_df, on='TeamId', how='left')
    .join(team_league_df, on='TeamId', how='left')
    .join(foot_df, on='FootId', how='left')
    .join(position_df, on='PositionId', how='left')
    .join(nationality_df, on='NationalityId', how='left')
    # Remove the Team/Foot/Position/Nationality columns; the matching *Id
    # columns stay in the result (see the displayed header below).
    .drop(['Team', 'Foot', 'Position', 'Nationality'])
)

dataset.head()

PlayerId,Age,Height,Weigh,Value,Wage,Crossing,Finishing,Heading accuracy,Short passing,Volleys,Dribbling,Curve,FK Accuracy,Long passing,Ball control,Acceleration,Sprint speed,Agility,Reactions,Balance,Shot power,Jumping,Stamina,Strength,Long shots,Aggression,Interceptions,Att. Position,Vision,Penalties,Composure,Defensive awareness,Standing tackle,Sliding tackle,GK Diving,GK Handling,GK Kicking,GK Positioning,GK Reflexes,NationalityId,PositionId,FootId,TeamId,LeagueId
i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64
3061,23,185,78,5500000,33000,41,22,75,60,34,53,47,28,53,62,52,53,55,71,51,59,75,77,73,41,75,73,23,41,42,59,77,77,75,11,5,12,14,14,94,1,2,104,7
3412,27,182,75,1800000,14000,69,42,61,69,49,65,65,47,66,68,75,77,71,58,75,64,77,81,75,54,71,68,66,66,50,69,66,71,69,14,12,9,12,13,129,2,2,121,7
4627,21,187,78,2700000,9000,64,73,61,63,55,73,53,45,55,72,77,70,67,58,63,66,76,55,69,51,49,27,69,53,55,58,30,33,30,9,10,8,7,7,62,1,2,163,13
5029,22,183,75,2300000,8000,50,43,65,68,39,66,45,45,66,66,68,65,69,66,73,60,71,67,68,42,68,68,42,46,38,53,67,68,73,12,6,11,9,11,62,4,2,173,13
2373,21,180,78,32000000,15000,35,79,70,73,67,67,49,44,53,77,82,85,65,78,86,79,90,73,92,64,70,26,83,52,66,75,28,23,24,10,11,15,8,13,13,1,2,87,8


### Save dataset to gold directory

In [5]:
# Persist the joined dataset. The target directory is created first so the
# cell also succeeds on a fresh checkout where 'gold/' does not exist yet.
gold_dataset_path = Path('gold/Dataset.csv')
gold_dataset_path.parent.mkdir(parents=True, exist_ok=True)
dataset.write_csv(gold_dataset_path)