In [29]:
import pickle as pk

In [30]:
from sklearn.model_selection import train_test_split, cross_validate
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.pipeline import make_pipeline, Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.feature_selection import SelectPercentile, mutual_info_regression
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import OrdinalEncoder, MinMaxScaler, OneHotEncoder, StandardScaler
from sklearn.impute import SimpleImputer, KNNImputer
# VIEWING OPTIONS IN THE NOTEBOOK
from sklearn import set_config; set_config(display='diagram')

In [31]:
import pandas as pd
import numpy as np
import seaborn as sns

In [32]:
numerical_features = [ 'games_2022', 'minutes_played_2022', 'goals_2022',
       'assists_2022', 'goals_against_2022', 'goals_for_2022',
       'clean_sheet_2022', 'height_in_cm', 'age', 
       'club_value', 'squad_size', 'term_days_remaining', 'yellow_cards_2022', 'red_cards_2022']
categorical_features = ['sub_position', 'foot']
ordinal_features = ['country_of_citizenship','current_club_domestic_competition_id', 'current_club_name']
target = 'market_value_in_eur'

numerical_transformer =  MinMaxScaler()
categorical_transformer = OneHotEncoder(handle_unknown='ignore')


feat_ordinal_dict = {
    "country_of_citizenship": ['Indonesia', 'Tajikistan', 'Faroe Islands', 'Moldova', 'Belarus', 'Montserrat', 'Uganda', 'Northern Ireland', 'Peru', 'Azerbaijan', 'Curacao', 'Costa Rica', 'Ireland', 'Iceland', 'Mozambique', 'Congo', 'Sierra Leone', 'Grenada', 'Finland', 'Panama', 'Guadeloupe', 'South Africa', 'Iraq', 'Chad', 'Tunisia', 'Angola', 'Venezuela', 'Greece', 'Australia', 'North Macedonia', 'Tanzania', 'Cape Verde', 'Slovakia', 'Scotland', 'Palestine', 'Luxembourg', 'Ukraine', 'Latvia', 'Jordan', 'Martinique', 'Comoros', 'Russia', 'Romania', 'Honduras', 'Turkey', 'Montenegro', 'Zimbabwe', 'Ecuador', 'Chile', 'Bosnia-Herzegovina', 'The Gambia', 'Denmark', 'Dominican Republic', 'Kosovo', 'Mali', 'Guinea', 'Mauritania', 'New Zealand', 'Guinea-Bissau', 'Uzbekistan', 'Armenia', 'Netherlands', 'Japan', 'Morocco', 'Gabon', 'Ghana', 'Slovenia', 'Togo', 'Philippines', 'Israel', 'Switzerland', 'Trinidad and Tobago', 'Kazakhstan', 'Austria', 'Suriname', 'Cameroon', 'Burkina Faso', 'Algeria', 'Paraguay', 'Belgium', 'Iran', 'Benin', 'Senegal', 'DR Congo', 'Sweden', 'Serbia', 'Wales', 'Croatia', 'Jamaica', 'Spain', 'Albania', 'Nigeria', 'Zambia', 'Portugal', 'Uruguay', 'Italy', 'United States', "Cote d'Ivoire", 'Germany', 'Colombia', 'Poland', 'Argentina', 'France', 'Canada', 'Brazil', 'Norway', 'Mexico', 'Hungary', 'Georgia', 'Czech Republic', 'Korea, South', 'England', 'Egypt'],
    "current_club_name":['Fk Minaj', 'Pfk Lviv', 'Gaziantep Fk', 'Ingulets Petrove', 'Kryvbas Kryvyi Rig', 'Metal Kharkiv', 'Nk Veres Rivne', 'Metalist 1925 Kharkiv', 'Chornomorets Odessa', 'Motherwell Fc', 'Dundee United Fc', 'Pas Lamia 1964', 'Pas Giannina', 'Lyngby Bk', 'Kilmarnock Fc', 'Rukh Lviv', 'Fk Oleksandriya', 'Ac Horsens', 'Kolos Kovalivka', 'Asteras Tripolis', 'Aberdeen Fc', 'Ross County Fc', 'St Mirren Fc', 'Go Ahead Eagles Deventer', 'St Johnstone Fc', 'Livingston Fc', 'Viborg Ff', 'Fakel Voronezh', 'Fc Volendam', 'Volos Nps', 'Ionikos Nikeas', 'Ofi Kreta', 'Torpedo Moskau', 'Istanbulspor', 'Fc Emmen', 'Apo Levadiakos', 'Panetolikos Gfs', 'Odense Boldklub', 'Rfc Seraing', 'Zorya Lugansk', 'Portimonense Sc', 'Sbv Excelsior Rotterdam', 'Gd Chaves', 'Heart Of Midlothian Fc', 'Vorskla Poltava', 'Aarhus Gf', 'Rio Ave Fc', 'Sv Zulte Waregem', 'Vv St Truiden', 'Fk Nizhny Novgorod', 'Umraniyespor', 'Sparta Rotterdam', 'Aalborg Bk', 'Boavista Porto Fc', 'Silkeborg If', 'Randers Fc', 'Kv Oostende', 'Ural Ekaterinburg', 'Fc Pacos De Ferreira', 'Fc Vizela', 'Casa Pia Ac', 'Rkc Waalwijk', 'Kas Eupen', 'Fc Arouca', 'Sc Cambuur Leeuwarden', 'Nec Nijmegen', 'Atromitos Athen', 'Fk Orenburg', 'Fk Sochi', 'Kv Kortrijk', 'Cd Santa Clara', 'Cs Maritimo', 'Fk Khimki', 'Hibernian Fc', 'Ac Ajaccio', 'Gd Estoril Praia', 'Fc Famalicao', 'Fortuna Sittard', 'Kvc Westerlo', 'Akhmat Grozny', 'Giresunspor', 'Mke Ankaragucu', 'Kv Mechelen', 'Kayserispor', 'Brondby If', 'Vitoria Guimaraes Sc', 'Sivasspor', 'Fc Twente Enschede', 'Fatih Karagumruk', 'Vitesse Arnheim', 'Sc Heerenveen', 'Cercle Brugge', 'Hatayspor', 'Dynamo Kiew', 'Alanyaspor', 'Kasimpasa', 'Krylya Sovetov Samara', 'Fc Midtjylland', 'Aj Auxerre', 'Aris Thessaloniki', 'Fk Rostov', 'Fc Nordsjaelland', 'Gil Vicente Fc', 'Fc Utrecht', 'Adana Demirspor', 'Antalyaspor', 'Aek Athen', 'Oud Heverlee Leuven', 'Fc Schalke 04', 'Konyaspor', 'Paok Thessaloniki', 'Clermont Foot 63', 'Sco Angers', 'Standard Luttich', 'Fc Groningen', 'Vfl Bochum', 'Rsc Charleroi', 'Shakhtar Donetsk', 'Istanbul Basaksehir Fk', 'Sampdoria Genua', 'Hellas Verona', 'Fc Cadiz', 'Us Cremonese', '1 Fc Koln', 'Fc Toulouse', 'Sk Dnipro 1', 'Panathinaikos Athen', 'Royale Union Saint Gilloise', 'Kaa Gent', 'Krc Genk', 'Rc Strassburg Alsace', 'Fk Krasnodar', 'Stade Brest 29', 'Spezia Calcio', 'Fc Elche', 'Us Lecce', 'Vfb Stuttgart', 'Fc Empoli', 'Fc Kopenhagen', 'Lokomotiv Moskau', 'Hertha Bsc', 'Olympiakos Piraus', 'Real Valladolid', 'Es Troyes Ac', 'Royal Antwerpen Fc', 'Sv Werder Bremen', 'Fc Augsburg', 'Az Alkmaar', 'Rcd Mallorca', 'Stade Reims', 'Zska Moskau', 'Rsc Anderlecht', 'Spartak Moskau', 'Fc Lorient', '1 Fsv Mainz 05', 'Rayo Vallecano', 'Ud Almeria', 'Espanyol Barcelona', 'Ac Monza', '1 Fc Union Berlin', 'Feyenoord Rotterdam', 'Dinamo Moskau', 'Glasgow Rangers', 'Celtic Glasgow', 'Fc Girona', 'Montpellier Hsc', 'Brighton Amp Hove Albion', 'Rc Lens', 'Ca Osasuna', 'Celta Vigo', 'Fc Nantes', 'Udinese Calcio', 'Fc Bologna', 'Sc Braga', 'Trabzonspor', 'Us Salernitana 1919', 'Besiktas Istanbul', 'Tsg 1899 Hoffenheim', 'Zenit St Petersburg', 'Psv Eindhoven', 'Fenerbahce Istanbul', 'Fc Getafe', 'Fc Turin', 'Vfl Wolfsburg', 'Borussia Monchengladbach', 'Sc Freiburg', 'Fc Brugge', 'Fc Valencia', 'Galatasaray Istanbul', 'Olympique Lyon', 'Afc Bournemouth', 'Us Sassuolo', 'Lazio Rom', 'Sporting Lissabon', 'Nottingham Forest', 'Fc Porto', 'Olympique Marseille', 'Fc Sevilla', 'Ac Florenz', 'Ogc Nizza', 'Fc Fulham', 'As Monaco', 'Fc Southampton', 'Athletic Bilbao', 'Ajax Amsterdam', 'Losc Lille', 'Real Betis Sevilla', 'Leeds United', 'Real Sociedad San Sebastian', 'Atalanta Bergamo', 'Wolverhampton Wanderers', 'Fc Everton', 'Eintracht Frankfurt', 'Benfica Lissabon', 'Ac Mailand', 'As Rom', 'Fc Brentford', 'Crystal Palace', 'Borussia Dortmund', 'Fc Stade Rennes', 'Fc Villarreal', 'Juventus Turin', 'Newcastle United', 'Aston Villa', 'West Ham United', 'Leicester City', 'Bayer 04 Leverkusen', 'Rasenballsport Leipzig', 'Atletico Madrid', 'Ssc Neapel', 'Inter Mailand', 'Manchester United', 'Fc Chelsea', 'Real Madrid', 'Fc Bayern Munchen', 'Fc Arsenal', 'Fc Barcelona', 'Tottenham Hotspur', 'Fc Liverpool', 'Fc Paris Saint Germain', 'Manchester City'],
    "current_club_domestic_competition_id": ['UKR1', 'DK1', 'GR1', 'SC1', 'BE1', 'RU1', 'NL1', 'TR1', 'PO1', 'FR1', 'L1', 'IT1', 'ES1', 'GB1'],
}
    
feat_ordinal = sorted(feat_ordinal_dict.keys())
feat_ordinal_values_sorted = [feat_ordinal_dict[i] for i in feat_ordinal]
print(feat_ordinal_values_sorted)    
    
ordinal_transformer = OrdinalEncoder(
    categories=feat_ordinal_values_sorted,
)

transformer = ColumnTransformer([
    ('num_tr', numerical_transformer, numerical_features),
    ('cat_tr', categorical_transformer, categorical_features),
    ('ord_tr', ordinal_transformer, ordinal_features)

])

[['Indonesia', 'Tajikistan', 'Faroe Islands', 'Moldova', 'Belarus', 'Montserrat', 'Uganda', 'Northern Ireland', 'Peru', 'Azerbaijan', 'Curacao', 'Costa Rica', 'Ireland', 'Iceland', 'Mozambique', 'Congo', 'Sierra Leone', 'Grenada', 'Finland', 'Panama', 'Guadeloupe', 'South Africa', 'Iraq', 'Chad', 'Tunisia', 'Angola', 'Venezuela', 'Greece', 'Australia', 'North Macedonia', 'Tanzania', 'Cape Verde', 'Slovakia', 'Scotland', 'Palestine', 'Luxembourg', 'Ukraine', 'Latvia', 'Jordan', 'Martinique', 'Comoros', 'Russia', 'Romania', 'Honduras', 'Turkey', 'Montenegro', 'Zimbabwe', 'Ecuador', 'Chile', 'Bosnia-Herzegovina', 'The Gambia', 'Denmark', 'Dominican Republic', 'Kosovo', 'Mali', 'Guinea', 'Mauritania', 'New Zealand', 'Guinea-Bissau', 'Uzbekistan', 'Armenia', 'Netherlands', 'Japan', 'Morocco', 'Gabon', 'Ghana', 'Slovenia', 'Togo', 'Philippines', 'Israel', 'Switzerland', 'Trinidad and Tobago', 'Kazakhstan', 'Austria', 'Suriname', 'Cameroon', 'Burkina Faso', 'Algeria', 'Paraguay', 'Belgium', '

In [33]:
def encode(frame, feature):
    '''
    function that custom encode a categorical feature ordered by the most impactful 
    sub-category on the SalePrice
    '''
    # create a temporary dataframe  
    ordering = pd.DataFrame()
    # create an index with the unique values of the selected feature
    ordering['val'] = frame[feature].unique()
    ordering.index = ordering.val
    # create a spmean column with the mean value of SalePrice 
    # for each category of the selected feature, sorted by value
    ordering['spmean'] = frame[[feature, target]]\
                         .groupby(feature).mean()[target]
    ordering = ordering.sort_values('spmean')
    # create a column ordering with the number from 1 to the number of
    # categories for the selected feature
    ordering['ordering'] = range(1, ordering.shape[0]+1)
    # extracting index and that column to a dictionary
    ordering = ordering['ordering'].to_dict()
    return ordering.keys()



In [34]:
df = pd.read_pickle('Attack_df.pikle')
encode(df,"country_of_citizenship")

dict_keys(['Indonesia', 'Tajikistan', 'Faroe Islands', 'Moldova', 'Belarus', 'Montserrat', 'Uganda', 'Northern Ireland', 'Peru', 'Azerbaijan', 'Curacao', 'Costa Rica', 'Ireland', 'Iceland', 'Mozambique', 'Congo', 'Sierra Leone', 'Grenada', 'Finland', 'Panama', 'Guadeloupe', 'South Africa', 'Iraq', 'Chad', 'Tunisia', 'Angola', 'Venezuela', 'Greece', 'Australia', 'North Macedonia', 'Tanzania', 'Cape Verde', 'Slovakia', 'Scotland', 'Palestine', 'Luxembourg', 'Ukraine', 'Latvia', 'Jordan', 'Martinique', 'Comoros', 'Russia', 'Romania', 'Honduras', 'Turkey', 'Montenegro', 'Zimbabwe', 'Ecuador', 'Chile', 'Bosnia-Herzegovina', 'The Gambia', 'Denmark', 'Dominican Republic', 'Kosovo', 'Mali', 'Guinea', 'Mauritania', 'New Zealand', 'Guinea-Bissau', 'Uzbekistan', 'Armenia', 'Netherlands', 'Japan', 'Morocco', 'Gabon', 'Ghana', 'Slovenia', 'Togo', 'Philippines', 'Israel', 'Switzerland', 'Trinidad and Tobago', 'Kazakhstan', 'Austria', 'Suriname', 'Cameroon', 'Burkina Faso', 'Algeria', 'Paraguay', 'Be

In [35]:
for feature in ordinal_features:
    feat_ordinal_dict = {}
    feat_ordinal_dict[feature] = list(encode(df,feature))
    
    feat_ordinal = sorted(feat_ordinal_dict.keys())
    feat_ordinal_values_sorted = [feat_ordinal_dict[i] for i in feat_ordinal]

    print(feat_ordinal_values_sorted)

[['Indonesia', 'Tajikistan', 'Faroe Islands', 'Moldova', 'Belarus', 'Montserrat', 'Uganda', 'Northern Ireland', 'Peru', 'Azerbaijan', 'Curacao', 'Costa Rica', 'Ireland', 'Iceland', 'Mozambique', 'Congo', 'Sierra Leone', 'Grenada', 'Finland', 'Panama', 'Guadeloupe', 'South Africa', 'Iraq', 'Chad', 'Tunisia', 'Angola', 'Venezuela', 'Greece', 'Australia', 'North Macedonia', 'Tanzania', 'Cape Verde', 'Slovakia', 'Scotland', 'Palestine', 'Luxembourg', 'Ukraine', 'Latvia', 'Jordan', 'Martinique', 'Comoros', 'Russia', 'Romania', 'Honduras', 'Turkey', 'Montenegro', 'Zimbabwe', 'Ecuador', 'Chile', 'Bosnia-Herzegovina', 'The Gambia', 'Denmark', 'Dominican Republic', 'Kosovo', 'Mali', 'Guinea', 'Mauritania', 'New Zealand', 'Guinea-Bissau', 'Uzbekistan', 'Armenia', 'Netherlands', 'Japan', 'Morocco', 'Gabon', 'Ghana', 'Slovenia', 'Togo', 'Philippines', 'Israel', 'Switzerland', 'Trinidad and Tobago', 'Kazakhstan', 'Austria', 'Suriname', 'Cameroon', 'Burkina Faso', 'Algeria', 'Paraguay', 'Belgium', '

In [36]:

position = "Attack"

df = pd.read_pickle(f'{position}_df.pikle')


numerical_transformer =  MinMaxScaler()
categorical_transformer = OneHotEncoder(handle_unknown='ignore')

feat_ordinal_dict = {
    "country_of_citizenship": ['Indonesia', 'Tajikistan', 'Faroe Islands', 'Moldova', 'Belarus', 'Montserrat', 'Uganda', 'Northern Ireland', 'Peru', 'Azerbaijan', 'Curacao', 'Costa Rica', 'Ireland', 'Iceland', 'Mozambique', 'Congo', 'Sierra Leone', 'Grenada', 'Finland', 'Panama', 'Guadeloupe', 'South Africa', 'Iraq', 'Chad', 'Tunisia', 'Angola', 'Venezuela', 'Greece', 'Australia', 'North Macedonia', 'Tanzania', 'Cape Verde', 'Slovakia', 'Scotland', 'Palestine', 'Luxembourg', 'Ukraine', 'Latvia', 'Jordan', 'Martinique', 'Comoros', 'Russia', 'Romania', 'Honduras', 'Turkey', 'Montenegro', 'Zimbabwe', 'Ecuador', 'Chile', 'Bosnia-Herzegovina', 'The Gambia', 'Denmark', 'Dominican Republic', 'Kosovo', 'Mali', 'Guinea', 'Mauritania', 'New Zealand', 'Guinea-Bissau', 'Uzbekistan', 'Armenia', 'Netherlands', 'Japan', 'Morocco', 'Gabon', 'Ghana', 'Slovenia', 'Togo', 'Philippines', 'Israel', 'Switzerland', 'Trinidad and Tobago', 'Kazakhstan', 'Austria', 'Suriname', 'Cameroon', 'Burkina Faso', 'Algeria', 'Paraguay', 'Belgium', 'Iran', 'Benin', 'Senegal', 'DR Congo', 'Sweden', 'Serbia', 'Wales', 'Croatia', 'Jamaica', 'Spain', 'Albania', 'Nigeria', 'Zambia', 'Portugal', 'Uruguay', 'Italy', 'United States', "Cote d'Ivoire", 'Germany', 'Colombia', 'Poland', 'Argentina', 'France', 'Canada', 'Brazil', 'Norway', 'Mexico', 'Hungary', 'Georgia', 'Czech Republic', 'Korea, South', 'England', 'Egypt'],
    "current_club_name":['Fk Minaj', 'Pfk Lviv', 'Gaziantep Fk', 'Ingulets Petrove', 'Kryvbas Kryvyi Rig', 'Metal Kharkiv', 'Nk Veres Rivne', 'Metalist 1925 Kharkiv', 'Chornomorets Odessa', 'Motherwell Fc', 'Dundee United Fc', 'Pas Lamia 1964', 'Pas Giannina', 'Lyngby Bk', 'Kilmarnock Fc', 'Rukh Lviv', 'Fk Oleksandriya', 'Ac Horsens', 'Kolos Kovalivka', 'Asteras Tripolis', 'Aberdeen Fc', 'Ross County Fc', 'St Mirren Fc', 'Go Ahead Eagles Deventer', 'St Johnstone Fc', 'Livingston Fc', 'Viborg Ff', 'Fakel Voronezh', 'Fc Volendam', 'Volos Nps', 'Ionikos Nikeas', 'Ofi Kreta', 'Torpedo Moskau', 'Istanbulspor', 'Fc Emmen', 'Apo Levadiakos', 'Panetolikos Gfs', 'Odense Boldklub', 'Rfc Seraing', 'Zorya Lugansk', 'Portimonense Sc', 'Sbv Excelsior Rotterdam', 'Gd Chaves', 'Heart Of Midlothian Fc', 'Vorskla Poltava', 'Aarhus Gf', 'Rio Ave Fc', 'Sv Zulte Waregem', 'Vv St Truiden', 'Fk Nizhny Novgorod', 'Umraniyespor', 'Sparta Rotterdam', 'Aalborg Bk', 'Boavista Porto Fc', 'Silkeborg If', 'Randers Fc', 'Kv Oostende', 'Ural Ekaterinburg', 'Fc Pacos De Ferreira', 'Fc Vizela', 'Casa Pia Ac', 'Rkc Waalwijk', 'Kas Eupen', 'Fc Arouca', 'Sc Cambuur Leeuwarden', 'Nec Nijmegen', 'Atromitos Athen', 'Fk Orenburg', 'Fk Sochi', 'Kv Kortrijk', 'Cd Santa Clara', 'Cs Maritimo', 'Fk Khimki', 'Hibernian Fc', 'Ac Ajaccio', 'Gd Estoril Praia', 'Fc Famalicao', 'Fortuna Sittard', 'Kvc Westerlo', 'Akhmat Grozny', 'Giresunspor', 'Mke Ankaragucu', 'Kv Mechelen', 'Kayserispor', 'Brondby If', 'Vitoria Guimaraes Sc', 'Sivasspor', 'Fc Twente Enschede', 'Fatih Karagumruk', 'Vitesse Arnheim', 'Sc Heerenveen', 'Cercle Brugge', 'Hatayspor', 'Dynamo Kiew', 'Alanyaspor', 'Kasimpasa', 'Krylya Sovetov Samara', 'Fc Midtjylland', 'Aj Auxerre', 'Aris Thessaloniki', 'Fk Rostov', 'Fc Nordsjaelland', 'Gil Vicente Fc', 'Fc Utrecht', 'Adana Demirspor', 'Antalyaspor', 'Aek Athen', 'Oud Heverlee Leuven', 'Fc Schalke 04', 'Konyaspor', 'Paok Thessaloniki', 'Clermont Foot 63', 'Sco Angers', 'Standard Luttich', 'Fc Groningen', 'Vfl Bochum', 'Rsc Charleroi', 'Shakhtar Donetsk', 'Istanbul Basaksehir Fk', 'Sampdoria Genua', 'Hellas Verona', 'Fc Cadiz', 'Us Cremonese', '1 Fc Koln', 'Fc Toulouse', 'Sk Dnipro 1', 'Panathinaikos Athen', 'Royale Union Saint Gilloise', 'Kaa Gent', 'Krc Genk', 'Rc Strassburg Alsace', 'Fk Krasnodar', 'Stade Brest 29', 'Spezia Calcio', 'Fc Elche', 'Us Lecce', 'Vfb Stuttgart', 'Fc Empoli', 'Fc Kopenhagen', 'Lokomotiv Moskau', 'Hertha Bsc', 'Olympiakos Piraus', 'Real Valladolid', 'Es Troyes Ac', 'Royal Antwerpen Fc', 'Sv Werder Bremen', 'Fc Augsburg', 'Az Alkmaar', 'Rcd Mallorca', 'Stade Reims', 'Zska Moskau', 'Rsc Anderlecht', 'Spartak Moskau', 'Fc Lorient', '1 Fsv Mainz 05', 'Rayo Vallecano', 'Ud Almeria', 'Espanyol Barcelona', 'Ac Monza', '1 Fc Union Berlin', 'Feyenoord Rotterdam', 'Dinamo Moskau', 'Glasgow Rangers', 'Celtic Glasgow', 'Fc Girona', 'Montpellier Hsc', 'Brighton Amp Hove Albion', 'Rc Lens', 'Ca Osasuna', 'Celta Vigo', 'Fc Nantes', 'Udinese Calcio', 'Fc Bologna', 'Sc Braga', 'Trabzonspor', 'Us Salernitana 1919', 'Besiktas Istanbul', 'Tsg 1899 Hoffenheim', 'Zenit St Petersburg', 'Psv Eindhoven', 'Fenerbahce Istanbul', 'Fc Getafe', 'Fc Turin', 'Vfl Wolfsburg', 'Borussia Monchengladbach', 'Sc Freiburg', 'Fc Brugge', 'Fc Valencia', 'Galatasaray Istanbul', 'Olympique Lyon', 'Afc Bournemouth', 'Us Sassuolo', 'Lazio Rom', 'Sporting Lissabon', 'Nottingham Forest', 'Fc Porto', 'Olympique Marseille', 'Fc Sevilla', 'Ac Florenz', 'Ogc Nizza', 'Fc Fulham', 'As Monaco', 'Fc Southampton', 'Athletic Bilbao', 'Ajax Amsterdam', 'Losc Lille', 'Real Betis Sevilla', 'Leeds United', 'Real Sociedad San Sebastian', 'Atalanta Bergamo', 'Wolverhampton Wanderers', 'Fc Everton', 'Eintracht Frankfurt', 'Benfica Lissabon', 'Ac Mailand', 'As Rom', 'Fc Brentford', 'Crystal Palace', 'Borussia Dortmund', 'Fc Stade Rennes', 'Fc Villarreal', 'Juventus Turin', 'Newcastle United', 'Aston Villa', 'West Ham United', 'Leicester City', 'Bayer 04 Leverkusen', 'Rasenballsport Leipzig', 'Atletico Madrid', 'Ssc Neapel', 'Inter Mailand', 'Manchester United', 'Fc Chelsea', 'Real Madrid', 'Fc Bayern Munchen', 'Fc Arsenal', 'Fc Barcelona', 'Tottenham Hotspur', 'Fc Liverpool', 'Fc Paris Saint Germain', 'Manchester City'],
    "current_club_domestic_competition_id": ['UKR1', 'DK1', 'GR1', 'SC1', 'BE1', 'RU1', 'NL1', 'TR1', 'PO1', 'FR1', 'L1', 'IT1', 'ES1', 'GB1'],
}
    
feat_ordinal = sorted(feat_ordinal_dict.keys())
feat_ordinal_values_sorted = [feat_ordinal_dict[i] for i in feat_ordinal]  
    
ordinal_transformer = OrdinalEncoder(
    categories=feat_ordinal_values_sorted,
)

transformer = ColumnTransformer([
    ('num_tr', numerical_transformer, numerical_features),
    ('cat_tr', categorical_transformer, categorical_features),
    ('ord_tr', ordinal_transformer, ordinal_features)

])
transformer.fit(df)
transformed_df = transformer.transform(df)

X, y = transformed_df, df[target]
gbr = GradientBoostingRegressor(random_state=0, verbose = 0)
gbr.fit(X, y)

filename = f'{position}_model.pikle'
pk.dump(gbr, open(filename, 'wb'))

filename = f'{position}_transformer.pikle'
pk.dump(transformer, open(filename, 'wb'))

results = cross_validate (gbr, X, y)
mean_score = np.mean(results['test_score'])
print(mean_score)


0.7191854794875046


In [37]:
position = "Defender"

df = pd.read_pickle(f'{position}_df.pikle')

# for feature in ordinal_features:
#     feat_ordinal_dict = {}
#     feat_ordinal_dict[feature] = list(encode(df,feature))
#     print(feat_ordinal_dict)


numerical_transformer =  MinMaxScaler()
categorical_transformer = OneHotEncoder(handle_unknown='ignore')


feat_ordinal_dict = {
    'country_of_citizenship': ['Faroe Islands', 'Comoros', 'Azerbaijan', 'Malta', 'Uganda', 'Thailand', 'Bulgaria', 'China', 'Cape Verde', 'Belarus', 'Luxembourg', 'Tanzania', 'Curacao', 'Gabon', 'Finland', 'Venezuela', 'Madagascar', 'Romania', 'Congo', 'Lithuania', 'Kazakhstan', 'Sierra Leone', 'Uzbekistan', 'Bosnia-Herzegovina', 'Suriname', 'Haiti', 'Armenia', 'Greece', 'Russia', 'Ukraine', 'Costa Rica', 'Cyprus', 'New Zealand', 'Australia', 'Guinea', 'Togo', 'The Gambia', 'Peru', 'Tunisia', 'Georgia', 'Turkey', 'Iceland', 'Iran', 'Indonesia', 'French Guiana', 'Israel', 'Moldova', 'Belgium', 'Guinea-Bissau', 'Denmark', 'Slovenia', 'Mali', 'Martinique', 'Guadeloupe', 'Serbia', 'Sweden', 'Norway', 'Benin', 'Panama', 'Cameroon', 'Czech Republic', 'Northern Ireland', 'Ireland', 'North Macedonia', 'Angola', 'Scotland', 'Paraguay', 'Albania', 'Zimbabwe', 'Chile', 'Montenegro', 'Netherlands', "Cote d'Ivoire", 'Nigeria', 'Japan', 'Poland', 'DR Congo', 'Portugal', 'Senegal', 'Mexico', 'Colombia', 'Germany', 'Kosovo', 'Brazil', 'United States', 'Dominican Republic', 'Italy', 'Spain', 'Algeria', 'Wales', 'Mozambique', 'Ghana', 'Croatia', 'Burundi', 'Burkina Faso', 'Uruguay', 'Hungary', 'Switzerland', 'Ecuador', 'France', 'Argentina', 'Austria', 'England', 'Morocco', 'Jamaica', 'Slovakia', 'Korea, South', 'Canada'],
    'current_club_domestic_competition_id': ['UKR1', 'DK1', 'GR1', 'SC1', 'BE1', 'RU1', 'TR1', 'NL1', 'PO1', 'FR1', 'IT1', 'ES1', 'L1', 'GB1'],
    'current_club_name': ['Pfk Lviv', 'Fk Minaj', 'Ingulets Petrove', 'Metal Kharkiv', 'Nk Veres Rivne', 'Umraniyespor', 'Lyngby Bk', 'Chornomorets Odessa', 'Metalist 1925 Kharkiv', 'Pas Lamia 1964', 'Ionikos Nikeas', 'Kilmarnock Fc', 'Kryvbas Kryvyi Rig', 'Apo Levadiakos', 'Odense Boldklub', 'Motherwell Fc', 'Vorskla Poltava', 'Ross County Fc', 'Sparta Rotterdam', 'Rukh Lviv', 'Ac Horsens', 'Asteras Tripolis', 'Livingston Fc', 'Fc Emmen', 'Fc Pacos De Ferreira', 'Panetolikos Gfs', 'Fc Volendam', 'Dundee United Fc', 'St Johnstone Fc', 'Cs Maritimo', 'Sc Cambuur Leeuwarden', 'Gd Chaves', 'Volos Nps', 'Pas Giannina', 'Rkc Waalwijk', 'Kolos Kovalivka', 'Atromitos Athen', 'Silkeborg If', 'Fk Oleksandriya', 'Casa Pia Ac', 'Fakel Voronezh', 'Hatayspor', 'Viborg Ff', 'Aalborg Bk', 'Ofi Kreta', 'Aberdeen Fc', 'St Mirren Fc', 'Istanbulspor', 'Fk Khimki', 'Rfc Seraing', 'Go Ahead Eagles Deventer', 'Vv St Truiden', 'Sv Zulte Waregem', 'Giresunspor', 'Sbv Excelsior Rotterdam', 'Kas Eupen', 'Fk Orenburg', 'Torpedo Moskau', 'Gd Estoril Praia', 'Ural Ekaterinburg', 'Rio Ave Fc', 'Fc Arouca', 'Fc Nordsjaelland', 'Fc Vizela', 'Randers Fc', 'Kv Oostende', 'Nec Nijmegen', 'Hibernian Fc', 'Cd Santa Clara', 'Zorya Lugansk', 'Sk Dnipro 1', 'Fortuna Sittard', 'Fk Nizhny Novgorod', 'Heart Of Midlothian Fc', 'Fatih Karagumruk', 'Mke Ankaragucu', 'Aarhus Gf', 'Kv Kortrijk', 'Aris Thessaloniki', 'Gil Vicente Fc', 'Portimonense Sc', 'Akhmat Grozny', 'Kv Mechelen', 'Standard Luttich', 'Sivasspor', 'Oud Heverlee Leuven', 'Panathinaikos Athen', 'Fc Midtjylland', 'Boavista Porto Fc', 'Ac Ajaccio', 'Konyaspor', 'Fc Groningen', 'Kayserispor', 'Antalyaspor', 'Sc Heerenveen', 'Vitesse Arnheim', 'Rsc Charleroi', 'Alanyaspor', 'Cercle Brugge', 'Clermont Foot 63', 'Kasimpasa', 'Fk Sochi', 'Adana Demirspor', 'Royale Union Saint Gilloise', 'Fc Utrecht', 'Vitoria Guimaraes Sc', 'Kvc Westerlo', 'Istanbul Basaksehir Fk', 'Krylya Sovetov Samara', 'Fc Kopenhagen', 'Dynamo Kiew', 'Sco Angers', 'Brondby If', 'Aek Athen', 'Lokomotiv Moskau', 'Paok Thessaloniki', 'Fc Famalicao', 'Fc Cadiz', 'Zska Moskau', 'Real Valladolid', 'Fc Twente Enschede', 'Vfl Bochum', 'Aj Auxerre', 'Fc Schalke 04', 'Us Lecce', 'Fk Rostov', 'Es Troyes Ac', 'Royal Antwerpen Fc', 'Us Cremonese', 'Fc Toulouse', 'Fc Lorient', 'Dinamo Moskau', 'Fc Elche', 'Sampdoria Genua', 'Stade Reims', 'Fk Krasnodar', 'Sc Braga', 'Spartak Moskau', 'Rsc Anderlecht', 'Trabzonspor', 'Hertha Bsc', 'Olympiakos Piraus', 'Shakhtar Donetsk', 'Ac Monza', '1 Fc Koln', 'Montpellier Hsc', 'Stade Brest 29', 'Kaa Gent', 'Espanyol Barcelona', 'Rayo Vallecano', 'Rcd Mallorca', 'Fc Empoli', 'Spezia Calcio', 'Hellas Verona', 'Fc Nantes', 'Az Alkmaar', 'Fc Bologna', 'Besiktas Istanbul', 'Sv Werder Bremen', 'Us Salernitana 1919', 'Ud Almeria', '1 Fsv Mainz 05', 'Rc Strassburg Alsace', 'Fc Getafe', 'Celtic Glasgow', 'Us Sassuolo', 'Galatasaray Istanbul', 'Fc Brugge', 'Glasgow Rangers', 'Losc Lille', 'Krc Genk', 'Fc Girona', 'Zenit St Petersburg', 'Fc Augsburg', 'Rc Lens', 'Celta Vigo', 'Psv Eindhoven', 'Feyenoord Rotterdam', '1 Fc Union Berlin', 'Udinese Calcio', 'Fenerbahce Istanbul', 'Fc Turin', 'Real Betis Sevilla', 'Ca Osasuna', 'Fc Porto', 'Athletic Bilbao', 'Tsg 1899 Hoffenheim', 'Sporting Lissabon', 'Vfb Stuttgart', 'Eintracht Frankfurt', 'Olympique Lyon', 'Afc Bournemouth', 'Ogc Nizza', 'Fc Fulham', 'Lazio Rom', 'Fc Valencia', 'Fc Stade Rennes', 'Fc Brentford', 'Ac Florenz', 'Vfl Wolfsburg', 'Nottingham Forest', 'Borussia Monchengladbach', 'Fc Sevilla', 'Juventus Turin', 'Borussia Dortmund', 'Leeds United', 'Olympique Marseille', 'Newcastle United', 'Atalanta Bergamo', 'Real Sociedad San Sebastian', 'Benfica Lissabon', 'As Monaco', 'Ajax Amsterdam', 'Leicester City', 'Wolverhampton Wanderers', 'Fc Villarreal', 'As Rom', 'Fc Southampton', 'Brighton Amp Hove Albion', 'Crystal Palace', 'West Ham United', 'Fc Everton', 'Sc Freiburg', 'Ssc Neapel', 'Bayer 04 Leverkusen', 'Inter Mailand', 'Aston Villa', 'Atletico Madrid', 'Rasenballsport Leipzig', 'Ac Mailand', 'Manchester United', 'Fc Barcelona', 'Real Madrid', 'Tottenham Hotspur', 'Fc Liverpool', 'Manchester City', 'Fc Arsenal', 'Fc Paris Saint Germain', 'Fc Chelsea', 'Fc Bayern Munchen']
    }
    

feat_ordinal_values_sorted = [feat_ordinal_dict[i] for i in feat_ordinal]
    
ordinal_transformer = OrdinalEncoder(
    categories=feat_ordinal_values_sorted,
)

transformer = ColumnTransformer([
    ('num_tr', numerical_transformer, numerical_features),
    ('cat_tr', categorical_transformer, categorical_features),
    ('ord_tr', ordinal_transformer, ordinal_features)

])
transformer.fit(df)
transformed_df = transformer.transform(df)

X, y = transformed_df, df[target]
gbr = GradientBoostingRegressor(random_state=0, verbose = 0)
gbr.fit(X, y)

filename = f'{position}_model.pikle'
pk.dump(gbr, open(filename, 'wb'))

filename = f'{position}_transformer.pikle'
pk.dump(transformer, open(filename, 'wb'))

results = cross_validate (gbr, X, y)
mean_score = np.mean(results['test_score'])
print(mean_score)

0.6683888577536162


In [38]:
position = "Goalkeeper"

df = pd.read_pickle(f'{position}_df.pikle')

# for feature in ordinal_features:
#     feat_ordinal_dict = {}
#     feat_ordinal_dict[feature] = list(encode(df,feature))
#     print(feat_ordinal_dict)


numerical_transformer =  MinMaxScaler()
categorical_transformer = OneHotEncoder(handle_unknown='ignore')


feat_ordinal_dict = {
    'country_of_citizenship': ['Angola', 'Guinea-Bissau', 'Nigeria', 'Lithuania', 'Algeria', 'Haiti', 'Kosovo', 'Comoros', 'Tunisia', 'Kazakhstan', 'Scotland', 'Colombia', 'Ghana', 'Japan', 'Israel', 'Northern Ireland', 'French Guiana', 'Sweden', 'Bosnia-Herzegovina', 'Iceland', 'Norway', 'Denmark', 'Ukraine', 'Netherlands', 'Greece', 'Turkey', 'Austria', 'Chile', 'Russia', 'Finland', 'Uruguay', 'Mexico', 'Croatia', 'Romania', 'Luxembourg', 'Burkina Faso', 'Czech Republic', 'Slovakia', 'Albania', 'Germany', 'Belgium', 'Australia', 'Serbia', 'Poland', 'Italy', 'France', 'Hungary', 'Venezuela', 'North Macedonia', 'England', 'Portugal', 'Wales', 'Brazil', 'Spain', 'Costa Rica', 'Ireland', 'United States', 'Slovenia', 'Cameroon', 'Morocco', 'Switzerland', 'Argentina', 'Senegal', 'Georgia'],
    'current_club_domestic_competition_id': ['SC1', 'GR1', 'DK1', 'UKR1', 'TR1', 'BE1', 'RU1', 'NL1', 'PO1', 'L1', 'FR1', 'IT1', 'ES1', 'GB1'],
    'current_club_name': ['Gaziantep Fk', 'Vv St Truiden', 'Fk Minaj', 'Rukh Lviv', 'Casa Pia Ac', 'Boavista Porto Fc', 'Sv Zulte Waregem', 'Kryvbas Kryvyi Rig', 'Metalist 1925 Kharkiv', 'Metal Kharkiv', 'Pas Lamia 1964', 'Gd Chaves', 'Pfk Lviv', 'Chornomorets Odessa', 'Fatih Karagumruk', 'Nk Veres Rivne', 'Kv Kortrijk', 'St Johnstone Fc', 'Istanbulspor', 'Lyngby Bk', 'Fc Pacos De Ferreira', 'Hatayspor', 'St Mirren Fc', 'Adana Demirspor', 'Ofi Kreta', 'Vorskla Poltava', 'Pas Giannina', 'Ingulets Petrove', 'Sivasspor', 'Dundee United Fc', 'Giresunspor', 'Volos Nps', 'Kayserispor', 'Apo Levadiakos', 'Ross County Fc', 'Hibernian Fc', 'Ionikos Nikeas', 'Kas Eupen', 'Portimonense Sc', 'Rkc Waalwijk', 'Kilmarnock Fc', 'Silkeborg If', 'Odense Boldklub', 'Glasgow Rangers', 'Aarhus Gf', 'Livingston Fc', 'Cd Santa Clara', 'Go Ahead Eagles Deventer', 'Fc Volendam', 'Fc Emmen', 'Aris Thessaloniki', 'Asteras Tripolis', 'Fk Oleksandriya', 'Fc Arouca', 'Alanyaspor', 'Viborg Ff', 'Cs Maritimo', 'Torpedo Moskau', 'Heart Of Midlothian Fc', 'Sbv Excelsior Rotterdam', 'Kolos Kovalivka', 'Ac Ajaccio', 'Panetolikos Gfs', 'Atromitos Athen', 'Sparta Rotterdam', 'Fc Nordsjaelland', 'Istanbul Basaksehir Fk', 'Fc Vizela', 'Sk Dnipro 1', 'Sc Cambuur Leeuwarden', 'Zorya Lugansk', 'Motherwell Fc', 'Randers Fc', 'Fc Groningen', 'Fc Empoli', 'Fk Khimki', 'Rio Ave Fc', 'Akhmat Grozny', 'Mke Ankaragucu', 'Fc Midtjylland', 'Fk Orenburg', 'Oud Heverlee Leuven', 'Fk Nizhny Novgorod', 'Stade Reims', 'Sc Freiburg', 'Fakel Voronezh', 'Kv Oostende', 'Ac Horsens', 'Fc Schalke 04', 'Fk Rostov', 'Ural Ekaterinburg', 'Fk Sochi', 'Rfc Seraing', 'Kasimpasa', 'Kvc Westerlo', 'Dinamo Moskau', 'Aalborg Bk', 'Rsc Charleroi', 'Konyaspor', 'Kv Mechelen', 'Fc Augsburg', 'Royale Union Saint Gilloise', 'Paok Thessaloniki', 'Zenit St Petersburg', 'Gd Estoril Praia', 'Us Sassuolo', 'Vfl Bochum', 'Fortuna Sittard', 'Fc Twente Enschede', 'Panathinaikos Athen', 'Brondby If', 'Clermont Foot 63', 'Olympiakos Piraus', 'Aberdeen Fc', 'Fc Utrecht', 'Vitesse Arnheim', 'Umraniyespor', 'Ud Almeria', 'Fc Toulouse', 'Sporting Lissabon', 'Krylya Sovetov Samara', 'Antalyaspor', 'Celtic Glasgow', 'Cercle Brugge', 'Galatasaray Istanbul', 'Us Salernitana 1919', 'Aek Athen', 'Ac Florenz', 'Fc Girona', 'Fc Bologna', 'Es Troyes Ac', 'Fc Villarreal', 'Fc Turin', 'Fc Lorient', 'Nec Nijmegen', 'Gil Vicente Fc', 'Zska Moskau', 'Lokomotiv Moskau', 'Spezia Calcio', 'Us Lecce', 'Vfb Stuttgart', 'Kaa Gent', 'Tsg 1899 Hoffenheim', 'Standard Luttich', 'Aj Auxerre', 'Hellas Verona', 'Bayer 04 Leverkusen', 'Fc Stade Rennes', 'Az Alkmaar', 'Celta Vigo', 'Ogc Nizza', 'Stade Brest 29', 'Rasenballsport Leipzig', 'Sv Werder Bremen', 'Spartak Moskau', 'Montpellier Hsc', '1 Fc Koln', 'Afc Bournemouth', 'Rsc Anderlecht', 'Rayo Vallecano', 'Hertha Bsc', 'Real Valladolid', 'Sc Heerenveen', 'Fc Kopenhagen', 'Sco Angers', 'Fc Brugge', '1 Fsv Mainz 05', 'Sampdoria Genua', 'Espanyol Barcelona', 'Fc Famalicao', 'Vfl Wolfsburg', 'Royal Antwerpen Fc', '1 Fc Union Berlin', 'Vitoria Guimaraes Sc', 'Losc Lille', 'As Rom', 'Tottenham Hotspur', 'Fc Elche', 'Eintracht Frankfurt', 'Besiktas Istanbul', 'Ca Osasuna', 'Borussia Monchengladbach', 'Udinese Calcio', 'Rc Strassburg Alsace', 'Sc Braga', 'Ac Monza', 'Rc Lens', 'Krc Genk', 'Olympique Lyon', 'Ajax Amsterdam', 'Shakhtar Donetsk', 'Leicester City', 'Fc Getafe', 'Trabzonspor', 'Feyenoord Rotterdam', 'Crystal Palace', 'As Monaco', 'Psv Eindhoven', 'Rcd Mallorca', 'Us Cremonese', 'Dynamo Kiew', 'West Ham United', 'Fc Bayern Munchen', 'Ssc Neapel', 'Fenerbahce Istanbul', 'Lazio Rom', 'Inter Mailand', 'Olympique Marseille', 'Fc Cadiz', 'Fc Nantes', 'Manchester United', 'Fc Southampton', 'Juventus Turin', 'Real Betis Sevilla', 'Atalanta Bergamo', 'Newcastle United', 'Wolverhampton Wanderers', 'Benfica Lissabon', 'Fc Fulham', 'Fc Valencia', 'Borussia Dortmund', 'Fk Krasnodar', 'Leeds United', 'Ac Mailand', 'Nottingham Forest', 'Aston Villa', 'Fc Everton', 'Fc Sevilla', 'Athletic Bilbao', 'Fc Barcelona', 'Fc Chelsea', 'Real Sociedad San Sebastian', 'Fc Brentford', 'Fc Paris Saint Germain', 'Fc Porto', 'Brighton Amp Hove Albion', 'Manchester City', 'Fc Arsenal', 'Fc Liverpool', 'Atletico Madrid', 'Real Madrid']
    }
    

feat_ordinal_values_sorted = [feat_ordinal_dict[i] for i in feat_ordinal] 
    
ordinal_transformer = OrdinalEncoder(
    categories=feat_ordinal_values_sorted,
)

transformer = ColumnTransformer([
    ('num_tr', numerical_transformer, numerical_features),
    ('cat_tr', categorical_transformer, categorical_features),
    ('ord_tr', ordinal_transformer, ordinal_features)

])
transformer.fit(df)
transformed_df = transformer.transform(df)

X, y = transformed_df, df[target]
gbr = GradientBoostingRegressor(random_state=0, verbose = 0)
gbr.fit(X, y)

filename = f'{position}_model.pikle'
pk.dump(gbr, open(filename, 'wb'))

filename = f'{position}_transformer.pikle'
pk.dump(transformer, open(filename, 'wb'))

results = cross_validate (gbr, X, y)
mean_score = np.mean(results['test_score'])
print(mean_score)

0.7355488826301712


In [39]:
position = "Midfield"

df = pd.read_pickle(f'{position}_df.pikle')

# for feature in ordinal_features:
#     feat_ordinal_dict = {}
#     feat_ordinal_dict[feature] = list(encode(df,feature))
#     print(feat_ordinal_dict)


numerical_transformer =  MinMaxScaler()
categorical_transformer = OneHotEncoder(handle_unknown='ignore')


feat_ordinal_dict = {
    'country_of_citizenship': ['Iran', 'Togo', 'Trinidad and Tobago', 'Costa Rica', 'Burundi', 'Estonia', 'Honduras', 'Moldova', 'Curacao', 'Montenegro', 'Liberia', 'Bolivia', 'Uganda', 'Zambia', 'Belarus', 'Lithuania', 'Northern Ireland', 'Burkina Faso', 'Ireland', 'Ukraine', 'Iraq', 'New Zealand', 'Cape Verde', 'Cyprus', 'Georgia', 'Kosovo', 'Guinea-Bissau', 'Angola', 'Kazakhstan', 'Greece', 'The Gambia', 'Iceland', 'Congo', 'Equatorial Guinea', 'Australia', 'Martinique', 'Bulgaria', 'Wales', 'Russia', 'Israel', 'Uzbekistan', 'DR Congo', 'Madagascar', 'Chile', 'Canada', 'Bosnia-Herzegovina', 'Sweden', 'Slovenia', 'Albania', 'Turkey', 'Grenada', 'Paraguay', 'Morocco', 'Japan', 'Scotland', 'Denmark', 'Belgium', 'Senegal', 'Korea, South', 'Malta', 'Finland', 'Norway', 'Luxembourg', 'Gabon', 'Venezuela', 'Zimbabwe', 'Ghana', 'Netherlands', 'Guinea', 'Cameroon', 'Egypt', 'Portugal', 'Colombia', 'Nigeria', 'France', 'United States', 'Croatia', "Cote d'Ivoire", 'Mali', 'Slovakia', 'Algeria', 'Romania', 'North Macedonia', 'Germany', 'Tunisia', 'Poland', 'Switzerland', 'Peru', 'Austria', 'Italy', 'Czech Republic', 'Brazil', 'Serbia', 'Spain', 'Mexico', 'Argentina', 'England', 'Libya', 'Ecuador', 'Central African Republic', 'Uruguay'],
    'current_club_domestic_competition_id': ['UKR1', 'DK1', 'GR1', 'SC1', 'BE1', 'TR1', 'RU1', 'PO1', 'NL1', 'FR1', 'L1', 'IT1', 'ES1', 'GB1'],
    'current_club_name': ['Ionikos Nikeas', 'Kryvbas Kryvyi Rig', 'Fk Minaj', 'Asteras Tripolis', 'Pfk Lviv', 'Lyngby Bk', 'Metal Kharkiv', 'Chornomorets Odessa', 'Ross County Fc', 'Rio Ave Fc', 'Ingulets Petrove', 'Panetolikos Gfs', 'Fc Arouca', 'St Johnstone Fc', 'Fakel Voronezh', 'Kolos Kovalivka', 'Kilmarnock Fc', 'Rukh Lviv', 'Metalist 1925 Kharkiv', 'Ac Horsens', 'Pas Lamia 1964', 'Apo Levadiakos', 'Fk Oleksandriya', 'Nk Veres Rivne', 'Pas Giannina', 'Zorya Lugansk', 'Rfc Seraing', 'Rkc Waalwijk', 'Motherwell Fc', 'Fc Vizela', 'Portimonense Sc', 'Livingston Fc', 'Viborg Ff', 'Cs Maritimo', 'Silkeborg If', 'Volos Nps', 'Nec Nijmegen', 'St Mirren Fc', 'Gd Chaves', 'Umraniyespor', 'Vorskla Poltava', 'Torpedo Moskau', 'Odense Boldklub', 'Kas Eupen', 'Ofi Kreta', 'Sbv Excelsior Rotterdam', 'Atromitos Athen', 'Istanbulspor', 'Dundee United Fc', 'Hibernian Fc', 'Go Ahead Eagles Deventer', 'Hatayspor', 'Fk Khimki', 'Sv Zulte Waregem', 'Vv St Truiden', 'Randers Fc', 'Fc Emmen', 'Fc Volendam', 'Aalborg Bk', 'Sparta Rotterdam', 'Heart Of Midlothian Fc', 'Aberdeen Fc', 'Kv Kortrijk', 'Sc Cambuur Leeuwarden', 'Gil Vicente Fc', 'Aarhus Gf', 'Gd Estoril Praia', 'Kayserispor', 'Giresunspor', 'Gaziantep Fk', 'Casa Pia Ac', 'Boavista Porto Fc', 'Sc Heerenveen', 'Ural Ekaterinburg', 'Aris Thessaloniki', 'Sivasspor', 'Fc Pacos De Ferreira', 'Kvc Westerlo', 'Fk Nizhny Novgorod', 'Fk Orenburg', 'Fc Nordsjaelland', 'Cd Santa Clara', 'Antalyaspor', 'Kasimpasa', 'Akhmat Grozny', 'Fc Groningen', 'Kv Oostende', 'Sk Dnipro 1', 'Cercle Brugge', 'Kv Mechelen', 'Ac Ajaccio', 'Clermont Foot 63', 'Panathinaikos Athen', 'Fatih Karagumruk', 'Vfl Bochum', 'Krylya Sovetov Samara', 'Fortuna Sittard', 'Fc Cadiz', 'Adana Demirspor', 'Brondby If', 'Vitoria Guimaraes Sc', 'Fk Krasnodar', 'Mke Ankaragucu', 'Fc Kopenhagen', 'Oud Heverlee Leuven', 'Fc Utrecht', 'Aj Auxerre', 'Sco Angers', 'Fk Sochi', 'Alanyaspor', 'Istanbul Basaksehir Fk', 'Konyaspor', 'Fc Midtjylland', 'Fc Famalicao', 'Shakhtar Donetsk', 'Rsc Charleroi', 'Kaa Gent', 'Royale Union Saint Gilloise', 'Aek Athen', 'Rcd Mallorca', 'Rsc Anderlecht', 'Sv Werder Bremen', 'Paok Thessaloniki', 'Spezia Calcio', 'Us Cremonese', 'Dynamo Kiew', 'Standard Luttich', 'Fc Schalke 04', 'Fc Elche', 'Royal Antwerpen Fc', 'Fc Empoli', 'Us Salernitana 1919', 'Ud Almeria', 'Stade Brest 29', 'Vitesse Arnheim', 'Sampdoria Genua', 'Fc Twente Enschede', 'Fc Augsburg', 'Fk Rostov', 'Fc Toulouse', 'Real Valladolid', 'Us Lecce', 'Besiktas Istanbul', 'Az Alkmaar', 'Hellas Verona', 'Fc Nantes', 'Krc Genk', 'Olympiakos Piraus', 'Es Troyes Ac', 'Glasgow Rangers', 'Rayo Vallecano', 'Vfb Stuttgart', 'Spartak Moskau', 'Zska Moskau', 'Montpellier Hsc', 'Lokomotiv Moskau', 'Fc Turin', 'Ac Monza', 'Celtic Glasgow', 'Sc Freiburg', '1 Fc Koln', 'Trabzonspor', 'Rc Strassburg Alsace', 'Fc Bologna', 'Fc Lorient', 'Stade Reims', 'Dinamo Moskau', 'Tsg 1899 Hoffenheim', 'Udinese Calcio', '1 Fsv Mainz 05', 'Ca Osasuna', 'Fc Girona', 'Galatasaray Istanbul', 'Athletic Bilbao', 'Sc Braga', 'Fenerbahce Istanbul', 'Hertha Bsc', 'Fc Getafe', 'Bayer 04 Leverkusen', 'Losc Lille', '1 Fc Union Berlin', 'Espanyol Barcelona', 'Sporting Lissabon', 'Eintracht Frankfurt', 'Rc Lens', 'Fc Sevilla', 'Nottingham Forest', 'Fc Brugge', 'Ogc Nizza', 'As Rom', 'Fc Villarreal', 'Celta Vigo', 'Vfl Wolfsburg', 'Crystal Palace', 'Ac Florenz', 'Us Sassuolo', 'Fc Porto', 'Fc Brentford', 'Fc Stade Rennes', 'Fc Fulham', 'Feyenoord Rotterdam', 'Afc Bournemouth', 'Ajax Amsterdam', 'Real Betis Sevilla', 'Borussia Monchengladbach', 'As Monaco', 'Leeds United', 'Olympique Lyon', 'Benfica Lissabon', 'Lazio Rom', 'Olympique Marseille', 'Fc Southampton', 'Atalanta Bergamo', 'Fc Everton', 'Wolverhampton Wanderers', 'Fc Valencia', 'Zenit St Petersburg', 'Fc Liverpool', 'Rasenballsport Leipzig', 'Brighton Amp Hove Albion', 'Psv Eindhoven', 'Atletico Madrid', 'Juventus Turin', 'Real Sociedad San Sebastian', 'Manchester United', 'Ac Mailand', 'Leicester City', 'Newcastle United', 'Ssc Neapel', 'Tottenham Hotspur', 'Borussia Dortmund', 'Fc Arsenal', 'Inter Mailand', 'Aston Villa', 'Fc Chelsea', 'Fc Paris Saint Germain', 'Manchester City', 'West Ham United', 'Real Madrid', 'Fc Barcelona', 'Fc Bayern Munchen']
    }
    

feat_ordinal_values_sorted = [feat_ordinal_dict[i] for i in feat_ordinal]   
    
ordinal_transformer = OrdinalEncoder(
    categories=feat_ordinal_values_sorted,
)

transformer = ColumnTransformer([
    ('num_tr', numerical_transformer, numerical_features),
    ('cat_tr', categorical_transformer, categorical_features),
    ('ord_tr', ordinal_transformer, ordinal_features)

])
transformer.fit(df)
transformed_df = transformer.transform(df)

X, y = transformed_df, df[target]
gbr = GradientBoostingRegressor(random_state=0, verbose = 0)
gbr.fit(X, y)

filename = f'{position}_model.pikle'
pk.dump(gbr, open(filename, 'wb'))

filename = f'{position}_transformer.pikle'
pk.dump(transformer, open(filename, 'wb'))

results = cross_validate (gbr, X, y)
mean_score = np.mean(results['test_score'])
print(mean_score)

0.6740290352264061


In [40]:
# position = "Attack"
# df = pd.read_pickle(f'{position}_df.pikle')
# transformer = ColumnTransformer([
#     ('num_tr', numerical_transformer, numerical_features),
#     ('cat_tr', categorical_transformer, categorical_features),
#     ('ord_tr', ordinal_transformer, ordinal_features)])

# transformer.fit(df)
# transformed_df = transformer.transform(df)


# X, y = transformed_df, df[target]
# gbr = GradientBoostingRegressor(random_state=0, verbose = 0)
# gbr.fit(X, y)

# filename = f'{position}_model.pikle'
# pk.dump(gbr, open(filename, 'wb'))

# filename = f'{position}_transformer.pikle'
# pk.dump(transformer, open(filename, 'wb'))

# results = cross_validate (gbr, X, y)
# mean_score = np.mean(results['test_score'])
# print(mean_score)


In [41]:
transformed_df.shape
#df.columns

(1329, 25)

In [42]:
len(numerical_features)+len(categorical_features)+len(ordinal_features)

19

In [43]:
df.sub_position.unique()

array(['Central Midfield', 'Defensive Midfield', 'Left Midfield',
       'Right Midfield'], dtype=object)

In [44]:
transformed_df.shape

(1329, 25)

In [45]:
results = cross_validate (gbr, X, y)

In [46]:
results

{'fit_time': array([0.16589308, 0.16057706, 0.16398883, 0.15489531, 0.15159273]),
 'score_time': array([0.00051594, 0.00082731, 0.00059819, 0.00057769, 0.00050211]),
 'test_score': array([0.81677114, 0.79837769, 0.69533827, 0.63699159, 0.42266649])}

In [47]:
mean_score = np.mean(results['test_score'])
mean_score

0.6740290352264061

In [48]:
# save the model to disk


In [49]:
# save the pipeline to disk


We need 20 inputs --> preproc takes care

In [50]:
player = pd.DataFrame()

In [51]:
"""numerical_features = [ 'games_2022', 'minutes_played_2022', 'goals_2022',
       'assists_2022', 'goals_against_2022', 'goals_for_2022',
       'clean_sheet_2022', 'height_in_cm', 'age', 
       'club_value', 'squad_size', 'term_days_remaining',
       'value_goals_for_2022', 'yellow_cards_2022', 'red_cards_2022']
categorical_features = ['sub_position', 'foot']
ordinal_features = ['country_of_citizenship','current_club_domestic_competition_id', 'current_club_name']
target = 'market_value_in_eur'"""

"numerical_features = [ 'games_2022', 'minutes_played_2022', 'goals_2022',\n       'assists_2022', 'goals_against_2022', 'goals_for_2022',\n       'clean_sheet_2022', 'height_in_cm', 'age', \n       'club_value', 'squad_size', 'term_days_remaining',\n       'value_goals_for_2022', 'yellow_cards_2022', 'red_cards_2022']\ncategorical_features = ['sub_position', 'foot']\nordinal_features = ['country_of_citizenship','current_club_domestic_competition_id', 'current_club_name']\ntarget = 'market_value_in_eur'"

In [52]:
player['games_2022'] =[20]
player['minutes_played_2022'] =[1800] 
player['goals_2022'] =[200]
player['assists_2022'] =[15] 
player['goals_against_2022'] =[20] 
player['goals_for_2022'] =[10]
player['clean_sheet_2022'] =[7] 
player['height_in_cm'] =[186] 
player['age'] =[31] 
player['club_value'] =[50000000] 
player['squad_size'] =[18] 
player['term_days_remaining'] =[200]
player['value_goals_for_2022'] =[50000000] 
player['yellow_cards_2022'] =[2] 
player['red_cards_2022'] =[0]
player['sub_position'] =['Left Winger'] 
player['foot'] =['Right']
player['country_of_citizenship'] =['Italy']
player['current_club_domestic_competition_id'] =['IT1'] 
player['current_club_name'] = ['Juventus Turin']

In [53]:
player

Unnamed: 0,games_2022,minutes_played_2022,goals_2022,assists_2022,goals_against_2022,goals_for_2022,clean_sheet_2022,height_in_cm,age,club_value,squad_size,term_days_remaining,value_goals_for_2022,yellow_cards_2022,red_cards_2022,sub_position,foot,country_of_citizenship,current_club_domestic_competition_id,current_club_name
0,20,1800,200,15,20,10,7,186,31,50000000,18,200,50000000,2,0,Left Winger,Right,Italy,IT1,Juventus Turin


In [54]:
player_transformed = transformer.transform(player)

In [55]:
my_value = gbr.predict(player_transformed)

In [56]:
my_value

array([45652563.46967193])