# Imports

In [28]:
import pandas as pd
from sklearn.linear_model import LinearRegression
import joblib

In [29]:
current_season = "24-25"

# Load the datasets

In [30]:
# load dataset players
print("PLAYERS")
urlPlayers = current_season + '/output_rating.csv'
dataframePlayers = pd.read_csv(urlPlayers, header=0, index_col=None)
# summarize shape
print("Shape: " + str(dataframePlayers.shape))
# summarize first few lines
print("Summary Players")
print(dataframePlayers)
# load dataset squads
print("TEAMS Nicosia")
urlExcel = current_season + '/Rose_fantalega-nicosia.xlsx'
dataframeTeams = pd.read_excel(urlExcel, index_col=None)
# summarize shape
print("Shape:" + str(dataframeTeams.shape))
# summarize first few lines
print("Summary Players")
print(dataframeTeams)

PLAYERS
Shape: (539, 24)
Summary Players
       Id Role         Name     Squad  Price  MyRating         Mate  \
0    4431    P  Carnesecchi  Atalanta     10       1.4        Musso   
1    2792    P        Musso  Atalanta      5       2.4  Carnesecchi   
2    2297    P     Rossi F.  Atalanta      1       1.0          NaN   
3     554    D   Zappacosta  Atalanta     15       4.4         Holm   
4    5067    D       Bakker  Atalanta     11       4.0      Ruggeri   
..    ...  ...          ...       ...    ...       ...          ...   
534  5529    A        Henry    Verona     11       1.9          NaN   
535  5471    A       Djuric    Verona     10       1.8    Bonazzoli   
536   505    A    Bonazzoli    Verona      8       2.3       Djuric   
537  5395    A        Braaf    Verona      1       1.0          NaN   
538  5439    A       Kallon    Verona      1       1.0          NaN   

     Regularness  FVM  Pg22_23  ...  Mf21_22  Pg20_21  Mv20_21  Mf20_21  \
0              3   10       27 

## Get Data

In [31]:
roles = ["P", "D", "C", "A"]
roles_m = ['Por', 'Dc', 'B', 'Dd', 'Ds', 'E', 'M', 'C', 'W', 'T', 'Pc', 'A']

players_list = dataframePlayers["Name"].tolist()
print(players_list)

['Carnesecchi', 'Musso', 'Rossi F.', 'Zappacosta', 'Bakker', 'Scalvini', 'Holm', 'Kolasinac', 'Toloi', 'Djimsiti', 'Palomino', 'Hateboer', 'Zortea', 'Ruggeri', 'Bonfanti', 'Koopmeiners', 'Pasalic', 'Ederson D.s.', 'De Roon', 'Miranchuk', 'De Ketelaere', 'Adopo', 'Scamacca', 'Lookman', "Toure' E.", 'Muriel', 'Skorupski', 'Ravaglia F.', 'Bagnolini', 'Posch', "Lucumi'", 'Kristiansen', 'Beukema', 'Lykogiannis', 'Calafiori', 'Soumaoro', 'Bonifazi', 'Corazza', 'De Silvestri', 'Amey', 'Orsolini', 'Ferguson', 'Ndoye', 'Freuler', 'Moro N.', 'Aebischer', 'Fabbian', 'Saelemaekers', 'Urbanski', 'El Azzouzi', 'Zirkzee', 'Karlsson', 'Van Hooijdonk', 'Radunovic', 'Scuffet', 'Aresti', 'Zappa', 'Dossena', 'Azzi', 'Wieteska', 'Augello', 'Obert', 'Hatzidiakos', 'Goldaniga', 'Di Pardo', 'Capradossi', 'Jankto', 'Nandez', 'Mancosu', 'Oristanio', 'Makoumbou', 'Deiola', 'Sulemana I.', 'Rog', 'Viola', 'Prati', 'Lapadula', 'Petagna', 'Shomurodov', 'Luvumbo', 'Pavoletti', 'Caprile', 'Berisha', 'Perisan', 'Stublj

## Mean and Std

In [32]:
# Group by 'Name' and calculate mean, std, and count for each player
player_prices = dataframeTeams.groupby('Name')['Price'].agg(['mean', 'std', 'count']).reset_index()

# Replace NaN values in 'std' with 1
player_prices['std'].fillna(1, inplace=True)

# Iterate through the players_list to handle cases where a player has no occurrences
for player in players_list:
    if player not in player_prices['Name'].values:
        # If a player has no occurrences, set mean, std, and count to 1
        player_prices = pd.concat([player_prices, pd.DataFrame({'Name': [player], 'mean': [1], 'std': [1], 'count': [0]})], ignore_index=True)

# Now, player_stats contains the mean, std, and count for each player, with rows where std is NaN removed
print(player_prices)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  player_prices['std'].fillna(1, inplace=True)


            Name       mean       std  count
0        Abraham   1.000000  1.000000      1
1         Acerbi   6.692308  3.250247     13
2    Alex Sandro   4.000000  4.242641      2
3       Almqvist   4.785714  6.459085     14
4          Aouar  13.714286  8.870249     14
..           ...        ...       ...    ...
534       Suslov   1.000000  1.000000      0
535      Hrustic   1.000000  1.000000      0
536     Joselito   1.000000  1.000000      0
537        Braaf   1.000000  1.000000      0
538       Kallon   1.000000  1.000000      0

[539 rows x 4 columns]


## Substitute the player name with his FVM at the start of the season

**Filter and Visualize**

In [33]:
# Filter the dataframe to the ones having more than 8 count
player_prices = player_prices[player_prices['count'] >= 8]
print(player_prices)

player_prices = pd.merge(player_prices, dataframePlayers[['Name', 'FVM', 'Role']], on='Name', how='left')
print(player_prices)

# Split the merged dataframe into separate dataframes based on 'Role'
df_P = player_prices[player_prices['Role'] == 'P']  # DataFrame for role 'P'
df_D = player_prices[player_prices['Role'] == 'D']  # DataFrame for role 'D'
df_C = player_prices[player_prices['Role'] == 'C']  # DataFrame for role 'C'
df_A = player_prices[player_prices['Role'] == 'A']  # DataFrame for role 'A'

print(df_D)
# Filter the dataframe to the ones having more than 8 count
df_D = df_D[df_D['FVM'] >= 10]
print(df_D)

               Name       mean        std  count
1            Acerbi   6.692308   3.250247     13
3          Almqvist   4.785714   6.459085     14
4             Aouar  13.714286   8.870249     14
5        Arnautovic  32.785714  15.373196     14
6       Arthur Melo   2.250000   2.549510      8
..              ...        ...        ...    ...
299  Zambo Anguissa   9.571429   7.470087     14
300       Zapata D.  83.357143  23.873407     14
302      Zappacosta  37.142857   6.893220     14
303       Zielinski  60.214286  11.018715     14
304         Zirkzee  36.642857  14.642442     14

[188 rows x 4 columns]
               Name       mean        std  count  FVM Role
0            Acerbi   6.692308   3.250247     13   32    D
1          Almqvist   4.785714   6.459085     14   28    A
2             Aouar  13.714286   8.870249     14   47    C
3        Arnautovic  32.785714  15.373196     14  122    A
4       Arthur Melo   2.250000   2.549510      8   18    C
..              ...        ...    

**Regressors**

In [34]:
# Function to train regression models for mean and std
def train_regression_models(df, role):
    # Extract X and Y values
    X = df[['FVM']].values  # Input (FVM)
    y_mean = df['mean'].values  # Target for mean
    y_std = df['std'].values  # Target for std
    
    # Train model for mean
    model_mean = LinearRegression()
    model_mean.fit(X, y_mean)
    
    # Train model for std
    model_std = LinearRegression()
    model_std.fit(X, y_std)
    
    print(f"Models trained for role {role}")
    
    return model_mean, model_std

In [35]:
# Train models for each role
models = {}

models['P'] = train_regression_models(df_P, 'P')
models['D'] = train_regression_models(df_D, 'D')
models['C'] = train_regression_models(df_C, 'C')
models['A'] = train_regression_models(df_A, 'A')

Models trained for role P
Models trained for role D
Models trained for role C
Models trained for role A


In [36]:
# Accessing the trained models
model_mean_P, model_std_P = models['P']
model_mean_D, model_std_D = models['D']
model_mean_C, model_std_C = models['C']
model_mean_A, model_std_A = models['A']

In [37]:
# Example: Predicting for a new FVM value for role 'C'
new_FVM_value = 90
predicted_mean = model_mean_C.predict([[new_FVM_value]])
predicted_std = model_std_C.predict([[new_FVM_value]])

print(f"Predicted mean for role 'C' with FVM={new_FVM_value}: {predicted_mean[0]}")
print(f"Predicted std for role 'C' with FVM={new_FVM_value}: {predicted_std[0]}")

Predicted mean for role 'C' with FVM=90: 57.92913950979279
Predicted std for role 'C' with FVM=90: 8.559022015188885


## Save Stats and Regressors

In [38]:
# Save model to a file
def save_model(model, filename):
    with open(filename, 'wb') as file:
        joblib.dump(model, file)
    print(f"Model saved as {filename}")

# Assuming model_mean_P and model_std_P are your trained models
save_model(model_mean_P, 'regressors/model_mean_P.joblib')
save_model(model_std_P, 'regressors/model_std_P.joblib')
save_model(model_mean_D, 'regressors/model_mean_D.joblib')
save_model(model_std_D, 'regressors/model_std_D.joblib')
save_model(model_mean_C, 'regressors/model_mean_C.joblib')
save_model(model_std_C, 'regressors/model_std_C.joblib')
save_model(model_mean_A, 'regressors/model_mean_A.joblib')
save_model(model_std_A, 'regressors/model_std_A.joblib')

Model saved as regressors/model_mean_P.joblib
Model saved as regressors/model_std_P.joblib
Model saved as regressors/model_mean_D.joblib
Model saved as regressors/model_std_D.joblib
Model saved as regressors/model_mean_C.joblib
Model saved as regressors/model_std_C.joblib
Model saved as regressors/model_mean_A.joblib
Model saved as regressors/model_std_A.joblib


In [39]:
# Specify the file path where you want to save the Excel file
excel_file_path = current_season + '/player_prices_A.xlsx'

# Save the DataFrame to an Excel file
df_A.to_excel(excel_file_path, index=False)

print(f"Player statistics saved to {excel_file_path}")
# Specify the file path where you want to save the Excel file
excel_file_path = current_season + '/player_prices_C.xlsx'

# Save the DataFrame to an Excel file
df_C.to_excel(excel_file_path, index=False)

print(f"Player statistics saved to {excel_file_path}")
# Specify the file path where you want to save the Excel file
excel_file_path = current_season + '/player_prices_D.xlsx'

# Save the DataFrame to an Excel file
df_D.to_excel(excel_file_path, index=False)

print(f"Player statistics saved to {excel_file_path}")
# Specify the file path where you want to save the Excel file
excel_file_path = current_season + '/player_prices_P.xlsx'

# Save the DataFrame to an Excel file
df_P.to_excel(excel_file_path, index=False)

print(f"Player statistics saved to {excel_file_path}")

Player statistics saved to 24-25/player_prices_A.xlsx
Player statistics saved to 24-25/player_prices_C.xlsx
Player statistics saved to 24-25/player_prices_D.xlsx
Player statistics saved to 24-25/player_prices_P.xlsx
