# Imports

In [4]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import joblib

from scipy.stats import norm

# Load the datasets

In [5]:
def _load_and_report(banner, csv_path, summary_title):
    """Read a CSV into a DataFrame, printing a banner, its shape and its content.

    Parameters
    ----------
    banner : str
        Text printed before the file is read.
    csv_path : str
        Path of the CSV file; the first line is used as the header and no
        column is used as the index.
    summary_title : str
        Text printed right before the DataFrame itself.

    Returns
    -------
    pandas.DataFrame
        The loaded table.
    """
    print(banner)
    frame = pd.read_csv(csv_path, header=0, index_col=None)
    # Shape first, then the (pandas-truncated) table
    print(f"Shape:{frame.shape}")
    print(summary_title)
    print(frame)
    return frame


# Players dataset (one row per player)
urlPlayers = 'output_rating.csv'
dataframePlayers = _load_and_report("PLAYERS", urlPlayers, "Summary Players")

# Squads dataset
urlSquads = 'squads.csv'
dataframeSquads = _load_and_report("SQUADS", urlSquads, "Summary Squads")

PLAYERS
Shape:(539, 24)
Summary Players
       Id Role         Name     Squad  Price  MyRating         Mate  \
0    4431    P  Carnesecchi  Atalanta     10       1.4        Musso   
1    2792    P        Musso  Atalanta      5       2.4  Carnesecchi   
2    2297    P     Rossi F.  Atalanta      1       1.0          NaN   
3     554    D   Zappacosta  Atalanta     15       4.4         Holm   
4    5067    D       Bakker  Atalanta     11       4.0      Ruggeri   
..    ...  ...          ...       ...    ...       ...          ...   
534  5529    A        Henry    Verona     11       1.9          NaN   
535  5471    A       Djuric    Verona     10       1.8    Bonazzoli   
536   505    A    Bonazzoli    Verona      8       2.3       Djuric   
537  5395    A        Braaf    Verona      1       1.0          NaN   
538  5439    A       Kallon    Verona      1       1.0          NaN   

     Regularness  FVM  Pg22_23  ...  Mf21_22  Pg20_21  Mv20_21  Mf20_21  \
0              3   10       27  

# Initialize variables

In [6]:
# Role codes as they appear in the 'Role' column. Order matters: other cells
# index this list by position (roles[0] ... roles[3]).
roles = list("PDCA")
# Prefixes of the per-season columns; presumably combined with the season
# suffixes below to form names such as "Pg22_23" — TODO confirm.
columns = ["Pg", "Mv", "Mf"]
# Season suffixes, most recent first: "22_23" down to "18_19".
seasons = [f"{start:02d}_{start + 1:02d}" for start in range(22, 17, -1)]
# Threshold on a number of matches; not used by the cells visible here.
min_matches = 11

# Create subdatasets given the roles of players

In [7]:
def _players_with_role(role_code):
    """Return the rows of dataframePlayers whose 'Role' equals role_code."""
    return dataframePlayers.loc[dataframePlayers['Role'].eq(role_code)]


# One dataframe per role, following the order of `roles`
dataframeKeep = _players_with_role(roles[0])
dataframeDef = _players_with_role(roles[1])
dataframeMid = _players_with_role(roles[2])
dataframeAtt = _players_with_role(roles[3])
# Same four frames gathered in a single list, again in `roles` order
dataframes = [dataframeKeep, dataframeDef, dataframeMid, dataframeAtt]

# Show the roles[2] subset as an example
print("Esempio Centrocampisti")
print(dataframeMid)

Esempio Centrocampisti
       Id Role          Name     Squad  Price  MyRating          Mate  \
15   5685    C   Koopmeiners  Atalanta     24       4.8           NaN   
16   2077    C       Pasalic  Atalanta     13       4.0  Ederson D.s.   
17   5792    C  Ederson D.s.  Atalanta     12       2.8       Pasalic   
18     22    C       De Roon  Atalanta     11       3.0           NaN   
19   5001    C     Miranchuk  Atalanta      9       3.4           NaN   
..    ...  ...           ...       ...    ...       ...           ...   
528  6480    C    Tchatchoua    Verona      4       1.0           NaN   
529  6252    C    Folorunsho    Verona      3       2.0       Lazovic   
530  6486    C        Suslov    Verona      3       1.0           NaN   
531  5947    C       Hrustic    Verona      2       1.0           NaN   
532  6254    C      Joselito    Verona      1       1.0           NaN   

     Regularness  FVM  Pg22_23  ...  Mf21_22  Pg20_21  Mv20_21  Mf20_21  \
15             5  112    

# Replace the names of squads in the Players dataframe

In [8]:
# Back up the original 'Squad' names so they can be restored later.
# The guard keeps the cell re-runnable: without it, a second run would
# overwrite the backup with the already-replaced values.
if 'Original_Squad' not in dataframePlayers.columns:
    dataframePlayers['Original_Squad'] = dataframePlayers['Squad'].copy()

# Lookup table: squad name -> 'Value' from dataframeSquads.
# If a name appears more than once, the first row wins (same choice the
# previous row-by-row lookup made with .values[0]).
squad_values = (
    dataframeSquads
    .drop_duplicates(subset='Name')
    .set_index('Name')['Value']
)

# Fail with a readable message when a player's squad has no entry, instead
# of an opaque IndexError coming from an empty lookup result.
missing_mask = ~dataframePlayers['Original_Squad'].isin(squad_values.index)
if missing_mask.any():
    missing_names = dataframePlayers.loc[missing_mask, 'Original_Squad'].unique().tolist()
    raise ValueError(f"Squads not found in dataframeSquads['Name']: {missing_names}")

# Replace every squad name with its value in one vectorised step; always map
# from the backup so the result does not depend on the current 'Squad' content.
dataframePlayers['Squad'] = dataframePlayers['Original_Squad'].map(squad_values)

print(dataframePlayers.head(1))

     Id Role         Name Squad  Price  MyRating   Mate  Regularness  FVM  \
0  4431    P  Carnesecchi     4     10       1.4  Musso            3   10   

   Pg22_23  ...  Pg20_21  Mv20_21  Mf20_21  Pg19_20  Mv19_20  Mf19_20  \
0       27  ...        0      0.0      0.0        0      0.0      0.0   

   Pg18_19  Mv18_19  Mf18_19  Original_Squad  
0        0      0.0      0.0        Atalanta  

[1 rows x 25 columns]


# Load the regressors

Each regressor takes one feature vector per player, in this order: [Squad, Price, MyRating, Regularness, FVM] (Squad is the numeric squad value, not the squad name).


In [9]:
# Load one saved ridge regressor per role, in the same order as `roles`,
# so that regressors[k] is the model for roles[k].
# NOTE(review): joblib.load unpickles the file — only load model files from
# a trusted source.
regressors = [
    joblib.load(f'ridge_regressor_model_{role}.pkl')
    for role in roles
]

# Quick check on a hand-written feature vector
# [Squad, Price, MyRating, Regularness, FVM] with the roles[3] model.
row = [5, 40, 4.5, 5, 220]
regressors[3].predict([row])

array([[125.07237654,  22.62197594]])

# Run the regression

Each season, update the `Pgxx_xx` column name used in this cell (currently `Pg22_23`) to the most recent season; the new columns are inserted right before it.

In [10]:
# Optional per-role scaling of the prediction to 800 credits (base 500);
# currently disabled.
#scaling_per_role_800 = [1.1, 1.1, 1.15, 1.35]

# Feature columns passed to the regressors, in this exact order
columns_to_extract = ['Squad', 'Price', "MyRating", "Regularness", "FVM"]

# Every player must have a role for which a regressor was loaded.
# ValueError is also what the previous roles.index(...) lookup raised.
unknown_roles = dataframePlayers.loc[~dataframePlayers['Role'].isin(roles), 'Role'].unique()
if len(unknown_roles) > 0:
    raise ValueError(f"No regressor available for roles: {list(unknown_roles)}")

# Buffers for the final integer values; every row is filled below because
# all roles were validated above.
expected_price = np.empty(len(dataframePlayers), dtype=int)
expected_price_std = np.empty(len(dataframePlayers), dtype=int)

for i, role in enumerate(roles):
    role_mask = (dataframePlayers['Role'] == role).to_numpy()
    if not role_mask.any():
        # predict() cannot be called on an empty batch
        continue
    # One batch prediction per role instead of one call per player.
    # A plain float array is passed, as before, so no column names reach the model.
    features = dataframePlayers.loc[role_mask, columns_to_extract].to_numpy(dtype=float)
    prediction = np.asarray(regressors[i].predict(features))  #* scaling_per_role_800[i]
    # Column 0 is used as the expected price, column 1 as its std.
    # np.rint rounds half to even, like the built-in round() used previously.
    rounded_mean = np.rint(prediction[:, 0]).astype(int)
    rounded_std = np.rint(prediction[:, 1]).astype(int)
    # Prices below 1 are raised to 1
    expected_price[role_mask] = np.maximum(rounded_mean, 1)
    # NOTE(review): only negative stds are replaced by 1, so a std of 0 is
    # kept as 0 — confirm whether the threshold should be "< 1" like the price.
    expected_price_std[role_mask] = np.where(rounded_std < 0, 1, rounded_std)

# Place the new columns right before 'Pg22_23'. Both are inserted at the same
# position, so the final order is ExpectedPrice, ExpectedPriceStd, Pg22_23.
# Existing columns are overwritten rather than re-inserted, which keeps the
# cell re-runnable (DataFrame.insert raises if the column already exists).
position = dataframePlayers.columns.get_loc('Pg22_23')
for column_name, column_values in (
    ('ExpectedPriceStd', expected_price_std),
    ('ExpectedPrice', expected_price),
):
    if column_name in dataframePlayers.columns:
        dataframePlayers[column_name] = column_values
    else:
        dataframePlayers.insert(position, column_name, column_values)

print(dataframePlayers.head(1))

     Id Role         Name Squad  Price  MyRating   Mate  Regularness  FVM  \
0  4431    P  Carnesecchi     4     10       1.4  Musso            3   10   

   ExpectedPrice  ...  Pg20_21  Mv20_21  Mf20_21  Pg19_20  Mv19_20  Mf19_20  \
0              3  ...        0      0.0      0.0        0      0.0      0.0   

   Pg18_19  Mv18_19  Mf18_19  Original_Squad  
0        0      0.0      0.0        Atalanta  

[1 rows x 27 columns]


# Revert the Squad column

In [11]:
# Put the squad names saved in 'Original_Squad' back into 'Squad', then
# discard the backup column, which is no longer needed.
dataframePlayers = (
    dataframePlayers
    .assign(Squad=lambda frame: frame['Original_Squad'])
    .drop(columns=['Original_Squad'])
)
print("\nAfter Reverting:")
print(dataframePlayers)


After Reverting:
       Id Role         Name     Squad  Price  MyRating         Mate  \
0    4431    P  Carnesecchi  Atalanta     10       1.4        Musso   
1    2792    P        Musso  Atalanta      5       2.4  Carnesecchi   
2    2297    P     Rossi F.  Atalanta      1       1.0          NaN   
3     554    D   Zappacosta  Atalanta     15       4.4         Holm   
4    5067    D       Bakker  Atalanta     11       4.0      Ruggeri   
..    ...  ...          ...       ...    ...       ...          ...   
534  5529    A        Henry    Verona     11       1.9          NaN   
535  5471    A       Djuric    Verona     10       1.8    Bonazzoli   
536   505    A    Bonazzoli    Verona      8       2.3       Djuric   
537  5395    A        Braaf    Verona      1       1.0          NaN   
538  5439    A       Kallon    Verona      1       1.0          NaN   

     Regularness  FVM  ExpectedPrice  ...  Mf21_22  Pg20_21  Mv20_21  Mf20_21  \
0              3   10              3  ...     0.

# Save the ready to use .csv 

Each season, update only the output file name in the cell below.

In [12]:
# Write the full table (stats columns included) as a comma-separated UTF-8
# file, without the index column.
dataframePlayers.to_csv(
    'players23_24_trial.csv',
    sep=',',
    index=False,
    encoding='utf-8',
)
# Disabled alternative: same export with ';' as separator to a '_excel' file.
#dataframePlayers.to_csv('players23_24_trial_excel.csv', index=False, sep=';', encoding='utf-8')

# Save without the stats columns

In [3]:
# This cell needs `dataframePlayers` from the cells above; on a fresh kernel
# it fails with NameError.
# Locate 'ExpectedPrice': it is the last column that is kept.
column_index = dataframePlayers.columns.get_loc('ExpectedPrice')
# Keep columns 0..column_index and discard everything to the right.
# NOTE(review): 'ExpectedPriceStd' is inserted right after 'ExpectedPrice',
# so it is dropped here together with the stats columns — confirm this is
# intended.
dataframePlayers = dataframePlayers.iloc[:, 0:column_index + 1]
# Write the reduced table as a comma-separated UTF-8 file, without the index
dataframePlayers.to_csv(
    'players23_24_nostats.csv',
    sep=',',
    index=False,
    encoding='utf-8',
)

NameError: name 'dataframePlayers' is not defined