## Using Deep Learning to predict the missing rating scores of players

In [1]:
# Libraries
import pandas as pd
import tensorflow as tf
import seaborn as sns
from tensorflow.keras import layers, models
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.callbacks import ReduceLROnPlateau , EarlyStopping , ModelCheckpoint , LearningRateScheduler
import numpy as np
from imblearn.over_sampling import SMOTE
from imblearn.over_sampling import RandomOverSampler
from imblearn.combine import SMOTETomek
from sklearn.utils.class_weight import compute_class_weight

## Importing the data

In [70]:
encoded_df = pd.read_csv("encoded.csv")

In [71]:
match_summary = encoded_df.copy()

In [72]:
encoded_df

Unnamed: 0,Match,Game,Team,Opponent_x,Name,Rating,goal,Assist,yellowcard,redcard,...,OppForm_4-3-3 B,OppForm_4-4-2 A,OppForm_4-4-2 B,OppForm_4-5-1-,OppForm_5-2-3 A,OppForm_5-2-3 B,OppForm_5-3-1-1,Team_encoded,Opponent_x_encoded,Name_encoded
0,1,1,FC Groningen,Ajax,Verbruggen,5.0,0,0,0,0,...,1,0,0,0,0,0,0,7.128028,6.122581,6.442123
1,1,1,FC Groningen,Ajax,Benedetti,7.0,0,1,0,0,...,1,0,0,0,0,0,0,7.128028,6.122581,6.738950
2,1,1,FC Groningen,Ajax,Gabriel Paulista,6.0,0,0,0,0,...,1,0,0,0,0,0,0,7.128028,6.122581,6.605270
3,1,1,FC Groningen,Ajax,Thiaw,6.0,0,0,0,0,...,1,0,0,0,0,0,0,7.128028,6.122581,6.737280
4,1,1,FC Groningen,Ajax,Dedic,5.0,0,0,1,0,...,1,0,0,0,0,0,0,7.128028,6.122581,6.605270
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7214,39,9,Heracles Almelo,Fortuna Sittard,Núñez,,1,0,0,0,...,0,1,0,0,0,0,0,6.265176,6.758730,6.525178
7215,39,9,Heracles Almelo,Fortuna Sittard,Batshuayi,,0,0,0,0,...,0,1,0,0,0,0,0,6.265176,6.758730,6.831400
7216,39,9,Heracles Almelo,Fortuna Sittard,van Oorschot,,1,0,0,0,...,0,1,0,0,0,0,0,6.265176,6.758730,6.423937
7217,39,9,Heracles Almelo,Fortuna Sittard,Thuram,,0,2,0,0,...,0,1,0,0,0,0,0,6.265176,6.758730,6.569380


In [73]:
encoded_df.drop(['Team', 'Opponent_x', 'Name'], axis=1, inplace=True)

In [74]:
len(encoded_df)

7219

In [75]:
encoded_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7219 entries, 0 to 7218
Data columns (total 70 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   Match                7219 non-null   int64  
 1   Game                 7219 non-null   int64  
 2   Rating               7018 non-null   float64
 3   goal                 7219 non-null   int64  
 4   Assist               7219 non-null   int64  
 5   yellowcard           7219 non-null   int64  
 6   redcard              7219 non-null   int64  
 7   redcardsecondyellow  7219 non-null   int64  
 8   injury               7219 non-null   int64  
 9   penaltygoal          7219 non-null   int64  
 10  penaltymiss          7219 non-null   int64  
 11  sub                  7219 non-null   int64  
 12  Player of the Match  7219 non-null   int64  
 13  TeamGoals            7219 non-null   int64  
 14  Conceded             7219 non-null   int64  
 15  Value                7219 non-null   f

In [76]:
# Checking the number of data from each rating value
encoded_df['Rating'].value_counts()

Rating
6.0    1820
7.0    1771
8.0    1642
5.0     868
9.0     542
4.0     269
3.0      70
2.0      32
1.0       4
Name: count, dtype: int64

## Splitting the data

In [77]:
# Splitting the data into training and testing
unknown_df = encoded_df[encoded_df['Match'] > 36]
actual_df = encoded_df[encoded_df['Match'] < 37]

In [78]:
actual_df['Rating'].nunique()

9

In [79]:
le = LabelEncoder()
actual_df['Rating'] = le.fit_transform(actual_df['Rating'])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  actual_df['Rating'] = le.fit_transform(actual_df['Rating'])


In [80]:
ohe = OneHotEncoder()
ratings_onehot = ohe.fit_transform(actual_df['Rating'].values.reshape(-1, 1)).toarray()

In [81]:
X = actual_df.drop('Rating', axis=1)
y = ratings_onehot

In [82]:
# Oversampling the minority data
ros = RandomOverSampler(random_state=42)
X_resampled, y_resampled = ros.fit_resample(X, y)

In [83]:
# Split the data
X_train, X_test, y_train, y_test = train_test_split(X_resampled, y_resampled, test_size=0.2, random_state=42)

In [84]:
# Using standard scaler to normalise the data
scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

## Constructing the deep learning model

In [85]:
# Create and compile the model
num_classes = ratings_onehot.shape[1]

model = tf.keras.Sequential([
    tf.keras.layers.Dense(256, activation='relu', input_shape=(X_train.shape[1],)),
    tf.keras.layers.Dropout(0.3),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dropout(0.3),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(16, activation='relu'),
    tf.keras.layers.Dropout(0.1),
    tf.keras.layers.Dense(num_classes, activation='softmax')
])



  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [86]:
lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=1e-3,
    decay_steps=10000,
    decay_rate=0.9)
optimizer = tf.keras.optimizers.Adam(learning_rate=lr_schedule)
early_stopping = tf.keras.callbacks.EarlyStopping(patience=20, restore_best_weights=True)

In [87]:
model.compile(optimizer=optimizer,
              loss='categorical_crossentropy',
              metrics=['accuracy'])

In [88]:
model.summary()

In [89]:
history = model.fit(X_train_scaled, y_train, 
                    epochs=300, 
                    batch_size=32,
                    validation_split=0.2,
                    callbacks=[early_stopping],
                    verbose=1)

Epoch 1/300
[1m320/320[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.2747 - loss: 1.8283 - val_accuracy: 0.5982 - val_loss: 0.9890
Epoch 2/300
[1m320/320[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.5309 - loss: 1.1341 - val_accuracy: 0.6619 - val_loss: 0.8453
Epoch 3/300
[1m320/320[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.5850 - loss: 0.9613 - val_accuracy: 0.6736 - val_loss: 0.7864
Epoch 4/300
[1m320/320[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.6254 - loss: 0.8968 - val_accuracy: 0.6966 - val_loss: 0.7338
Epoch 5/300
[1m320/320[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.6464 - loss: 0.8395 - val_accuracy: 0.7052 - val_loss: 0.7006
Epoch 6/300
[1m320/320[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.6630 - loss: 0.7990 - val_accuracy: 0.7056 - val_loss: 0.6987
Epoch 7/300
[1m320/32

In [90]:
# Evaluate the model
test_loss, test_accuracy = model.evaluate(X_test_scaled, y_test, verbose=0)
print(f"Test accuracy: {test_accuracy:.4f}")

Test accuracy: 0.8301


## Testing the model with the remainder of the dataset

In [91]:
encoded_df.drop('Rating', axis=1, inplace=True)

In [92]:
predict_df = unknown_df.drop(['Rating'], axis=1)

In [93]:
predicted_scaled = scaler.transform(predict_df)


In [94]:
class_labels = sorted(actual_df['Rating'].unique())

In [95]:
# Make predictions
predictions = model.predict(predicted_scaled)

# Get the predicted class (index of the highest probability)
predicted_classes = np.argmax(predictions, axis=1)

[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step


In [96]:
# Map the predicted class indices to actual rating values
predicted_ratings = [class_labels[i] for i in predicted_classes]

In [97]:
# Add predictions to the original DataFrame
unknown_df['Predicted_Rating'] = predicted_ratings

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  unknown_df['Predicted_Rating'] = predicted_ratings


In [98]:
interval_df = unknown_df.copy()
# interval_df = encoded_df[encoded_df['Rating'].notna()]

In [99]:
interval_df = interval_df[interval_df['Rating'].notna()]
interval_df

Unnamed: 0,Match,Game,Rating,goal,Assist,yellowcard,redcard,redcardsecondyellow,injury,penaltygoal,...,OppForm_4-4-2 A,OppForm_4-4-2 B,OppForm_4-5-1-,OppForm_5-2-3 A,OppForm_5-2-3 B,OppForm_5-3-1-1,Team_encoded,Opponent_x_encoded,Name_encoded,Predicted_Rating
6793,37,1,4.0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,5.456954,6.958457,5.339958,3
6794,37,1,4.0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,5.456954,6.958457,6.051470,3
6795,37,1,5.0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,5.456954,6.958457,5.948557,3
6796,37,1,6.0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,5.456954,6.958457,5.967812,3
6797,37,1,7.0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,5.456954,6.958457,6.008145,5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7013,38,1,6.0,0,0,1,0,0,0,0,...,0,0,0,0,0,0,7.820059,6.075145,7.394765,6
7014,38,1,6.0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,7.820059,6.075145,7.662818,7
7015,38,1,6.0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,7.820059,6.075145,7.612833,6
7016,38,1,6.0,0,0,1,0,0,0,0,...,0,0,0,0,0,0,7.820059,6.075145,7.230483,6


In [100]:
interval_df['UpperBound'] = interval_df['Predicted_Rating'] + 1
interval_df['LowerBound'] = interval_df['Predicted_Rating'] - 1
interval_df.tail()

Unnamed: 0,Match,Game,Rating,goal,Assist,yellowcard,redcard,redcardsecondyellow,injury,penaltygoal,...,OppForm_4-5-1-,OppForm_5-2-3 A,OppForm_5-2-3 B,OppForm_5-3-1-1,Team_encoded,Opponent_x_encoded,Name_encoded,Predicted_Rating,UpperBound,LowerBound
7013,38,1,6.0,0,0,1,0,0,0,0,...,0,0,0,0,7.820059,6.075145,7.394765,6,7,5
7014,38,1,6.0,0,0,0,0,0,0,0,...,0,0,0,0,7.820059,6.075145,7.662818,7,8,6
7015,38,1,6.0,0,0,0,0,0,1,0,...,0,0,0,0,7.820059,6.075145,7.612833,6,7,5
7016,38,1,6.0,0,0,1,0,0,0,0,...,0,0,0,0,7.820059,6.075145,7.230483,6,7,5
7017,38,1,6.0,0,0,0,0,0,0,0,...,0,0,0,0,7.820059,6.075145,6.941393,6,7,5


In [101]:
interval_df['inRange'] = np.where((interval_df['LowerBound'] <= interval_df['Rating']) & (interval_df['Rating'] <= interval_df['UpperBound']), 1, 0)
interval_df['onPoint'] = np.where(interval_df['Rating'] == interval_df['Predicted_Rating'], 1, 0)

In [102]:
inRange = interval_df['inRange'].sum()
onPoint = interval_df['onPoint'].sum()
rows = len(interval_df)

# Calculating the accuracy
accuracyRange = inRange/rows *100
accuracy = onPoint/rows *100

print(f'The accuracy of the model is {accuracyRange:.2f}% within the range')
print(f'The accuracy of the model is {accuracy:.2f}%.')

The accuracy of the model is 82.67% within the range
The accuracy of the model is 30.67%.


In [69]:
interval_df['Predicted_Rating'].value_counts()

Predicted_Rating
7    74
6    44
4    40
8    34
5    19
3    12
2     2
Name: count, dtype: int64

## Getting the full dataset

In [348]:
full_scaled = scaler.transform(encoded_df)

In [349]:
# Make predictions
predictions_full = model.predict(full_scaled)

# Get the predicted class (index of the highest probability)
full_predicted_classes = np.argmax(predictions_full, axis=1)

[1m226/226[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step


In [350]:
# Map the predicted class indices to actual rating values
full_predicted_ratings = [class_labels[i] for i in full_predicted_classes]

In [351]:
# Add predictions to the original DataFrame
match_summary['Predicted_Rating'] = full_predicted_ratings

In [352]:
full_interval_df = match_summary.copy()

In [353]:
full_interval_df = full_interval_df[full_interval_df['Rating'].notna()]
full_interval_df

Unnamed: 0,Match,Game,Team,Opponent_x,Name,Rating,goal,Assist,yellowcard,redcard,...,position_LM,position_LW,position_RB,position_RM,position_RW,position_ST,Team_encoded,Opponent_x_encoded,Name_encoded,Predicted_Rating
0,1,1,FC Groningen,Ajax,Verbruggen,5.0,0,0,0,0,...,0,0,0,0,0,0,7.128028,6.122581,6.442123,4
1,1,1,FC Groningen,Ajax,Benedetti,7.0,0,1,0,0,...,0,0,0,0,0,0,7.128028,6.122581,6.738950,6
2,1,1,FC Groningen,Ajax,Gabriel Paulista,6.0,0,0,0,0,...,0,0,0,0,0,0,7.128028,6.122581,6.605270,5
3,1,1,FC Groningen,Ajax,Thiaw,6.0,0,0,0,0,...,0,0,0,0,0,0,7.128028,6.122581,6.737280,5
4,1,1,FC Groningen,Ajax,Dedic,5.0,0,0,1,0,...,0,0,1,0,0,0,7.128028,6.122581,6.605270,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7013,38,1,SC Heerenveen,Go Ahead Eagles,Fernandes,6.0,0,0,1,0,...,0,0,0,0,0,0,7.820059,6.075145,7.394765,6
7014,38,1,SC Heerenveen,Go Ahead Eagles,Vinicius Junior,6.0,0,0,0,0,...,0,1,0,0,0,0,7.820059,6.075145,7.662818,7
7015,38,1,SC Heerenveen,Go Ahead Eagles,Gyökeres,6.0,0,0,0,0,...,0,0,0,0,0,1,7.820059,6.075145,7.612833,4
7016,38,1,SC Heerenveen,Go Ahead Eagles,Sávio,6.0,0,0,1,0,...,0,0,0,0,1,0,7.820059,6.075145,7.230483,6


In [354]:
full_interval_df['UpperBound'] = full_interval_df['Predicted_Rating'] + 1
full_interval_df['LowerBound'] = full_interval_df['Predicted_Rating'] - 1
full_interval_df.tail()

Unnamed: 0,Match,Game,Team,Opponent_x,Name,Rating,goal,Assist,yellowcard,redcard,...,position_RB,position_RM,position_RW,position_ST,Team_encoded,Opponent_x_encoded,Name_encoded,Predicted_Rating,UpperBound,LowerBound
7013,38,1,SC Heerenveen,Go Ahead Eagles,Fernandes,6.0,0,0,1,0,...,0,0,0,0,7.820059,6.075145,7.394765,6,7,5
7014,38,1,SC Heerenveen,Go Ahead Eagles,Vinicius Junior,6.0,0,0,0,0,...,0,0,0,0,7.820059,6.075145,7.662818,7,8,6
7015,38,1,SC Heerenveen,Go Ahead Eagles,Gyökeres,6.0,0,0,0,0,...,0,0,0,1,7.820059,6.075145,7.612833,4,5,3
7016,38,1,SC Heerenveen,Go Ahead Eagles,Sávio,6.0,0,0,1,0,...,0,0,1,0,7.820059,6.075145,7.230483,6,7,5
7017,38,1,SC Heerenveen,Go Ahead Eagles,Al-Saed,6.0,0,0,0,0,...,0,0,0,0,7.820059,6.075145,6.941393,6,7,5


In [355]:
full_interval_df['inRange'] = np.where((full_interval_df['LowerBound'] <= full_interval_df['Rating']) & (full_interval_df['Rating'] <= full_interval_df['UpperBound']), 1, 0)
full_interval_df['onPoint'] = np.where(full_interval_df['Rating'] == full_interval_df['Predicted_Rating'], 1, 0)

In [356]:
full_interval_df = full_interval_df[full_interval_df['Match'] > 36]
full_interval_df

Unnamed: 0,Match,Game,Team,Opponent_x,Name,Rating,goal,Assist,yellowcard,redcard,...,position_RW,position_ST,Team_encoded,Opponent_x_encoded,Name_encoded,Predicted_Rating,UpperBound,LowerBound,inRange,onPoint
6793,37,1,Almere City,Heracles Almelo,Bakker,4.0,0,0,0,0,...,0,0,5.456954,6.958457,5.339958,3,4,2,1,0
6794,37,1,Almere City,Heracles Almelo,Floranus,4.0,0,0,0,0,...,0,0,5.456954,6.958457,6.051470,3,4,2,1,0
6795,37,1,Almere City,Heracles Almelo,Akujobi,5.0,0,0,0,0,...,0,0,5.456954,6.958457,5.948557,3,4,2,0,0
6796,37,1,Almere City,Heracles Almelo,Zagaritis,6.0,0,0,0,0,...,0,0,5.456954,6.958457,5.967812,5,6,4,1,0
6797,37,1,Almere City,Heracles Almelo,Barbet,7.0,1,0,0,0,...,0,0,5.456954,6.958457,6.008145,6,7,5,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7013,38,1,SC Heerenveen,Go Ahead Eagles,Fernandes,6.0,0,0,1,0,...,0,0,7.820059,6.075145,7.394765,6,7,5,1,1
7014,38,1,SC Heerenveen,Go Ahead Eagles,Vinicius Junior,6.0,0,0,0,0,...,0,0,7.820059,6.075145,7.662818,7,8,6,1,0
7015,38,1,SC Heerenveen,Go Ahead Eagles,Gyökeres,6.0,0,0,0,0,...,0,1,7.820059,6.075145,7.612833,4,5,3,0,0
7016,38,1,SC Heerenveen,Go Ahead Eagles,Sávio,6.0,0,0,1,0,...,1,0,7.820059,6.075145,7.230483,6,7,5,1,1


In [357]:
inRange = full_interval_df['inRange'].sum()
onPoint = full_interval_df['onPoint'].sum()
rows = len(full_interval_df)

# Calculating the accuracy
accuracyRange = inRange/rows *100
accuracy = onPoint/rows *100

print(f'The accuracy of the model is {accuracyRange:.2f}% within the range')
print(f'The accuracy of the model is {accuracy:.2f}%.')

The accuracy of the model is 85.78% within the range
The accuracy of the model is 30.22%.


In [359]:
match_summary.columns

Index(['Match', 'Game', 'Team', 'Opponent_x', 'Name', 'Rating', 'goal',
       'Assist', 'yellowcard', 'redcard', 'redcardsecondyellow', 'injury',
       'penaltygoal', 'penaltymiss', 'sub', 'Player of the Match', 'TeamGoals',
       'Conceded', 'Value', 'location_Away', 'location_Home', 'outcome_D',
       'outcome_L', 'outcome_W', 'position_CAM', 'position_CB', 'position_CDM',
       'position_CM', 'position_GK', 'position_LB', 'position_LM',
       'position_LW', 'position_RB', 'position_RM', 'position_RW',
       'position_ST', 'Team_encoded', 'Opponent_x_encoded', 'Name_encoded',
       'Predicted_Rating'],
      dtype='object')

In [360]:
match_summary


Unnamed: 0,Match,Game,Team,Opponent_x,Name,Rating,goal,Assist,yellowcard,redcard,...,position_LM,position_LW,position_RB,position_RM,position_RW,position_ST,Team_encoded,Opponent_x_encoded,Name_encoded,Predicted_Rating
0,1,1,FC Groningen,Ajax,Verbruggen,5.0,0,0,0,0,...,0,0,0,0,0,0,7.128028,6.122581,6.442123,4
1,1,1,FC Groningen,Ajax,Benedetti,7.0,0,1,0,0,...,0,0,0,0,0,0,7.128028,6.122581,6.738950,6
2,1,1,FC Groningen,Ajax,Gabriel Paulista,6.0,0,0,0,0,...,0,0,0,0,0,0,7.128028,6.122581,6.605270,5
3,1,1,FC Groningen,Ajax,Thiaw,6.0,0,0,0,0,...,0,0,0,0,0,0,7.128028,6.122581,6.737280,5
4,1,1,FC Groningen,Ajax,Dedic,5.0,0,0,1,0,...,0,0,1,0,0,0,7.128028,6.122581,6.605270,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7214,39,9,Heracles Almelo,Fortuna Sittard,Núñez,,1,0,0,0,...,0,0,0,0,0,1,6.265176,6.758730,6.525178,7
7215,39,9,Heracles Almelo,Fortuna Sittard,Batshuayi,,0,0,0,0,...,0,0,0,0,0,1,6.265176,6.758730,6.831400,6
7216,39,9,Heracles Almelo,Fortuna Sittard,van Oorschot,,1,0,0,0,...,0,0,0,0,0,1,6.265176,6.758730,6.423937,6
7217,39,9,Heracles Almelo,Fortuna Sittard,Thuram,,0,2,0,0,...,0,0,0,0,0,1,6.265176,6.758730,6.569380,7


In [363]:
match_summary.columns

Index(['Match', 'Game', 'Team', 'Opponent_x', 'Name', 'Rating', 'goal',
       'Assist', 'yellowcard', 'redcard', 'redcardsecondyellow', 'injury',
       'penaltygoal', 'penaltymiss', 'sub', 'Player of the Match', 'TeamGoals',
       'Conceded', 'Value', 'location_Away', 'location_Home', 'outcome_D',
       'outcome_L', 'outcome_W', 'position_CAM', 'position_CB', 'position_CDM',
       'position_CM', 'position_GK', 'position_LB', 'position_LM',
       'position_LW', 'position_RB', 'position_RM', 'position_RW',
       'position_ST', 'Team_encoded', 'Opponent_x_encoded', 'Name_encoded',
       'Predicted_Rating'],
      dtype='object')

In [365]:
# List of position columns
position_columns = ['position_CAM', 'position_CB', 'position_CDM', 'position_CM', 
                    'position_GK', 'position_LB', 'position_LM', 'position_LW', 
                    'position_RB', 'position_RM', 'position_RW', 'position_ST']

# Create a new 'Position' column
match_summary['Position'] = match_summary[position_columns].idxmax(axis=1).str.replace('position_', '')

In [367]:
# Drop the original position columns
match_summary = match_summary.drop(columns=position_columns, axis=1)

In [369]:
# Exporting the data
# match_summary.to_csv("final_player_data.csv" , index=False)

In [89]:
interval_df

Unnamed: 0,Match,Game,goal,yellowcard,redcard,redcardsecondyellow,injury,penaltygoal,penaltymiss,sub,...,outcome_L,outcome_W,Team_encoded,Opponent_y_encoded,Name_encoded,Predicted_Rating,Rating,UpperBound,LowerBound,inRange
0,1,1,0,0,0,0,0,0,0,0,...,1,0,7.128028,6.122581,6.442123,5,5.0,6,4,1
1,1,1,0,0,0,0,0,0,0,0,...,1,0,7.128028,6.122581,6.738950,5,7.0,6,4,0
2,1,1,0,0,0,0,0,0,0,0,...,1,0,7.128028,6.122581,6.605270,5,6.0,6,4,1
3,1,1,0,0,0,0,0,0,0,0,...,1,0,7.128028,6.122581,6.737280,5,6.0,6,4,1
4,1,1,0,1,0,0,0,0,0,0,...,1,0,7.128028,6.122581,6.605270,4,5.0,5,3,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7013,38,1,0,1,0,0,0,0,0,0,...,1,0,7.820059,6.075145,7.394765,4,6.0,5,3,0
7014,38,1,0,0,0,0,0,0,0,0,...,1,0,7.820059,6.075145,7.662818,6,6.0,7,5,1
7015,38,1,0,0,0,0,1,0,0,0,...,1,0,7.820059,6.075145,7.612833,4,6.0,5,3,0
7016,38,1,0,1,0,0,0,0,0,0,...,1,0,7.820059,6.075145,7.230483,4,6.0,5,3,0


In [575]:
unknown_df.to_csv("prediction.csv")

In [586]:
names_df = match_summary[match_summary['Match'] > 36]
names_df2 = match_summary[match_summary['Match'] < 37]

In [582]:
names_df.loc[:, 'Predicted_Ratings'] = predicted_ratings

In [587]:
# Exporting predictions with summary of each players
# names_df.to_csv("predictions_with_names.csv", index=False)
names_df2.to_csv("df_with_names.csv", index=False)

In [None]:
# Importing the player rating dataset
