In [None]:
from matplotlib import pyplot as plt
import pandas as pd
import numpy as np
import seaborn as sns
from sklearn.model_selection import train_test_split
from tensorflow.keras import layers
from tensorflow.keras import metrics
from tensorflow.keras.layers import Dense
from tensorflow.keras import activations
from tensorflow.keras import callbacks
from tensorflow.keras import models

In [None]:
# Only the header row is read here; the resulting names are reused whenever a
# CSV is loaded with an explicit ``names=`` list.
cols = list(pd.read_csv("data.csv", nrows=0).columns)

# Positional indices of the columns that are discarded after loading:
# a hand-picked set followed by the contiguous range 28..53.
remove_cols = [0, 1, 2, 4, 5, 6, 9, 10, 13, 19, 20, 22, 23, 24, 25, 26, 27, 88]
remove_cols.extend(range(28, 54))

# Categorical columns that are one-hot encoded.
cats = ["Work Rate", "Position"]

In [None]:
def currency_to_number(np_array):
    """Convert currency strings such as ``"€110.5M"`` or ``"€565K"`` to floats.

    The euro sign is removed, a trailing ``K`` multiplies the number by 1e3
    and a trailing ``M`` by 1e6; strings without a suffix are taken as-is.

    Parameters
    ----------
    np_array : pandas.Series
        Series of strings (despite the parameter name, the ``.str`` accessor
        is used, so a pandas Series is required). Missing values are allowed.

    Returns
    -------
    pandas.Series
        Float series with the same index; missing inputs stay NaN.

    Raises
    ------
    ValueError
        If the numeric part of an entry cannot be parsed as a float.
    """
    # Parse the text explicitly rather than handing file contents to an
    # expression evaluator such as pd.eval.
    stripped = np_array.str.replace("€", "", regex=False).str.strip()
    suffix = stripped.str[-1]
    # Entries without a K/M suffix (and missing entries) get a neutral factor.
    multiplier = suffix.map({"K": 1e3, "M": 1e6}).astype(float).fillna(1.0)
    magnitude = stripped.str.rstrip("KM").astype(float)
    return magnitude * multiplier

In [None]:
def create_df(file_name):
    """Load a player CSV and turn it into a numeric feature frame.

    The file's own header row is skipped and the module-level ``cols`` list is
    used for the column names. The columns at the positions listed in
    ``remove_cols`` are dropped, "Preferred Foot" is encoded as 1 (Left) /
    0 (Right), the columns in ``cats`` are one-hot encoded, and the "Value"
    and "Wage" currency strings are converted to floats.

    Parameters
    ----------
    file_name : str
        Path of the CSV file to read.

    Returns
    -------
    tuple
        ``(_df, names)``: the processed DataFrame and the "Name" column as it
        was read from the file, captured before any columns are dropped.
    """
    # ``delim_whitespace=False`` was only restating the default and the
    # keyword is deprecated in recent pandas, so it is no longer passed.
    _df = pd.read_csv(file_name, names=cols, skiprows=1)
    names = _df["Name"]
    _df = _df.drop(columns=_df.columns[remove_cols])

    # Assign the result back: an in-place replace on ``_df[col]`` acts on a
    # temporary object and is not guaranteed to reach the frame (it does not
    # under pandas Copy-on-Write).
    _df["Preferred Foot"] = (
        _df["Preferred Foot"].replace({"Left": 1, "Right": 0}).infer_objects()
    )

    # Force integer indicator columns. Newer pandas defaults to bool dummies,
    # which do not support the subtraction used for min-max scaling.
    _df = pd.get_dummies(_df, prefix=cats, columns=cats, dtype="uint8")

    _df["Value"] = currency_to_number(_df["Value"])
    _df["Wage"] = currency_to_number(_df["Wage"])
    return _df, names

df, _ = create_df("data.csv")

# Per-column minimum and maximum of the loaded frame, keyed by column name.
# These are the statistics scale_data() reads when it min-max scales a frame.
mins = {col: df[col].min() for col in df.columns if col not in cats}
maxs = {col: df[col].max() for col in df.columns if col not in cats}

def scale_data(_df):
    """Min-max scale every column of ``_df`` using the stored ``mins``/``maxs``.

    Each column whose name is not in ``cats`` is replaced by
    ``(column - mins[name]) / (maxs[name] - mins[name])``. A column whose
    stored minimum and maximum are equal carries no information and would
    divide by zero, so it is set to 0 instead of becoming NaN/inf.

    Parameters
    ----------
    _df : pandas.DataFrame
        Frame to scale. It is modified in place.

    Returns
    -------
    pandas.DataFrame
        The same ``_df`` object, after scaling.

    Raises
    ------
    KeyError
        If a column (other than those in ``cats``) has no entry in
        ``mins``/``maxs``.
    """
    for a in _df.columns:
        if a in cats:
            continue
        span = maxs[a] - mins[a]
        if span == 0:
            # Constant column in the reference data: avoid 0/0 and x/0.
            _df[a] = 0.0
        else:
            _df[a] = (_df[a] - mins[a]) / span
    return _df

df = scale_data(df)
corr_matrix = df.corr()

# Feature selection against the target column "Value":
#   * columns with |correlation| < 0.15 are dropped, unless their name
#     contains "GK";
#   * every column whose name contains "GK" is multiplied by "Position_GK",
#     which zeroes it on rows where that indicator is 0.
# The correlations are walked by label rather than by integer position:
# integer keys on a label-indexed Series are a deprecated positional
# fallback, and label lookup stays correct even if the correlation matrix
# does not contain every column of ``df``.
drop_cols = []  # positional indices into df.columns
for col, corr in corr_matrix["Value"].items():
    is_gk = "GK" in col
    if abs(corr) < 0.15 and not is_gk:
        drop_cols.append(df.columns.get_loc(col))
    if is_gk:
        df[col] = df[col] * df["Position_GK"]

df = df.drop(df.columns[drop_cols], axis=1).fillna(0)
# Keep every feature inside the [0, 1] range.
df = df.clip(lower=0, upper=1)

df

Unnamed: 0,Overall,Potential,Value,Wage,International Reputation,Weak Foot,Skill Moves,Crossing,Finishing,HeadingAccuracy,ShortPassing,Volleys,Dribbling,Curve,FKAccuracy,LongPassing,BallControl,Acceleration,SprintSpeed,Agility,Reactions,ShotPower,Stamina,LongShots,Aggression,Positioning,Vision,Penalties,Composure,GKDiving,GKHandling,GKKicking,GKPositioning,GKReflexes,Position_GK
0,1.000000,0.978723,0.932489,1.000000,1.00,0.75,0.75,0.897727,1.000000,0.733333,0.965116,0.953488,1.000000,0.988636,1.000000,0.928571,1.000000,0.929412,0.880952,0.939024,0.986667,0.892473,0.714286,1.000000,0.440476,0.989247,1.000000,0.804598,1.000000,0.0,0.000000,0.000000,0.000000,0.0,0.0
1,1.000000,0.978723,0.649789,0.716814,1.00,0.75,1.00,0.897727,0.989247,0.944444,0.860465,0.965116,0.903226,0.852273,0.802198,0.809524,0.978022,0.905882,0.940476,0.890244,1.000000,1.000000,0.904762,0.989011,0.619048,1.000000,0.857143,0.919540,0.989247,0.0,0.000000,0.000000,0.000000,0.0,0.0
2,0.958333,0.957447,1.000000,0.513274,1.00,1.00,1.00,0.840909,0.913978,0.644444,0.895349,0.930233,0.989247,0.931818,0.923077,0.821429,0.989011,0.964706,0.928571,1.000000,0.973333,0.838710,0.821429,0.868132,0.535714,0.935484,0.916667,0.873563,0.978495,0.0,0.000000,0.000000,0.000000,0.0,0.0
3,0.937500,0.957447,0.607595,0.460177,0.75,0.50,0.00,0.136364,0.118280,0.188889,0.500000,0.104651,0.150538,0.170455,0.175824,0.500000,0.406593,0.529412,0.547619,0.560976,0.920000,0.311828,0.369048,0.098901,0.321429,0.107527,0.690476,0.402299,0.698925,1.0,0.923077,0.955556,0.977528,1.0,1.0
4,0.937500,0.936170,0.860759,0.628319,0.75,1.00,0.75,1.000000,0.860215,0.566667,0.988372,0.906977,0.881720,0.897727,0.879121,0.976190,0.945055,0.776471,0.761905,0.792683,0.933333,0.956989,0.928571,0.967033,0.773810,0.913978,1.000000,0.850575,0.913978,0.0,0.000000,0.000000,0.000000,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
18202,0.020833,0.361702,0.000506,0.001770,0.00,0.25,0.25,0.329545,0.387097,0.400000,0.488372,0.244186,0.408602,0.272727,0.340659,0.428571,0.417582,0.494118,0.535714,0.560976,0.373333,0.440860,0.333333,0.384615,0.416667,0.397849,0.500000,0.436782,0.451613,0.0,0.000000,0.000000,0.000000,0.0,0.0
18203,0.020833,0.319149,0.000506,0.001770,0.00,0.25,0.25,0.204545,0.537634,0.533333,0.418605,0.372093,0.376344,0.295455,0.186813,0.190476,0.384615,0.341176,0.321429,0.292683,0.253333,0.419355,0.369048,0.428571,0.428571,0.473118,0.273810,0.436782,0.419355,0.0,0.000000,0.000000,0.000000,0.0,0.0
18204,0.020833,0.404255,0.000506,0.001770,0.00,0.50,0.25,0.227273,0.408602,0.466667,0.360465,0.395349,0.440860,0.363636,0.263736,0.226190,0.428571,0.682353,0.678571,0.439024,0.346667,0.462366,0.511905,0.461538,0.250000,0.494624,0.392857,0.574713,0.408602,0.0,0.000000,0.000000,0.000000,0.0,0.0
18205,0.020833,0.382979,0.000506,0.001770,0.00,0.50,0.25,0.443182,0.516129,0.388889,0.406977,0.418605,0.505376,0.318182,0.318681,0.273810,0.516484,0.576471,0.571429,0.463415,0.000000,0.666667,0.333333,0.340659,0.261905,0.451613,0.440476,0.517241,0.462366,0.0,0.000000,0.000000,0.000000,0.0,0.0


In [None]:
# Target is the scaled "Value" column; every other remaining column is a feature.
y = df["Value"]
X = df.drop(columns=["Value"])

# Fixed seed so the split is identical on every Restart & Run All.
SPLIT_SEED = 42

# 70% train, then the held-out 30% is halved into validation and test.
xtrain, xvaltest, ytrain, yvaltest = train_test_split(
    X, y, test_size=0.3, random_state=SPLIT_SEED
)
xval, xtest, yval, ytest = train_test_split(
    xvaltest, yvaltest, test_size=0.5, random_state=SPLIT_SEED
)

# Filled by CustomCallback with one weight snapshot per training batch.
weight_neuron = []

train_len = len(xtrain)
test_len = len(xtest)
y_len = len(ytrain)

class CustomCallback(callbacks.Callback):
    """Keras callback that snapshots one layer's weights after each batch.

    After every training batch the first weight array of ``model.layers[2]``
    is converted to nested Python lists and appended to the module-level
    ``weight_neuron`` list.
    """

    def on_train_batch_end(self, batch, logs=None):
        """Append the current first weight array of layer index 2 to ``weight_neuron``."""
        tracked_layer = self.model.layers[2]
        first_weights = tracked_layer.get_weights()[0]
        weight_neuron.append(first_weights.tolist())

# Fully connected network: two sigmoid hidden layers (85 and 50 units) and a
# single sigmoid output, trained with Adam on mean squared error.
# The input width is taken from the training data instead of a hard-coded
# number, so the model keeps working when feature selection retains a
# different number of columns.
model = models.Sequential()
model.add(Dense(85, activation="sigmoid", input_dim=xtrain.shape[1]))
model.add(Dense(50, activation="sigmoid"))
model.add(Dense(1, activation="sigmoid"))

model.compile(optimizer="adam",
              loss="mean_squared_error")

# Start from an empty history so re-running this cell does not mix weight
# snapshots from different training runs.
weight_neuron = []

history_train = model.fit(xtrain, ytrain,
                          batch_size=32, epochs=100,
                          validation_data=(xval, yval),
                          callbacks=[CustomCallback()],
                          )


Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [None]:
new_df, names = create_df("test_data.csv")
new_df = scale_data(new_df)

# Align the new data with the training features: keep exactly the columns of
# X, in the same order, and add any missing one (e.g. an unseen dummy
# category) as a column of zeros.
new_df = new_df.reindex(columns=X.columns, fill_value=0.0)

# Apply the same post-processing the training frame received, so the model
# sees inputs built the same way as during training: "GK" columns are masked
# by "Position_GK", missing values become 0 and everything is kept in [0, 1].
for col in new_df.columns:
    if "GK" in col:
        new_df[col] = new_df[col] * new_df["Position_GK"]
new_df = new_df.fillna(0).clip(lower=0, upper=1)

predictions = model.predict(new_df)

# Undo the min-max scaling of "Value" and print a human-readable amount.
value_min = mins["Value"]
value_span = maxs["Value"] - mins["Value"]
for name, scaled in zip(names, predictions):
    val = (scaled * value_span) + value_min
    prefix = ""
    if val > 1000000:
        prefix = "M"
        val = val / 1000000
    elif val > 1000:
        prefix = "K"
        val = val / 1000
    print(name + " is worth €" + str(val[0].round(2)) + prefix)

A is worth €20.86M
B is worth €58.29M
C is worth €29.17M
D is worth €12.29M
E is worth €79.68M
F is worth €4.87M
G is worth €45.65M
H is worth €65.8M
I is worth €51.25K
