In [None]:
import pandas as pd
import numpy as np
try:
    # json_normalize lives at the pandas top level since pandas 1.0; the
    # pandas.io.json location is only kept as a fallback for older releases.
    from pandas import json_normalize
except ImportError:
    from pandas.io.json import json_normalize
import matplotlib.pyplot as plt
from tensorflow import keras
from keras import Sequential
from keras.layers import Dense, LSTM

# Load the raw player records; each row carries a nested `stats` entry that
# later cells flatten and index into.
df = pd.read_json(path_or_buf="players.json")
df.head(n=5)

In [None]:
# Flatten the nested per-player `stats` entries into a flat table of their own.
df2 = json_normalize(data=df['stats'])
df2.head(n=5)

In [None]:
# Show every column name produced by flattening `stats`.
df2.columns.tolist()

In [None]:
# Descriptive statistics of the player cost column.
df.loc[:, 'cost'].describe()

In [None]:
# Histogram of the player cost column.
df['cost'].plot(kind='hist')

In [None]:
# Descriptive statistics of the average points column.
df2.loc[:, 'avg_points'].describe()

In [None]:
# Histogram of the average points column.
df2['avg_points'].plot(kind='hist')

In [None]:
# Descriptive statistics of the selections column.
df2.loc[:, 'selections'].describe()

In [None]:
# Histogram of the selections column.
df2['selections'].plot(kind='hist')

In [None]:
# Load the pipe-delimited player metadata file.
df3 = pd.read_csv(filepath_or_buffer="players_metadata.csv", sep="|")
df3.head(n=5)

In [None]:
# Build an upper-case "LAST, FIRST" key on df; the next cell merges on this
# column against the metadata table.
df['Player_Name'] = (df['last_name'] + ', ' + df['first_name']).str.upper()
df.head(n=5)

In [None]:
# Join the player records with the metadata on the shared name key (pandas'
# default inner join), then drop the helper key column from the merged frame.
result = (
    df.merge(df3, on='Player_Name')
      .drop(columns='Player_Name')
)
result.head(n=5)

In [None]:
# Top 3 nationalities in the merged data frame, most frequent first.
result['nationality'].value_counts().head(3).sort_values(ascending=False)

In [None]:
def _stats_for_position(players, position):
    """Return the flattened `stats` of the rows in `players` with the given `position`."""
    return json_normalize(players.loc[players['position'] == position, 'stats'])


# For each of the three nationalities: select its players from the merged
# frame, then flatten the stats of its guards, centers and forwards.
usa = result.loc[result['nationality'] == 'United States of America']
usa_guard = _stats_for_position(usa, 'Guard')
usa_center = _stats_for_position(usa, 'Center')
usa_forward = _stats_for_position(usa, 'Forward')

serbia = result.loc[result['nationality'] == 'Serbia']
serbia_guard = _stats_for_position(serbia, 'Guard')
serbia_center = _stats_for_position(serbia, 'Center')
serbia_forward = _stats_for_position(serbia, 'Forward')

spain = result.loc[result['nationality'] == 'Spain']
spain_guard = _stats_for_position(spain, 'Guard')
spain_center = _stats_for_position(spain, 'Center')
spain_forward = _stats_for_position(spain, 'Forward')

In [None]:
# Inputs for three box plots (one per position): each list holds the
# avg_points series in the order USA, Serbia, Spain.
guard_plot = [stats['avg_points'] for stats in (usa_guard, serbia_guard, spain_guard)]
center_plot = [stats['avg_points'] for stats in (usa_center, serbia_center, spain_center)]
forward_plot = [stats['avg_points'] for stats in (usa_forward, serbia_forward, spain_forward)]

In [None]:
# Box plot of the guards' average points, one box per nationality, saved to
# guard.png.
# A fresh figure is created on every run instead of re-activating figure
# number 1, so this plot is never drawn on top of another cell's figure when
# the backend keeps figures open between cells.
fig, ax = plt.subplots(figsize=(9, 6))
fig.suptitle('Guard Position Average Points', fontsize=14, fontweight='bold')
# patch_artist=True so the boxes can be given a fill color below
bp = ax.boxplot(guard_plot, patch_artist=True)
# labels must follow the order of the series inside guard_plot
ax.set_xticklabels(['USA', 'Serbia', 'Spain'])
ax.get_xaxis().tick_bottom()
ax.get_yaxis().tick_left()
# outline color, fill color and linewidth of the boxes
for box in bp['boxes']:
    box.set(edgecolor='#7570b3', facecolor='#1b9e77', linewidth=2)
# color and linewidth of the whiskers
for whisker in bp['whiskers']:
    whisker.set(color='#7570b3', linewidth=2)
# color and linewidth of the caps
for cap in bp['caps']:
    cap.set(color='#7570b3', linewidth=2)
# color and linewidth of the medians
for median in bp['medians']:
    median.set(color='#b2df8a', linewidth=2)
# marker style, color and transparency of the fliers
for flier in bp['fliers']:
    flier.set(marker='o', color='#e7298a', alpha=0.5)

fig.savefig('guard.png', bbox_inches='tight')


In [None]:
# Box plot of the centers' average points, one box per nationality, saved to
# center.png.
# A fresh figure is created on every run instead of re-activating figure
# number 1, so this plot is never drawn on top of another cell's figure when
# the backend keeps figures open between cells.
fig, ax = plt.subplots(figsize=(9, 6))
fig.suptitle('Center Position Average Points', fontsize=14, fontweight='bold')
# patch_artist=True so the boxes can be given a fill color below
bp = ax.boxplot(center_plot, patch_artist=True)
# labels must follow the order of the series inside center_plot
ax.set_xticklabels(['USA', 'Serbia', 'Spain'])
ax.get_xaxis().tick_bottom()
ax.get_yaxis().tick_left()
# outline color, fill color and linewidth of the boxes
for box in bp['boxes']:
    box.set(edgecolor='#7570b3', facecolor='#1b9e77', linewidth=2)
# color and linewidth of the whiskers
for whisker in bp['whiskers']:
    whisker.set(color='#7570b3', linewidth=2)
# color and linewidth of the caps
for cap in bp['caps']:
    cap.set(color='#7570b3', linewidth=2)
# color and linewidth of the medians
for median in bp['medians']:
    median.set(color='#b2df8a', linewidth=2)
# marker style, color and transparency of the fliers
for flier in bp['fliers']:
    flier.set(marker='o', color='#e7298a', alpha=0.5)

fig.savefig('center.png', bbox_inches='tight')


In [None]:
# Box plot of the forwards' average points, one box per nationality, saved to
# forward.png.
# A fresh figure is created on every run instead of re-activating figure
# number 1, so this plot is never drawn on top of another cell's figure when
# the backend keeps figures open between cells.
fig, ax = plt.subplots(figsize=(9, 6))
fig.suptitle('Forward Position Average Points', fontsize=14, fontweight='bold')
# patch_artist=True so the boxes can be given a fill color below
bp = ax.boxplot(forward_plot, patch_artist=True)
# labels must follow the order of the series inside forward_plot
ax.set_xticklabels(['USA', 'Serbia', 'Spain'])
ax.get_xaxis().tick_bottom()
ax.get_yaxis().tick_left()
# outline color, fill color and linewidth of the boxes
for box in bp['boxes']:
    box.set(edgecolor='#7570b3', facecolor='#1b9e77', linewidth=2)
# color and linewidth of the whiskers
for whisker in bp['whiskers']:
    whisker.set(color='#7570b3', linewidth=2)
# color and linewidth of the caps
for cap in bp['caps']:
    cap.set(color='#7570b3', linewidth=2)
# color and linewidth of the medians
for median in bp['medians']:
    median.set(color='#b2df8a', linewidth=2)
# marker style, color and transparency of the fliers
for flier in bp['fliers']:
    flier.set(marker='o', color='#e7298a', alpha=0.5)

fig.savefig('forward.png', bbox_inches='tight')


In [None]:
# Extract the Price, Price Change, Percentage Change and Score of every round
# for every player and collect them in a nested dictionary:
#   double_dict[player position as str][round offset as str] -> metrics dict
# Round offset j (0-based) reads round key str(j + 1) from the player's
# `prices` / `scores` dicts.
double_dict = {}
pricing = []  # not used in this cell; the LSTM cell rebuilds it per player
n_rounds = 33  # rounds read per player (keys '1'..'33')

# Iterate the stats column directly instead of a hard-coded player count, so
# the cell keeps working when the number of rows (or the index) changes.
for i, stats in enumerate(df['stats']):
    scores = stats['scores']
    prices = stats['prices']
    rounds = {}
    for j in range(n_rounds):
        # Missing rounds fall back to 1. This is a crude placeholder for
        # missing values, not a real imputation.
        price = prices.get(str(j + 1), 1)
        # NOTE(review): for the last offset the "next" key ('34') lies outside
        # the rounds read here; if the data does not carry it, Price_Change
        # becomes 1 - price. Confirm this is intended.
        next_price = prices.get(str(j + 2), 1)
        price_change = next_price - price
        rounds[str(j)] = {
            'Price': price,
            'Price_Change': price_change,
            'Percentage': price_change / price,
            'Score': scores.get(str(j + 1), 1),
        }
    double_dict[str(i)] = rounds

In [None]:
# Fit an LSTM on each player's per-round price -> score series.
#
# For every player the first 25 rounds are used for training and the remaining
# 8 rounds for validation. The same model instance keeps training across all
# players, and `history` is overwritten on every iteration, so the curves
# plotted at the end describe the last player only.
n_rounds = 33        # rounds read per player (keys '1'..'33')
n_train_rounds = 25  # leading rounds used for training; the rest validate

model = Sequential()
# Every round is fed as its own sample with one timestep and one feature, so
# the declared input shape has to be (1, 1) to match the (n_samples, 1, 1)
# arrays built below.
model.add(LSTM(50, input_shape=(1, 1)))
model.add(Dense(1))
model.compile(loss='mae', optimizer='adam')

history = None
# Iterate the stats column directly instead of a hard-coded player count.
for stats in df['stats']:
    scores = stats['scores']
    prices = stats['prices']
    # Missing rounds fall back to 1 (crude placeholder for missing values).
    pricing = [prices.get(str(r), 1) for r in range(1, n_rounds + 1)]
    scoring = [scores.get(str(r), 1) for r in range(1, n_rounds + 1)]

    # Inputs: (samples, timesteps=1, features=1).
    train_p = np.asarray(pricing[:n_train_rounds]).reshape(-1, 1, 1)
    test_p = np.asarray(pricing[n_train_rounds:]).reshape(-1, 1, 1)
    # Targets: (samples, 1), the same rank as the Dense(1) output.
    train_s = np.asarray(scoring[:n_train_rounds]).reshape(-1, 1)
    test_s = np.asarray(scoring[n_train_rounds:]).reshape(-1, 1)

    history = model.fit(train_p, train_s, epochs=50, batch_size=4,
                        validation_data=(test_p, test_s), shuffle=False)

# Training vs. validation loss of the last fitted player (skipped when there
# were no players to fit).
if history is not None:
    plt.plot(history.history['loss'], label='train')
    plt.plot(history.history['val_loss'], label='test')
    plt.legend()
    plt.show()