# In [76]:
import pandas as pd
import json
import matplotlib as plt
import plotly.express as px

# Load the expected-goals table (CSV) and the FPL table (JSON) from local files.
xg_data = pd.read_csv("/home/adam/jupyter/fpl/outfield_xG_data.csv")

fpl_data = pd.read_json("/home/adam/jupyter/fpl/fpl_data.json")

# NOTE: a bare `fpl_data.drop(columns=['name'])` used to sit here. DataFrame.drop
# returns a new frame and the result was discarded, so it never changed
# `fpl_data`. It has been removed rather than assigned: the steps below rely on
# BOTH frames still carrying a `name` column, which is what makes the merge
# emit the `name_x` / `name_y` suffixed columns.

# Inner join: keep only players whose xG `name` matches an FPL `sanitized_name`.
large_data = pd.merge(xg_data, fpl_data, how="inner", left_on=['name'], right_on=['sanitized_name'])
# Drop the FPL-side name columns, then restore the xG-side name and correct the
# misspelled `npgx_90` column to `npxg_90`.
less_large_data = large_data.drop(columns=['name_y', 'sanitized_name'])
clean_data = less_large_data.rename(columns={'name_x': 'name', 'npgx_90': 'npxg_90'})
# Setting a minimum threshold for points scored per player
# `.copy()` makes the filtered result an independent frame, so the new columns
# assigned to `clean_data` later do not trigger SettingWithCopyWarning.
clean_data = clean_data.loc[clean_data.total_points > 50].copy()

# TODO: vectorize this? 
def expected_points_90(row):
    """Return the expected attacking points per 90 minutes for one player row.

    The result is ``npxg_90`` times a position-specific goal weight plus
    ``xA_90`` times 3. Goal weights are 5 for 'Midfielder', 4 for 'Forward'
    and 6 for 'Defender' (presumably the FPL points per goal/assist --
    confirm against the current scoring rules).

    Args:
        row: Mapping-like record (e.g. a DataFrame row) with a 'position'
            key and, for the three positions above, 'npxg_90' and 'xA_90'.

    Returns:
        The weighted sum, or 0 when the position is not one of the three
        recognised values (the xG columns are not read in that case).
    """
    goal_weight = {'Midfielder': 5, 'Forward': 4, 'Defender': 6}.get(row['position'])
    if goal_weight is None:
        # Unrecognised position: no attacking points are attributed.
        return 0
    return (row['npxg_90'] * goal_weight) + (row['xA_90'] * 3)

# Row-wise expected attacking points per 90 (see expected_points_90).
clean_data['x_attacking_points_90'] = clean_data.apply(expected_points_90, axis=1)

# Points-per-90 divided by player cost, rounded to two decimals.
points_per_cost = clean_data['x_attacking_points_90'].div(clean_data['cost'])
clean_data['x_attack_value'] = points_per_cost.round(2)


# Per-position baseline: the 0.5 quantile (i.e. the median, despite the
# "average" in the name) of x_attack_value within each position group.
value_by_position = clean_data.groupby('position')['x_attack_value']
average_x_returns = value_by_position.quantile(q=0.5)


# Using my own takes for minimum player value
def xValue_above_average(row):
    """Return a player's ``x_attack_value`` minus their position's baseline.

    The baseline is looked up by position in the module-level
    ``average_x_returns`` (indexed by position name).

    Args:
        row: Mapping-like record (e.g. a DataFrame row) with a 'position'
            key and, for recognised positions, an 'x_attack_value' key.

    Returns:
        The difference for 'Midfielder', 'Forward' and 'Defender' rows;
        0 for any other position (no baseline lookup is performed then).
    """
    role = row['position']
    if role not in ('Midfielder', 'Forward', 'Defender'):
        return 0
    return row['x_attack_value'] - average_x_returns.loc[role]


# Each player's x_attack_value relative to the baseline for their position
# (computed row by row via xValue_above_average).
relative_value_column = 'Expected Attacking Points Per 90 Above Average'
clean_data[relative_value_column] = clean_data.apply(xValue_above_average, axis=1)

# Persist the enriched table; the DataFrame index is not written.
clean_data.to_csv("/home/adam/jupyter/fpl/final_expected_data.csv", index=False)


# Scatter of total points against relative value: one colour (and one OLS
# trendline) per position, marker size driven by minutes, player name on hover.
value_plot = px.scatter(
    clean_data,
    x='total_points',
    y=relative_value_column,
    color='position',
    size='minutes',
    hover_data=['name'],
    trendline="ols",
)
# Save a standalone HTML copy, then render the figure.
value_plot.write_html("/home/adam/jupyter/fpl/fpl_value_plot2.html")
value_plot.show()