In [1]:
import numpy as np
import pandas as pd
import pulp
import json
import requests

### CUSTOM
import sys
sys.path.append('../Modules')
import LpPickTeam

In [161]:
# STANDARDIZED COLUMN LABELS
# Name, Team, Position, Cost, [GW]_xP, ID
def standardize_labels(df, old_name:str, old_team:str, old_position:str, old_cost:str, old_xP:str, old_id:str, gw:str):
    """Return ``df`` with source-specific column names renamed to the standard labels.

    Parameters
    ----------
    df : DataFrame whose columns should be relabelled.
    old_name, old_team, old_position, old_cost, old_xP, old_id : str
        Current names of the columns that become 'Name', 'Team', 'Position',
        'Cost', '<gw>_xP' and 'ID' respectively.
    gw : str
        Gameweek label used as the prefix of the expected-points column.

    Returns
    -------
    The result of ``df.rename``; columns not listed above keep their names.
    """
    source_labels = (old_name, old_team, old_position, old_cost, old_xP, old_id)
    standard_labels = ('Name', 'Team', 'Position', 'Cost', gw + '_xP', 'ID')
    return df.rename(columns=dict(zip(source_labels, standard_labels)))

In [273]:
# GET FPL DATA
url = 'https://fantasy.premierleague.com/api/bootstrap-static/' # API URL
# A timeout stops a stalled connection from hanging the kernel, and raise_for_status
# reports an HTTP failure here rather than as a JSON/KeyError further down.
resp = requests.get(url, timeout=30)
resp.raise_for_status()
data = resp.json()
df = pd.DataFrame(data['elements'])[['web_name', 'team', 'element_type', 'now_cost', 'ep_next', 'id']]
df = df.rename(columns={'web_name': 'Name',
                        'team': 'Team',
                        'now_cost': 'Cost',
                        'element_type': 'PosNum',
                        'ep_next': '1_xP',
                        'id': 'ID'})

# The conversions below are safe to re-run because `df` is rebuilt from `data` above.
position_key = {1 : 'GK', 2 : 'DEF', 3: 'MID', 4 : 'FWD'}
# Codes missing from position_key become NaN instead of raising KeyError.
df['Position'] = df['PosNum'].map(position_key)
# The API's now_cost is divided by 10 to give the Cost used in the rest of the notebook.
df['Cost'] = df['Cost'] / 10
# Team ids are kept as strings so they can be matched against string-keyed lookups.
df['Team'] = df['Team'].astype(str)
# to_numeric turns a missing ep_next into NaN, where float(None) would raise.
df['1_xP'] = pd.to_numeric(df['1_xP']).astype(float)
# .copy() makes df_official independent of `df`, so later edits never hit a view.
df_official = df[['Name', 'Team', 'Position', 'Cost', '1_xP', 'ID']].copy()

# STANDARDIZED KIWI DATA
df_kiwi = pd.read_csv('../Data/Projected_Points/FPLKiwi_projections.csv')[['Name', 'Team', 'Pos', 'Price', 'xPts 1', 'ID']]
df_kiwi = standardize_labels(df_kiwi, 'Name', 'Team', 'Pos', 'Price', 'xPts 1', 'ID', '1')

# STANDARDIZED FPLREVIEW DATA
df_review = pd.read_csv('../Data/Projected_Points/fplreview_projections.csv')[['Name', 'Team', 'Pos', 'BV', '1_Pts', 'ID']]
df_review = standardize_labels(df_review, 'Name', 'Team', 'Pos', 'BV', '1_Pts', 'ID', '1')

# Show which position labels the kiwi source uses.
df_kiwi.Position.unique()

array(['MID', 'DEF', 'FWD', 'GK'], dtype=object)

In [163]:
# Team lookup table (FPL id, long name, short name) built from the API payload
# and saved as a CSV dictionary.
team_key_df = (
    pd.DataFrame(data['teams'])
    .loc[:, ['id', 'name', 'short_name']]
    .rename(columns={'id': 'fpl_id', 'name': 'long_name'})
)
team_key_df.to_csv('../Dictionaries/team_names.csv', index=False)

In [164]:
# Position lookup rows: [numeric code, standard label, single-letter fplreview label].
position_key = [
    [code, label, letter]
    for code, label, letter in zip((1, 2, 3, 4), ('GK', 'DEF', 'MID', 'FWD'), ('G', 'D', 'M', 'F'))
]
position_key_df = pd.DataFrame(
    data=position_key,
    columns=['position_code', 'position_name', 'fplreview_name'],
)
position_key_df.to_csv('../Dictionaries/position_names.csv', index=False)

In [165]:
# Lookup tables that translate each source's Team / Position values to the standard labels.
# team_dict1: FPL team id (as a string) -> short name
team_dict1 = {
    str(team_id): short
    for team_id, short in zip(team_key_df['fpl_id'], team_key_df['short_name'])
}
# team_dict2: long team name -> short name
team_dict2 = {
    long: short
    for long, short in zip(team_key_df['long_name'], team_key_df['short_name'])
}
# position_dict2: fplreview letter -> position name
position_dict2 = {
    letter: label
    for letter, label in zip(position_key_df['fplreview_name'], position_key_df['position_name'])
}
# position_dict1: numeric position code -> position name
position_dict1 = {
    code: label
    for code, label in zip(position_key_df['position_code'], position_key_df['position_name'])
}
position_dict1

{1: 'GK', 2: 'DEF', 3: 'MID', 4: 'FWD'}

In [166]:
# Look up the standard position name for the fplreview code 'G'.
# .iloc[0] takes the first matching row by position; the label-based [0] only worked
# because the 'G' row carries index label 0, and would raise KeyError for any other code.
position_key_df.query("fplreview_name == 'G'")['position_name'].iloc[0]

'GK'

In [167]:
# STANDARDIZED ENTRY LABEL STYLES
# Name - FPL web name
# Team - short name (the values of team_dict1 / team_dict2)
# Position - GK, DEF, MID, FWD (the values of position_dict1 / position_dict2)
# Cost - in millions
# ID - FPL ID
df_official = df_official.replace(
    to_replace={
        'Team': team_dict1,
        'Position': position_dict1,
    }
)
df_review = df_review.replace(
    to_replace={
        'Team': team_dict2,
        'Position': position_dict2,
    }
)

In [168]:
# Ten highest official expected-points entries for gameweek 1.
df_official.nlargest(10, '1_xP')

Unnamed: 0,Name,Team,Position,Cost,1_xP,ID
296,Alisson,LIV,GK,5.5,5.5,281
298,Salah,LIV,MID,13.0,5.5,283
300,Alexander-Arnold,LIV,DEF,7.5,5.5,285
299,Robertson,LIV,DEF,7.0,5.2,284
325,Ederson,MCI,GK,5.5,5.0,307
335,Haaland,MCI,FWD,11.5,5.0,318
295,Van Dijk,LIV,DEF,6.5,4.8,280
320,De Bruyne,MCI,MID,12.0,4.7,301
324,Cancelo,MCI,DEF,7.0,4.7,306
291,Matip,LIV,DEF,6.0,4.5,276


In [122]:
# Thirty highest kiwi expected-points entries for gameweek 1.
df_kiwi.nlargest(30, '1_xP')

Unnamed: 0,Name,Team,Position,Cost,1_xP,ID
22,Kane,TOT,FWD,11.5,6.44,427
27,Son,TOT,MID,12.0,6.38,428
0,Salah,LIV,MID,13.0,6.28,283
2,Trent,LIV,DEF,7.5,5.47,285
7,Fernandes,MUN,MID,10.0,5.1,333
3,De Bruyne,MCI,MID,12.0,4.85,301
1,Cancelo,MCI,DEF,7.0,4.84,306
12,Sterling,CHE,MID,10.0,4.76,304
37,R James,CHE,DEF,6.0,4.42,146
6,Robertson,LIV,DEF,7.0,4.41,284


In [268]:
# Thirty highest fplreview expected-points entries for gameweek 1.
df_review.nlargest(30, '1_xP')

Unnamed: 0,Name,Team,Position,Cost,1_xP,ID
282,Salah,Liverpool,M,13.0,7.38,283
426,Kane,Spurs,F,11.5,6.46,427
427,Son,Spurs,M,12.0,6.37,428
284,Alexander-Arnold,Liverpool,D,7.5,5.64,285
317,Haaland,Man City,F,11.5,5.42,318
292,Luis Díaz,Liverpool,M,8.0,5.12,293
300,De Bruyne,Man City,M,12.0,5.0,301
332,Fernandes,Man Utd,M,10.0,4.91,333
283,Robertson,Liverpool,D,7.0,4.9,284
355,Wilson,Newcastle,F,7.5,4.88,356


In [24]:
# Alphabetical list of the distinct team labels in the kiwi data.
sorted(df_kiwi['Team'].unique())

['ARS',
 'AVL',
 'BHA',
 'BOU',
 'BRE',
 'CHE',
 'CRY',
 'EVE',
 'FUL',
 'LEE',
 'LEI',
 'LIV',
 'MCI',
 'MUN',
 'NEW',
 'NFO',
 'SOU',
 'TOT',
 'WHU',
 'WOL']

In [None]:
# (stray cell cleared: a bare `d` is an undefined name, raises NameError, and halts Restart & Run All)

In [41]:
# Rebuild the team lookup table and write it to the team-name dictionary CSV.
key_df = pd.DataFrame(data['teams'])[['id', 'name', 'short_name']]
# DataFrame.rename returns a new frame; the result must be assigned, otherwise the CSV
# is written with the raw API column names. 'long_name' matches the schema that the
# team_key_df cell writes to this same file.
key_df = key_df.rename(columns={'id' : 'fpl_id', 'name' : 'long_name'})
key_df.to_csv('../Dictionaries/team_names.csv', index=False)

In [35]:
# Distinct team labels present in the fplreview data.
df_review['Team'].unique()

array(['Arsenal', 'Aston Villa', 'Bournemouth', 'Brentford', 'Brighton',
       'Man City', 'Chelsea', 'Crystal Palace', 'Everton', 'Fulham',
       'Leeds', 'Leicester', 'Liverpool', "Nott'm Forest", 'Southampton',
       'Man Utd', 'Newcastle', 'Spurs', 'West Ham', 'Wolves'],
      dtype=object)

In [135]:
# Show the API name fields for players whose second_name is 'Trent' or 'Mendy'.
(
    pd.DataFrame(data['elements'])
    .loc[:, ['id', 'first_name', 'second_name', 'web_name']]
    .loc[lambda players: players['second_name'].isin(['Trent', 'Mendy'])]
)

Unnamed: 0,id,first_name,second_name,web_name
153,147,Edouard,Mendy,Mendy
238,252,Nampalys,Mendy,Mendy


In [137]:
# Player id and name fields taken from the API payload.
name_df = pd.DataFrame(data['elements']).loc[:, ['id', 'first_name', 'second_name', 'web_name']]

In [140]:
# Map each FPL player id to its web_name.
id_to_name_dict = {
    player_id: web_name
    for player_id, web_name in zip(name_df['id'], name_df['web_name'])
}

In [174]:
# Keep the kiwi source's own spelling under 'kiwi_Name' and derive 'Name' from the FPL id.
# The guard makes the cell safe to re-run: without it a second run renames the derived
# 'Name' column as well and leaves two 'kiwi_Name' columns in the frame.
if 'kiwi_Name' not in df_kiwi.columns:
    df_kiwi = df_kiwi.rename(columns={'Name' : 'kiwi_Name'})
# IDs with no entry in id_to_name_dict pass through replace unchanged.
df_kiwi['Name'] = df_kiwi['ID'].replace(id_to_name_dict)

In [182]:
# CLEAN BAD: keep only the rows whose ID is at least 1.
df_kiwi = df_kiwi[df_kiwi['ID'] >= 1]

In [188]:
def jaccard_similarity(enum1, enum2):
    """Return the Jaccard similarity |A ∩ B| / |A ∪ B| of two iterables.

    Both arguments are converted to sets, so duplicates and ordering are ignored.

    Parameters
    ----------
    enum1, enum2 : iterables of hashable items.

    Returns
    -------
    float between 0.0 and 1.0. Two empty inputs are treated as identical and
    give 1.0, where the unguarded division would raise ZeroDivisionError.
    """
    set1 = set(enum1)
    set2 = set(enum2)
    union = set1 | set2
    if not union:
        # Both inputs are empty: define the similarity as 1.0 rather than dividing by zero.
        return 1.0
    return len(set1 & set2) / len(union)

In [190]:
# Overlap between the kiwi and fplreview player-name sets.
jaccard_similarity(df_kiwi['Name'].unique(), df_review['Name'].unique())

0.8399311531841652

In [242]:
# Names that appear in the kiwi data but not in the fplreview data.
set(df_kiwi['Name'].unique()) - set(df_review['Name'].unique())

{'Augustinsson',
 'Francois',
 'Koulibaly',
 'Lingard',
 'Mee',
 "O'Brien",
 'Salah',
 'Spence',
 'Toffolo'}

In [244]:
# Names that appear in the fplreview data but not in the official data.
set(df_review['Name'].values) - set(df_official['Name'].values)

set()

In [233]:
# Inspect the fplreview rows whose Name contains 'Rossi'.
df_review.loc[df_review['Name'].str.contains('Rossi')]

Unnamed: 0,Name,Team,Position,Cost,1_xP,ID
568,Zeno Ibsen Rossi,BOU,DEF,99.9,0.01,10256


In [271]:
# CLEAN BAD: drop the fplreview placeholder rows, which carry Cost 99.9, together with
# any non-positive costs. The earlier strict "< 13" bound also removed the legitimately
# 13.0-priced Salah, so the upper bound now sits just under the placeholder value.
df_review = df_review.query("0 < Cost < 99")

In [246]:
# Row counts of the official, kiwi and fplreview frames, in that order.
tuple(len(frame) for frame in (df_official, df_kiwi, df_review))

(527, 512, 518)

In [255]:
def row_calc(row):
    """Compare ``row`` against the 'Name' column of the module-level ``df_official``.

    Returns whatever ``row == df_official['Name']`` evaluates to.
    """
    official_names = df_official['Name']
    return row == official_names

In [257]:
# Player id and name fields from the API payload, plus a web_name -> id lookup.
name_df = pd.DataFrame(data['elements']).loc[:, ['id', 'first_name', 'second_name', 'web_name']]
# web_name values can repeat (two players share 'Mendy'); the later id wins for a repeated name.
name_dict = {
    web_name: player_id
    for web_name, player_id in zip(name_df['web_name'], name_df['id'])
}

In [274]:
# Swap each fplreview Name for its FPL id via name_dict, to compare against the ID column.
# Author's note: after cleaning, the IDs all match.
df_review.replace(to_replace={'Name': name_dict})

Unnamed: 0,Name,Team,Position,Cost,1_xP,ID
0,1,Arsenal,D,4.5,0.34,1
1,2,Arsenal,G,4.5,0.14,2
2,3,Arsenal,M,5.0,2.33,3
3,4,Arsenal,M,4.5,1.01,4
4,5,Arsenal,D,4.5,0.35,5
...,...,...,...,...,...,...
586,Ludwig Augustinsson,Aston Villa,D,99.9,0.00,10289
587,Kalidou Koulibaly,Chelsea,D,99.9,3.48,10293
588,Frenkie de Jong,Man Utd,M,99.9,1.64,10294
589,Lisandro Martínez,Man Utd,D,99.9,1.73,10295


In [269]:
# Ten highest fplreview expected-points entries for gameweek 1.
df_review.nlargest(10, '1_xP')

Unnamed: 0,Name,Team,Position,Cost,1_xP,ID
282,Salah,Liverpool,M,13.0,7.38,283
426,Kane,Spurs,F,11.5,6.46,427
427,Son,Spurs,M,12.0,6.37,428
284,Alexander-Arnold,Liverpool,D,7.5,5.64,285
317,Haaland,Man City,F,11.5,5.42,318
292,Luis Díaz,Liverpool,M,8.0,5.12,293
300,De Bruyne,Man City,M,12.0,5.0,301
332,Fernandes,Man Utd,M,10.0,4.91,333
283,Robertson,Liverpool,D,7.0,4.9,284
355,Wilson,Newcastle,F,7.5,4.88,356
