In [1]:
import urllib.request, json
import pandas as pd
import numpy as np

In [13]:
# Download the FPL bootstrap-static payload and parse its JSON body into `data`.
bootstrap_url = "https://fantasy.premierleague.com/api/bootstrap-static/"
with urllib.request.urlopen(bootstrap_url) as response:
    raw_body = response.read()

# Decoding happens after the connection is closed; the bytes are already in memory.
data = json.loads(raw_body.decode())

In [3]:
# Build one DataFrame per table of the payload held in `data`.
gw = pd.DataFrame(data['events'])
players = pd.DataFrame(data['elements'])
teams = pd.DataFrame(data['teams'])
player_type = pd.DataFrame(data['element_types'])

### The columns below are removed on domain knowledge; free-text fields (news, names, ...)
### are removed as well because they will not contribute to the model.

# Judged useless for the prediction from an understanding of the domain.
domain_irrelevant_cols = [
    'cost_change_event', 'cost_change_event_fall', 'team_code',
    'transfers_in', 'transfers_out',
    'cost_change_start', 'cost_change_start_fall',
]

# Descriptive fields (photo, news, surname) and the team reference.
descriptive_cols = ['photo', 'news', 'news_added', 'second_name', 'team']

# Rank columns, considered correlated with other columns (domain judgement).
rank_cols = [
    'influence_rank', 'influence_rank_type',
    'creativity_rank', 'creativity_rank_type',
    'threat_rank', 'threat_rank_type',
    'ict_index_rank', 'ict_index_rank_type',
]

# Useful column that cannot be used for now; to be brought back later on.
deferred_cols = ['in_dreamteam']

players = players.drop(
    columns=domain_irrelevant_cols + descriptive_cols + rank_cols + deferred_cols
)

# Columns holding the same single value in every row carry no information.
constant_cols = [col for col in players.columns if len(players[col].unique()) == 1]
players = players.drop(columns=constant_cols)

# Drop the remaining per-player identifier columns that are not needed.
# NOTE(review): no index is actually set here; `id` simply stays as a regular column.
players = players.drop(columns=['first_name', 'code'])


## These columns were originally dropped in separate cells; the code has been gathered into this single cell
# because the dataset is updated every week, so only one cell has to be re-run each time.

## This project will be extended in the future, so this cell also serves as a record of all redundant columns.

In [4]:
# Display the columns that remain in `players` after the domain-driven pruning.
players.columns

# The columns above were removed on the basis of domain knowledge.
# From here on, machine learning techniques will be used to filter features and reduce dimensionality.

Index(['chance_of_playing_next_round', 'chance_of_playing_this_round',
       'dreamteam_count', 'element_type', 'id', 'now_cost', 'points_per_game',
       'selected_by_percent', 'status', 'total_points', 'value_season',
       'web_name', 'minutes', 'goals_scored', 'assists', 'clean_sheets',
       'goals_conceded', 'own_goals', 'penalties_saved', 'penalties_missed',
       'yellow_cards', 'red_cards', 'saves', 'bonus', 'bps', 'influence',
       'creativity', 'threat', 'ict_index'],
      dtype='object')

In [5]:
# Export the cleaned `players` frame for use by the model.
# NOTE(review): to_csv writes the row index by default, so the file gains an unnamed
# leading column when read back — confirm downstream readers expect it.
players.to_csv('Players.csv')

## Exporting the cleaned dataset obtained for our model.
## ML techniques will then be used to refine this data further.
## TODO: the original note ended mid-sentence ("... and eventually set up"); complete it.

In [6]:
# Write the cleaned frame to a second file, intended for trying points per game
# (`points_per_game`) as a new target variable.
# NOTE(review): no visible cell changes `players` between the two exports, so this file
# has the same content as Players.csv — confirm whether one of them should differ.
players.to_csv('Players_ppg.csv')

In [7]:
# Load epl.csv into a DataFrame.
# NOTE(review): this rebinds `data`, which the fetch cell uses for the API payload (a dict).
# Later cells call data.keys() and data['element_types'], which need that dict, so a
# top-to-bottom run no longer works — consider a distinct name for this frame.
data = pd.read_csv('epl.csv')

In [8]:
# Preview the first rows of the frame currently bound to `data`.
data.head()

Unnamed: 0,name,season,status,format,number_of_clubs,total_matches,matches_completed,game_week,total_game_week,progress,...,goals_min_51_to_60,goals_min_61_to_70,goals_min_71_to_80,goals_min_81_to_90,goals_min_0_to_15,goals_min_16_to_30,goals_min_31_to_45,goals_min_46_to_60,goals_min_61_to_75,goals_min_76_to_90
0,Premier League,2018/2019,Completed,,20,380,380,38,38,100,...,112,133,119,186,134,172,170,158,185,253


In [12]:
# Load epl_players.csv into a DataFrame and preview its first rows.
# NOTE(review): `data` is rebound again here; cells that expect the API payload dict
# (data.keys(), data['element_types']) depend on the fetch cell being re-run afterwards.
data = pd.read_csv('epl_players.csv')
data.head()

Unnamed: 0,full_name,age,birthday,league,season,position,Current Club,minutes_played_overall,minutes_played_home,minutes_played_away,...,conceded_per_90_overall,min_per_conceded_overall,min_per_match,min_per_card_overall,min_per_assist_overall,cards_per_90_overall,rank_in_league_top_attackers,rank_in_league_top_midfielders,rank_in_league_top_defenders,rank_in_club_top_scorer
0,David de Gea,29,657964800,Premier League,2018/2019,Goalkeeper,Manchester United,3420,1710,1710,...,1.34,67,90,3420,0,0.03,310,419,90,18
1,Matteo Darmian,30,628588800,Premier League,2018/2019,Defender,Manchester United,443,353,90,...,1.42,63,74,0,0,0.0,411,366,102,25
2,Victor Nilsson Lindelöf,25,774428400,Premier League,2018/2019,Defender,Manchester United,2602,1112,1490,...,1.31,68,87,2602,2602,0.03,263,249,88,14
3,Luke Shaw,24,805532400,Premier League,2018/2019,Defender,Manchester United,2592,1170,1422,...,1.25,72,89,236,648,0.38,262,104,79,13
4,Eric Bertrand Bailly,26,766134000,Premier League,2018/2019,Defender,Manchester United,637,349,288,...,1.7,53,53,319,0,0.28,340,271,142,26


In [11]:
# List the column names of the frame currently bound to `data`.
data.columns

Index(['full_name', 'age', 'birthday', 'league', 'season', 'position',
       'Current Club', 'minutes_played_overall', 'minutes_played_home',
       'minutes_played_away', 'nationality', 'appearances_overall',
       'appearances_home', 'appearances_away', 'goals_overall', 'goals_home',
       'goals_away', 'assists_overall', 'assists_home', 'assists_away',
       'penalty_goals', 'penalty_misses', 'clean_sheets_overall',
       'clean_sheets_home', 'clean_sheets_away', 'conceded_overall',
       'conceded_home', 'conceded_away', 'yellow_cards_overall',
       'red_cards_overall', 'goals_involved_per_90_overall',
       'assists_per_90_overall', 'goals_per_90_overall', 'goals_per_90_home',
       'goals_per_90_away', 'min_per_goal_overall', 'conceded_per_90_overall',
       'min_per_conceded_overall', 'min_per_match', 'min_per_card_overall',
       'min_per_assist_overall', 'cards_per_90_overall',
       'rank_in_league_top_attackers', 'rank_in_league_top_midfielders',
       'rank_in

In [15]:
# List the top-level keys of the API payload.
# Requires `data` to be the dict produced by the fetch cell, not a DataFrame read from CSV.
data.keys()

dict_keys(['events', 'game_settings', 'phases', 'teams', 'total_players', 'elements', 'element_stats', 'element_types'])

In [17]:
# Inspect the raw position definitions (goalkeepers, defenders, ...) in the API payload.
# Requires `data` to be the dict produced by the fetch cell, not a DataFrame read from CSV.
data['element_types']

[{'id': 1,
  'plural_name': 'Goalkeepers',
  'plural_name_short': 'GKP',
  'singular_name': 'Goalkeeper',
  'singular_name_short': 'GKP',
  'squad_select': 2,
  'squad_min_play': 1,
  'squad_max_play': 1,
  'ui_shirt_specific': True,
  'sub_positions_locked': [12],
  'element_count': 71},
 {'id': 2,
  'plural_name': 'Defenders',
  'plural_name_short': 'DEF',
  'singular_name': 'Defender',
  'singular_name_short': 'DEF',
  'squad_select': 5,
  'squad_min_play': 3,
  'squad_max_play': 5,
  'ui_shirt_specific': False,
  'sub_positions_locked': [],
  'element_count': 210},
 {'id': 3,
  'plural_name': 'Midfielders',
  'plural_name_short': 'MID',
  'singular_name': 'Midfielder',
  'singular_name_short': 'MID',
  'squad_select': 5,
  'squad_min_play': 2,
  'squad_max_play': 5,
  'ui_shirt_specific': False,
  'sub_positions_locked': [],
  'element_count': 261},
 {'id': 4,
  'plural_name': 'Forwards',
  'plural_name_short': 'FWD',
  'singular_name': 'Forward',
  'singular_name_short': 'FWD',
  