In [1]:
import numpy as np
import pandas as pd
import matplotlib
from matplotlib import pyplot as plt
import seaborn as sns

pd.set_option('display.max_columns', 500)

In [2]:
data_path = './data'
idlist_path = '/player_idlist.csv'
players_path = '/players'
season = '/2018-19'

In [3]:
idlist = pd.read_csv(data_path + season + idlist_path)

In [4]:
#create players
players = pd.DataFrame()
for index, row in idlist.iterrows():
    first_name = row['first_name']
    second_name = row['second_name']
    player_id = row['id']
    player_path = '/' +first_name + '_' + second_name + '_' + str(player_id)
    
    player = pd.read_csv(data_path + season + players_path + player_path + '/gw.csv')
    player['gw'] = np.arange(len(player)) + 1
    player['player_id'] = player_id
    players = pd.concat([players, player], axis=0)

In [5]:
#load teams data
teams = pd.read_csv(data_path + season + '/teams_raw.csv')
teams.rename(columns={'id': 'team_id'}, inplace=True)

#load element_types data
element_types = pd.read_csv(data_path + season + '/element_types_raw.csv')
element_types.drop(columns=['plural_name', 'plural_name_short'],inplace=True)
element_types.rename(columns={'id':'element_type_id', 'singular_name': 'position_name', 'singular_name_short' : 'position_short'}, inplace=True)

#load player_cleaned data
cleaned_players = pd.read_csv(data_path + season + '/cleaned_players.csv')
cleaned_players.rename(columns={'id':'player_id'}, inplace=True)

In [6]:
#prepare players data
players = pd.merge(players, cleaned_players.loc[:,['player_id', 'first_name', 'second_name', 'team', 'element_type']], on='player_id')

#merge team name
players = pd.merge(players, teams.loc[:, ['team_id', 'name']], left_on='team', right_on='team_id')
players.drop(columns=['team_id', 'team'], inplace=True)
players.rename(columns={'name':'team'}, inplace=True)

#merge element type 
players = pd.merge(players, element_types, left_on='element_type', right_on='element_type_id')
players.drop(columns=['element_type', 'element_type_id'], inplace=True)
players.rename(columns={'position_name':'position'}, inplace=True)

#rename total_points to points
players.rename(columns={'total_points':'points'}, inplace=True)



In [7]:
#swap columns
players = pd.concat([players.iloc[:, -6:], players.iloc[:, :-6]], axis=1)

In [8]:
salah = players.loc[players['player_id'] == 253, :]

In [9]:
stats_by_player_ids = players.groupby("player_id", as_index=False).agg({
    'points' : np.sum
})

In [10]:
top_by_points = stats_by_player_ids.sort_values('points', ascending=False)[:100]
top_by_points = pd.merge(top_by_points, cleaned_players.loc[:, ['player_id', 'second_name']], on="player_id")


In [18]:
top_by_points.head()

Unnamed: 0,player_id,points,second_name
0,253,149,Salah
1,122,136,Hazard
2,23,132,Aubameyang
3,372,130,Kane
4,270,122,Sterling


In [12]:
captains = players.loc[[player_id in list(top_by_points.player_id) for player_id in players.player_id], :]
captain_attackers = captains.loc[(captains.position_short == 'FWD') | (captains.position_short == 'MID'), :]
captain_defenders = captains.loc[(captains.position_short == 'DEF') | (captains.position_short == 'GKP'), :]

In [13]:
def ict_mean_by_was_home(player_id):
    player = players.loc[players.player_id == player_id, :]
    home = player.loc[player.was_home == True, :]
    away = player.loc[player.was_home == False, :]
    return home['ict_index'].mean(), away['ict_index'].mean()

def influence_mean_by_was_home(player_id):
    player = players.loc[players.player_id == player_id, :]
    home = player.loc[player.was_home == True, :]
    away = player.loc[player.was_home == False, :]
    return home['influence'].mean(), away['influence'].mean()

def creativity_mean_by_was_home(player_id):
    player = players.loc[players.player_id == player_id, :]
    home = player.loc[player.was_home == True, :]
    away = player.loc[player.was_home == False, :]
    return home['creativity'].mean(), away['creativity'].mean()

def threat_mean_by_was_home(player_id):
    player = players.loc[players.player_id == player_id, :]
    home = player.loc[player.was_home == True, :]
    away = player.loc[player.was_home == False, :]
    return home['threat'].mean(), away['threat'].mean()

In [124]:
def get_n_gw_before_ict(player_id, n):
    player = players.loc[players.player_id == player_id, :]
    if(player.shape[0] > n):
        if(n == 1):
            return player['ict_index'].shift(1)
        else:
            return player['ict_index'].shift().rolling(min_periods=n, window=n).mean()
    else:
        return np.nan


In [125]:
salah = players.loc[players.player_id ==253, :]
salah['xxx'] = get_n_gw_before_ict(253, 2)



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


In [123]:
s.shift().rolling(min_periods=3, window=3).mean()

0    NaN
1    NaN
2    NaN
3    2.0
4    3.0
5    4.0
dtype: float64