In [1]:
import pandas as pd
import seaborn as sns
import numpy as np
import pickle
# Plot styling: apply seaborn's default settings.
sns.set()
# Raise pandas' display limits (rows shown, columns shown, line width in characters).
for display_option, limit in (
    ('display.max_rows', 500),
    ('display.max_columns', 500),
    ('display.width', 150),
):
    pd.set_option(display_option, limit)

In [2]:
# Load the interim forward-rolling frame that the rest of the notebook filters and splits.
# NOTE(review): absolute, machine-specific path -- it will not resolve on another
# machine unless the same directory layout exists there.
df = pd.read_pickle('/Users/andrewpeters/GitHub/fpl/data/interim/forward_rolling_df.pkl')

In [3]:
# Keep only rows with non-negative points: negative totals are possible but rare,
# and are removed for ease of use.
# .copy() makes the filtered frame independent, so the column assignment below does
# not write into a slice of the loaded frame (avoids pandas' SettingWithCopyWarning).
df = df[df['total_points'] >= 0].copy()
# Mean of the three previous minutes columns. `+` propagates NaN, so a row missing
# any of the three gets NaN here and therefore fails the > 60 filter below.
df['past_three_avg_mins'] = (df['minutes_prev'] + df['minutes_prev_2'] + df['minutes_prev_3'])/3
df = df[df['past_three_avg_mins'] > 60]
# Minutes played is very much after-the-fact knowledge, but usually we know who
# won't even be playing 40 minutes.
df = df[df['minutes'] > 40]


In [4]:
def prep_final_features(df):
    """Return a new frame with a numeric ``was_home`` and position indicator columns.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a ``was_home`` column that can be cast to int and a
        ``position`` column.

    Returns
    -------
    pandas.DataFrame
        A new frame with ``was_home`` cast to integers, followed by one
        ``position_<value>`` indicator column per distinct value of ``position``.
        The frame passed in is left unmodified.
    """
    # dtype is pinned so the indicators are numeric on every pandas version (the
    # default changed from uint8 to bool in pandas 2.0). Bool columns mixed with
    # float columns would turn the frame into an object array when it is
    # converted to NumPy.
    positions_df = pd.get_dummies(df['position'], prefix='position', dtype='uint8')
    # assign() builds a new frame, so the caller's ``was_home`` column is not overwritten.
    df = df.assign(was_home=df['was_home'].astype('int'))  # convert bool to numeric
    # join() aligns on the index.
    # NOTE(review): assumes the index is unique; duplicated labels would multiply rows -- confirm.
    return df.join(positions_df)

In [5]:
# Convert was_home to a numeric column and append the position_* dummy columns.
df = prep_final_features(df)

In [6]:
# Drop every row that has a missing value in any column (dropna defaults).
df = df.dropna()

In [7]:
# Remove the top 0.5% of rows by total_points_next_3 (the highest targets).
# NOTE(review): this comment previously said ".05%", but the .005 factor below
# is 0.5% of the rows -- confirm which was intended.
# Side effect: df stays sorted by total_points_next_3, descending, for the cells that follow.
n_top_rows = round(len(df) * .005)
df = df.sort_values('total_points_next_3', ascending=False).iloc[n_top_rows:]

In [12]:
# Columns that are saved separately as metadata and dropped from the feature matrix.
metadata_cols = [
    'player',
    'minutes',
    'team',
    'position',
    'opponent_team',
    'gw',
    'kickoff_time',
    'season',
]

In [13]:
# Metadata columns only, taken from the same filtered frame (same rows, same order) as X and y.
metadata = df[metadata_cols]

In [15]:
# Feature matrix: every column of df except the metadata, the minutes-based
# filter helpers, the previous-score columns, the current points and the target.
# ('minutes' is listed again here although metadata_cols already contains it.)
non_feature_cols = metadata_cols + [
    'past_three_avg_mins',
    'minutes',
    'team_h_score_prev',
    'team_a_score_prev',
    'total_points',
    'total_points_next_3',
]
X = df.drop(columns=non_feature_cols)

In [16]:
# Target: the total_points_next_3 column, row-aligned with X (both come from the same df).
y = df['total_points_next_3']

In [17]:
# Save the final feature names -- used when predicting on future data to make sure
# the same features are present.
# NOTE(review): absolute, machine-specific output path.
feature_names = list(X.columns)
with open('/Users/andrewpeters/GitHub/fpl/data/interim/x_features_forward_rolling.pkl', 'wb') as names_file:
    pickle.dump(feature_names, names_file)

In [18]:
# Write the feature matrix in NumPy's .npy format. Column names are not part of
# the array, so they have to be recovered from the separately saved name list.
# NOTE(review): absolute, machine-specific output path.
x_out_path = '/Users/andrewpeters/GitHub/fpl/data/processed/X_forward_rolling.npy'
with open(x_out_path, 'wb') as x_out:
    np.save(x_out, X)

In [19]:
# Write the target values in NumPy's .npy format.
# NOTE(review): absolute, machine-specific output path.
y_out_path = '/Users/andrewpeters/GitHub/fpl/data/processed/y_forward_rolling.npy'
with open(y_out_path, 'wb') as y_out:
    np.save(y_out, y)

In [23]:
# Write the metadata rows in NumPy's .npy format.
# NOTE(review): metadata presumably holds non-numeric columns (e.g. player, team);
# if so NumPy stores an object array via pickle and np.load needs allow_pickle=True -- confirm.
# NOTE(review): absolute, machine-specific output path.
metadata_out_path = '/Users/andrewpeters/GitHub/fpl/data/processed/metadata_forward_rolling.npy'
with open(metadata_out_path, 'wb') as metadata_out:
    np.save(metadata_out, metadata)

In [20]:
# Sanity check: (rows, feature columns) of the feature matrix.
X.shape

(27543, 319)

In [21]:
# Sanity check: the row count should match X, with one column per entry of metadata_cols.
metadata.shape

(27543, 8)