**Install Python dependencies**

In [None]:
!pip install -q -r ./dependencies/requirements.txt

**Load Python libraries**

In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras
import matplotlib.pyplot as plt
import sklearn.metrics as metrics
import plotly.express as px

%matplotlib inline
plt.style.use('seaborn')

**Show smoothing methods for Robert Lewandowski passes**

In [None]:
# Load the pass counts, expose the raw 'count' column under the name
# 'occurences' (spelling kept: later cells use it as a column key), keep only
# the columns needed for smoothing and index the rows by matchday.
df = (
    pd.read_csv('./data/data_rl9_pass.csv')
    .assign(occurences=lambda raw: raw['count'])
    .loc[:, ['name', 'matchday', 'event_type', 'occurences']]
    .set_index('matchday')
)

# Raw, unsmoothed series as the visual baseline for the smoothing methods below.
df.plot(color='green', linewidth=3, figsize=(12, 6))

In [None]:
# Simple moving averages over windows of 5 and 10 rows. min_periods=1 lets the
# first rows average over however many observations exist so far instead of
# producing NaN.
for window in (5, 10):
    df[f'SMA_{window}'] = df['occurences'].rolling(window, min_periods=1).mean()

colors = ['green', 'red', 'purple']

df.loc[:, ['occurences', 'SMA_5', 'SMA_10']].plot(color=colors, linewidth=3, figsize=(12, 6))
# --> SMA_5 (the window that the comparison cell further down plots)


In [None]:
# Cumulative moving average: at every row, the mean of all occurences seen so far.
running_mean = df['occurences'].expanding().mean()
df['CMA'] = running_mean

colors = ['green', 'orange']

df.loc[:, ['occurences', 'CMA']].plot(color=colors, linewidth=3, figsize=(12, 6))

In [None]:
# Exponential moving averages for three smoothing factors. adjust=False selects
# the recursive form y_t = (1 - a) * y_{t-1} + a * x_t.
for smoothing in (0.1, 0.3, 0.5):
    df[f'EMA_{smoothing}'] = df['occurences'].ewm(alpha=smoothing, adjust=False).mean()

colors = ['green', 'blue', 'orchid', 'pink']

ema_columns = ['occurences', 'EMA_0.1', 'EMA_0.3', 'EMA_0.5']
df[ema_columns].plot(color=colors, linewidth=3, figsize=(12, 6), alpha=0.8)
# --> EMA_0.5 (the factor that the comparison cell below plots)

In [None]:
# Side-by-side comparison of the raw series with one curve per smoothing family.
compared_columns = ['occurences', 'SMA_5', 'CMA', 'EMA_0.5']
colors = ['green', 'red', 'orange', 'orchid']

df[compared_columns].plot(color=colors, linewidth=3, figsize=(12, 6), alpha=0.8)

# --> EMA_0.5


**Calculate Smoothing Accuracy**

In [None]:
def calculate_averages(df):
    """Return a matchday-indexed copy of ``df`` with smoothing columns added.

    The input must contain the columns ``count`` and ``matchday``. The result
    has ``matchday`` as its index and these additional columns:

    * ``occurences`` - copy of ``count`` (spelling kept; it is a column key
      used elsewhere in the notebook)
    * ``SMA_5`` / ``SMA_10`` - simple moving averages over 5 and 10 rows
    * ``CMA`` - cumulative (expanding) mean
    * ``EMA_0.1`` / ``EMA_0.3`` / ``EMA_0.5`` - exponential moving averages

    The caller's frame is left untouched, so the function can be called
    repeatedly on the same input and on groupby slices without chained
    assignment warnings.

    Args:
        df: DataFrame with at least ``count`` and ``matchday`` columns.

    Returns:
        A new DataFrame indexed by ``matchday`` with the columns listed above.
    """
    # Work on a copy: mutating the argument would remove its 'matchday' column
    # (it becomes the index) and make a second call on the same frame fail.
    df = df.copy()
    df['occurences'] = df['count']
    df = df.set_index('matchday')

    occurences = df['occurences']

    # simple moving average of 5 and 10; min_periods=1 avoids leading NaNs
    df['SMA_5'] = occurences.rolling(5, min_periods=1).mean()
    df['SMA_10'] = occurences.rolling(10, min_periods=1).mean()

    # cumulative moving average
    df['CMA'] = occurences.expanding().mean()

    # exponential moving average with different alphas (recursive form)
    for alpha in (0.1, 0.3, 0.5):
        df[f'EMA_{alpha}'] = occurences.ewm(alpha=alpha, adjust=False).mean()

    return df

def calculate_regression_accuracies(df, df_reg_acc, columns, warmup=10):
    """Score each column as a one-step-ahead forecast of ``occurences``.

    For every name in ``columns`` the value of the previous row is used as the
    prediction for the current row (stored in ``df`` as ``<column>_pred``). The
    first ``warmup`` rows are excluded from scoring, and MAE, MSE, RMSE and
    R-Squared are computed on the remaining rows.

    Args:
        df: Frame for one player/event series containing ``name``,
            ``event_type``, ``occurences`` and every column in ``columns``.
            Rows are expected to be in chronological order. The ``*_pred``
            columns are added to this frame.
        df_reg_acc: Accumulator frame that the result rows are appended to.
        columns: Names of the columns to evaluate.
        warmup: Number of leading rows to skip before scoring (default 10).

    Returns:
        A new DataFrame consisting of ``df_reg_acc`` followed by one row per
        evaluated column. Columns with no rows left after the warm-up are
        skipped; if nothing could be evaluated, ``df_reg_acc`` is returned
        unchanged.
    """
    player_name = df['name'].max()
    event_type = df['event_type'].max()

    # The shifted prediction of the very first row is always NaN, so at least
    # one row has to be skipped whatever warm-up is requested.
    first_scored_row = max(warmup, 1)

    records = []
    for column in columns:
        pred_column_name = column + '_pred'
        # what was known after the previous row is the forecast for this row
        df[pred_column_name] = df[column].shift(1)

        # drop the warm-up rows by position; the index holds matchday labels,
        # which may have gaps and therefore must not be used as a row count
        df_pred = df.iloc[first_scored_row:]
        if df_pred.empty:
            # series too short to score; the metric functions reject empty input
            continue

        occurences = df_pred['occurences'].to_numpy()
        predictions = df_pred[pred_column_name].to_numpy()

        mae = metrics.mean_absolute_error(occurences, predictions)
        mse = metrics.mean_squared_error(occurences, predictions)
        rmse = np.sqrt(mse)
        r2 = metrics.r2_score(occurences, predictions)

        records.append({
            'player_name': player_name,
            'event_type': event_type,
            'Regression': column,
            'MAE': mae,
            'MSE': mse,
            'RMSE': rmse,
            'R-Squared': r2,
        })

    if not records:
        return df_reg_acc

    # DataFrame.append was removed in pandas 2.0; build the new rows once and
    # concatenate them instead of appending row by row.
    new_rows = pd.DataFrame(records)
    if df_reg_acc.empty:
        # Concatenating with an empty frame is deprecated in recent pandas, so
        # just give the new rows the accumulator's column layout.
        ordered = list(df_reg_acc.columns)
        ordered += [name for name in new_rows.columns if name not in ordered]
        return new_rows.reindex(columns=ordered)

    return pd.concat([df_reg_acc, new_rows], ignore_index=True)

In [None]:
# Load the full event table for the accuracy comparison and preview its first rows.
df = pd.read_csv('./data/data.csv')

df.head()

In [None]:
# Empty accumulator: one row per (player, event type, smoothing column).
df_reg_acc = pd.DataFrame(
    columns=['player_name', 'event_type', 'Regression', 'MAE', 'MSE', 'RMSE', 'R-Squared']
)
# Columns to score; 'occurences' itself acts as the "repeat the last value" baseline.
columns = ['SMA_5', 'SMA_10', 'CMA', 'EMA_0.1', 'EMA_0.3', 'EMA_0.5', 'occurences']

# drop betting odds (only the columns needed for smoothing are kept)
df = df[['name', 'matchday', 'event_type', 'count']]

# Score every player / event-type series separately.
for player, player_rows in df.groupby("name"):
    # chronological order within the player before splitting by event type
    player_rows = player_rows.sort_values(by=['matchday', 'event_type'])

    for event, event_rows in player_rows.groupby("event_type"):
        smoothed = calculate_averages(event_rows)
        df_reg_acc = calculate_regression_accuracies(smoothed, df_reg_acc, columns)

# Persist the scores for the plotting cell.
df_reg_acc.to_csv('./data/reg_acc.csv')

In [None]:
# Read the scores back from the location the accuracy loop saves them to
# ('./data/reg_acc.csv'); a bare 'reg_acc.csv' would point at a different file.
df_reg_acc = pd.read_csv('./data/reg_acc.csv')

# drop 0 rows: entries that are "perfect" on every metric (zero error and an
# R-Squared of exactly 1) - presumably constant series that carry no information
is_perfect = (
    (df_reg_acc['MAE'] == 0)
    & (df_reg_acc['MSE'] == 0)
    & (df_reg_acc['RMSE'] == 0)
    & (df_reg_acc['R-Squared'] == 1)
)
df_reg_acc = df_reg_acc[~is_perfect]

# keep only the named score columns (this also discards the unnamed index
# column that to_csv writes by default)
df_reg_acc = df_reg_acc[['player_name', 'event_type', 'Regression', 'MAE', 'MSE', 'RMSE', 'R-Squared']]

# df_reg_acc.groupby(["event_type", "Regression"]).median()

# R-Squared per smoothing method, restricted to pass events
df_pass_r = df_reg_acc.loc[
    df_reg_acc['event_type'] == 'pass',
    ['player_name', 'Regression', 'R-Squared'],
]

fig = px.box(df_pass_r, x="Regression", y="R-Squared")
# clip the axis so extreme negative R-Squared values do not flatten the boxes
fig.update_layout(yaxis_range=[-1, 1])
fig.show()