In [1]:
import pandas as pd
from sklearn.metrics import r2_score

In [2]:
from dataset import get_dataset
from game_information import MIN_GAMES, MIN_SEASON_PPG, SEASON_LENGTH, MIN_SEASON_GAME_PERCENTAGE
from ai import do_arima, do_lstm, MAX_DIFF

In [3]:
BUGGED_PLAYERS = []

In [4]:
points_data_set, _ = get_dataset()

In [5]:
points_df = pd.DataFrame.from_dict(points_data_set, orient='index')
points_df.head()

Unnamed: 0,first_name,last_name,name,team,id,position,GW77,GW78,GW80,GW81,...,GW299,GW195,GW107,GW217,GW223,GW224,GW190,GW229,GW218,GW304
387,Aaron,Wan-Bissaka,Wan-Bissaka,MUN,387,DEF,"{'diff': 2, 'points': 12, 'team': 'FUL'}","{'diff': 4, 'points': -2, 'team': 'LIV'}","{'diff': 2, 'points': 0, 'team': 'NFO'}","{'diff': 2, 'points': 9, 'team': 'IPS'}",...,,,,,,,,,,
216,Abdoulaye,Doucouré,A.Doucoure,EVE,216,MID,"{'diff': 2, 'points': 3, 'team': 'BOU'}","{'diff': 3, 'points': 6, 'team': 'BRE'}","{'diff': 4, 'points': 0, 'team': 'SOU'}","{'diff': 4, 'points': 8, 'team': 'MUN'}",...,,,,,,,,,,
80,Adam,Smith,Smith,BOU,80,DEF,"{'diff': 2, 'points': 6, 'team': 'BHA'}","{'diff': 3, 'points': 2, 'team': 'WHU'}","{'diff': 4, 'points': 2, 'team': 'CHE'}","{'diff': 3, 'points': 7, 'team': 'LEI'}",...,"{'diff': 3, 'points': 0, 'team': 'IPS'}",,,,,,,,,
468,Alex,McCarthy,McCarthy,SOU,468,GKP,"{'diff': 2, 'points': 11, 'team': 'BRE'}","{'diff': 3, 'points': 2, 'team': 'EVE'}","{'diff': 2, 'points': 8, 'team': 'CRY'}","{'diff': 2, 'points': 0, 'team': 'BOU'}",...,,"{'diff': 4, 'points': 5, 'team': 'FUL'}",,,,,,,,
309,Alisson,Ramses Becker,A.Becker,LIV,309,GKP,"{'diff': 3, 'points': 7, 'team': 'WHU'}","{'diff': 2, 'points': 6, 'team': 'CRY'}","{'diff': 3, 'points': 3, 'team': 'LEI'}","{'diff': 4, 'points': 2, 'team': 'SOU'}",...,,,"{'diff': 2, 'points': 2, 'team': 'FUL'}","{'diff': 4, 'points': 2, 'team': 'SOU'}","{'diff': 2, 'points': 6, 'team': 'FUL'}","{'diff': 2, 'points': 10, 'team': 'BHA'}",,,,


In [6]:
def process_player(player_data):
    points_sum = 0
    num_games = 0
    total_games = 0
    
    gws = []
    
    for dataset, data in player_data.items():
        if not dataset.startswith('GW') or not isinstance(data, dict):
            continue
            
        total_games += 1
        gws.append(data)
        
        round_num = int(dataset.replace('GW', ''))
        beginning_round = 273
                
        if round_num >= beginning_round:
            points_sum += data['points']
            num_games += 1
            
    calibrate_by = 10
            
    if total_games - calibrate_by < MIN_GAMES or points_sum < MIN_SEASON_PPG * num_games or num_games < SEASON_LENGTH * MIN_SEASON_GAME_PERCENTAGE:
        return [], [], [], 0
    
    pred_by = []
    training_player_data = {'position': player_data['position']}
    
    for gw_num in range(0, len(gws) - calibrate_by + 1):
        training_player_data[f"GW{gw_num + 1}"] = gws[gw_num]
        
    actual_points = []
    for gw in gws[gw_num:]:
        pred_by.append(gw['diff'])
        actual_points.append(gw['points'])
        
    if sum(actual_points) < calibrate_by * MIN_SEASON_PPG:
        return [], [], [], 0
    
    if sum(actual_points) <= 0 or len(gws[:-calibrate_by]) < calibrate_by:
        return [], [], [], 0
    else:
        try:
            arima = do_arima(list(map(lambda x: x['points'], gws[:-calibrate_by])), pred_by)
            lstm = do_lstm(training_player_data, pred_by)
        except:
            return [], [], [], 0
        
        if sum(arima) != 0 and sum(lstm) != 0 and (sum(arima) / sum(lstm) > MAX_DIFF or sum(lstm) / sum(arima) > MAX_DIFF):
            return [], [], [], 0
        
        return actual_points, arima, lstm, calibrate_by

In [7]:
player_datas = []
arima_pred = []
lstm_pred = []
actual_val = []
calibrate_by = []

In [8]:
for index, player_data in points_df.iterrows():
    actual, arima, lstm, cal = process_player(player_data)
    if len(actual) > 0:
        player_datas.append(player_data)
        arima_pred.append(arima[0])
        lstm_pred.append(lstm[0])
        actual_val.append(actual[0])
        calibrate_by.append(cal)

In [9]:
results = pd.DataFrame()
results['player'] = player_datas
results['arima'] = arima_pred
results['lstm'] = lstm_pred
results['actual'] = actual_val
results['calibrate by'] = calibrate_by
results

Unnamed: 0,player,arima,lstm,actual,calibrate by
0,first_name Abdoulaye last_name Douco...,3.101190,2.868421,2,10
1,first_name Adam last_name Smith name ...,2.640304,2.596128,6,10
2,first_name ...,4.287293,3.097384,14,10
3,first_name ...,3.265267,3.592019,1,10
4,first_name ...,3.025000,3.027827,9,10
...,...,...,...,...,...
145,first_name Joe last_name Gomez name ...,2.500000,2.172567,1,10
146,first_name ...,1.857143,1.890368,1,10
147,first_name ...,1.315789,1.010912,1,10
148,first_name Rasmus last_name Højlund na...,3.315789,2.253943,2,10
