In [15]:
import pandas as pd
import numpy as np
import pickle
from xgboost import XGBRegressor
from sklearn.metrics import mean_squared_error
from datetime import datetime
from sklearn.tree import DecisionTreeRegressor
from sklearn.linear_model import LinearRegression
import statsmodels.api as sm
import matplotlib.pyplot as plt

In [2]:
def dateparse(x):
    """Parse a 'YYYY/MM/DD' string into a datetime; return any non-string value unchanged.

    Passed to pandas as `date_parser` when the workbook is loaded.
    A string that does not match '%Y/%m/%d' makes `datetime.strptime` raise ValueError.
    """
    if isinstance(x, str):
        return datetime.strptime(x, '%Y/%m/%d')
    # Non-string inputs (already-parsed dates, missing cells, ...) pass through untouched.
    return x

In [3]:
# Read the signed-players workbook; the 'date' column is parsed with dateparse.
raw_sheet = pd.read_excel(
    'SignedPlayersDatasetVALORANT.xlsx',
    parse_dates=['date'],
    date_parser=dateparse,
)
# Fix name errors: remove newline characters first, then spaces, from text values
# (regex replacement is applied across the whole frame, not only the name column).
data = raw_sheet.replace('\n', '', regex=True).replace(' ', '', regex=True)
#test_data = pd.read_excel('Test Dataset VALORANT VCT DATASET.xlsx', parse_dates=['date'], date_parser=dateparse).dropna(how='any',axis=0)

In [4]:
# Columns that identify a row (event date and player name).
input_features = ['date', 'player']

# Stat columns; the cells below fit one model per player for each of these.
features = [
    # per-round columns
    'kills_per_round',
    'assists_per_round',
    'average_damage_per_round',
    'first_kills_per_round',
    'first_deaths_per_round',
    # percentage columns
    'headshot_percentage',
    'clutch_success_percentage',
    # total columns
    'total_kills',
    'total_deaths',
    'total_assists',
    'total_first_kills',
    'total_first_deaths',
]

# Column named as the prediction target.
target_variable = 'average_combat_score'
# Note: create models w/ target variable for rounds and rating

In [5]:
# Disabled train/test split, kept for reference only. It needs a `test_data`
# frame, and the line that would load one is commented out in this notebook.
#
# # Split variables
# X_train = data[features]
# X_test = test_data[features]
# y_train = data[target_variable]
# y_test = test_data[target_variable]

# The cell's final expression is None, so it displays no output.
None

In [6]:
# Distinct event dates and distinct player names found in the loaded data.
dates, players = (data[column].unique() for column in ('date', 'player'))

In [7]:
# Month arithmetic helper
def diff_month(d1, d2):
    """Return how many calendar months d1 lies after d2 (negative when d1 is earlier).

    Only the `.year` and `.month` attributes of the arguments are read.
    """
    years_apart = d1.year - d2.year
    months_apart = d1.month - d2.month
    return 12 * years_apart + months_apart

def months_from_2021(dates):
    """Return a list with the month offset of each date relative to January 2021.

    Each element of `dates` is compared against 2021/01/01 with `diff_month`,
    so it must expose `.year` and `.month`.
    """
    january_2021 = datetime.strptime('2021/01/01', '%Y/%m/%d')
    return [diff_month(date, january_2021) for date in dates]

In [8]:
# Add the month offset (relative to January 2021) of every row's date;
# this column is the single input of the per-player models fitted below.
months_column = months_from_2021(data['date'])
data = data.assign(months_from_Jan_2021=months_column)

In [9]:
# Fit one depth-3 decision tree per (player, feature) pair, mapping the month
# offset to the feature value. Result: {player: {feature: fitted model}}.
# Note: only players present in `data` get models; unknown players don't work.
# Note: decision trees were chosen because of the small sample per player.
#       Known problem: predictions stop changing past the latest event.
player_feature_models = {}
for player in players:
    player_rows = data.loc[data['player'] == player]
    # Single input column reshaped to (n_samples, 1), shared by every fit for this player.
    months = player_rows['months_from_Jan_2021'].values[:, np.newaxis]
    player_feature_models[player] = {
        feature: DecisionTreeRegressor(max_depth=3).fit(months, player_rows[feature])
        for feature in features
    }

In [10]:
# Fit one linear regression per (player, feature) pair, mapping the month
# offset to the feature value. Result: {player: {feature: fitted model}}.
# Note: only players present in `data` get models; unknown players don't work.
# Note: linear regression is used here due to problems in prediction.
#       It can cause negative values.
player_feature_models_linear = {}
for player in players:
    player_rows = data.loc[data['player'] == player]
    # Single input column reshaped to (n_samples, 1), shared by every fit for this player.
    months = player_rows['months_from_Jan_2021'].values[:, np.newaxis]
    player_feature_models_linear[player] = {
        feature: LinearRegression().fit(months, player_rows[feature])
        for feature in features
    }

In [11]:
# Fit one XGBoost regressor (default settings) per (player, feature) pair, mapping
# the month offset to the feature value. Result: {player: {feature: fitted model}}.
# Note: only players present in `data` get models; unknown players don't work.
# Note: XGBoost models can get very heavy in file size.
player_feature_models_XGB = {}
for player in players:
    player_rows = data.loc[data['player'] == player]
    # Single input column reshaped to (n_samples, 1), shared by every fit for this player.
    months = player_rows['months_from_Jan_2021'].values[:, np.newaxis]
    player_feature_models_XGB[player] = {
        feature: XGBRegressor().fit(months, player_rows[feature])
        for feature in features
    }

In [None]:
# Note: meant to use beta regression, which limits the max/min values.
# NOTE(review): the estimator fitted below is still XGBRegressor on a target rescaled
# into (0, 1); it is not a true beta regression yet - TODO confirm the intended model.

# Scaling bounds are computed ONCE per feature from the whole dataset and applied to a
# copy, so `data` itself is never modified and the cell can be re-run safely.
# (Previously the bounds were looked up with the literal key 'feature', and `data` was
# rescaled in place again for every player.)
feature_bounds_beta = {}  # {feature: (lower, upper)}, needed to undo the scaling
data_beta = data.copy()
for feature in features:
    observed_min = data[feature].min()
    observed_max = data[feature].max()
    # Pad the observed range by 10% on each side so every scaled value lies strictly
    # inside (0, 1). Multiplying min and max by 1.1 does not do that: it pushes a
    # positive minimum above the observed data.
    padding = 0.1 * (observed_max - observed_min)
    if padding == 0:
        # Constant column: any positive padding avoids a zero-width range.
        padding = 0.5
    lower, upper = observed_min - padding, observed_max + padding
    feature_bounds_beta[feature] = (lower, upper)
    # Transform the response variable to be between 0 and 1
    data_beta[feature] = (data[feature] - lower) / (upper - lower)

# Fit one model per (player, feature) pair on the scaled targets.
# A prediction p maps back to original units as: p * (upper - lower) + lower.
player_feature_models_beta = {}
for player in players:
    player_feature_model = {}
    player_data = data_beta.loc[data_beta['player'] == player]
    # Single input column reshaped to (n_samples, 1), shared by every fit for this player.
    months = player_data['months_from_Jan_2021'].values[:, np.newaxis]
    for feature in features:
        feature_model = XGBRegressor().fit(months, player_data[feature])
        player_feature_model.update({feature: feature_model})
    player_feature_models_beta.update({player: player_feature_model})

In [12]:
#Save Models
# Decision-tree models: {player: {feature: fitted model}}.
filename = 'player_feature_models.sav'
# The context manager flushes and closes the file even if pickling fails;
# a bare open() passed to pickle.dump leaves the handle open.
with open(filename, 'wb') as model_file:
    pickle.dump(player_feature_models, model_file)

In [13]:
# Linear-regression models: {player: {feature: fitted model}}.
filename = 'player_feature_models_linear.sav'
# The context manager flushes and closes the file even if pickling fails;
# a bare open() passed to pickle.dump leaves the handle open.
with open(filename, 'wb') as model_file:
    pickle.dump(player_feature_models_linear, model_file)

In [14]:
# XGBoost models: {player: {feature: fitted model}}.
filename = 'player_feature_models_XGB.sav'
# The context manager flushes and closes the file even if pickling fails;
# a bare open() passed to pickle.dump leaves the handle open.
with open(filename, 'wb') as model_file:
    pickle.dump(player_feature_models_XGB, model_file)

In [None]:
#Note: Find out if functions can be appended to model.