In [15]:

import gc
import os

import numpy as np
import pandas as pd

from copy import copy
### customize to your own
### Directory that load_data() joins with the CSV file name. It is a hard-coded
### absolute Windows-style path, so it has to be edited before running elsewhere.
DATA_PATH = 'D://Medium/'

def load_data(data_path=None, filename='ncaam_sample_data.csv'):
    """Read the sample game CSV into a DataFrame.

    Parameters
    ----------
    data_path : str or path-like, optional
        Directory containing the CSV. When omitted, the module-level
        ``DATA_PATH`` is used, which is what the original zero-argument
        call did.
    filename : str, optional
        Name of the CSV file inside ``data_path``.

    Returns
    -------
    pandas.DataFrame
        The parsed CSV, exactly as ``pandas.read_csv`` returns it.
    """
    # DATA_PATH is only looked up when no explicit directory is supplied, so the
    # function can be pointed at any location without editing the config cell.
    base_dir = DATA_PATH if data_path is None else data_path
    return pd.read_csv(os.path.join(base_dir, filename))

# Load the raw table from disk and preview its first rows.
m_data = load_data()
m_data.head()


Unnamed: 0,season,team_score,opp_score,is_home,numot,team_fgm,team_fga,team_fgm3,team_fga3,team_ftm,...,opp_or,opp_dr,opp_ast,opp_to,opp_stl,opp_blk,opp_pf,team_name,opp_name,date
0,2003,68,62,0,0,27,58,3,14,11,...,10,22,8,18,9,2,20,Alabama,Oklahoma,2002-11-14
1,2003,70,63,0,0,26,62,8,20,10,...,20,25,7,12,8,6,16,Memphis,Syracuse,2002-11-14
2,2003,62,68,0,0,22,53,2,10,16,...,14,24,13,23,7,1,22,Oklahoma,Alabama,2002-11-14
3,2003,63,70,0,0,24,67,6,24,9,...,15,28,16,13,4,4,18,Syracuse,Memphis,2002-11-14
4,2003,55,81,-1,0,20,46,3,11,12,...,12,24,12,9,9,3,18,E Washington,Wisconsin,2002-11-15


In [16]:

def process(data):
    """Reshape a game-level table into long-format results per stat.

    For every input row two output rows are produced, one for ``score_diff``
    (team_score - opp_score) and one for ``reb_diff`` (team rebounds minus
    opponent rebounds, where rebounds are offensive + defensive). Each
    difference is encoded in ``result`` as 1.0 when positive, 0.5 when exactly
    zero and 0.0 otherwise.

    The input frame is left untouched: nothing is added to it and its ``date``
    column keeps its original dtype.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain 'date', 'season', 'team_name', 'opp_name', 'is_home',
        'team_score', 'opp_score', 'team_or', 'team_dr', 'opp_or', 'opp_dr'.

    Returns
    -------
    pandas.DataFrame
        Columns 'date', 'season', 'team_name', 'opp_name', 'is_home', 'stat',
        'result', sorted by date, team_name and stat with a fresh 0..n-1 index.

    Raises
    ------
    KeyError
        If any of the required columns is missing.
    """
    id_cols = ['date', 'season', 'team_name', 'opp_name', 'is_home']

    # Build a new frame from the identifier columns only, so the caller's
    # DataFrame never receives the helper columns created here.
    games = data[id_cols].copy()

    ## date must be a datetime. rating period also acceptable as a numeric
    games['date'] = pd.to_datetime(games['date'])

    ## In this example, I'm interested in the difference between two stats - score and rebounds
    games['score_diff'] = data['team_score'] - data['opp_score']
    games['reb_diff'] = (
        (data['team_or'] + data['team_dr']) - (data['opp_or'] + data['opp_dr'])
    )

    ## long format necessary
    ## if you are not familiar with the melt function, I would recommend reading about it
    ## https://towardsdatascience.com/reshape-pandas-dataframe-with-melt-in-python-tutorial-and-visualization-29ec1450bb02
    long_games = games.melt(
        id_vars=id_cols,
        value_vars=['score_diff', 'reb_diff'],
        var_name='stat',
        value_name='difference',
    )

    # One pass instead of two chained np.where calls; the default of 0.0 also
    # covers NaN differences, as the original comparisons did.
    difference = long_games['difference']
    long_games['result'] = np.select(
        [difference > 0, difference == 0], [1.0, 0.5], default=0.0
    )

    return (
        long_games
        .drop(columns=['difference'])
        .sort_values(by=['date', 'team_name', 'stat'])
        .reset_index(drop=True)
    )

# NOTE(review): process() reads raw box-score columns such as 'team_or' and 'opp_dr'
# and returns a frame without them. Because m_data is rebound to that result here,
# re-running this cell without first reloading m_data via load_data() fails.
m_data = process(m_data)
m_data.head(10)



Unnamed: 0,date,season,team_name,opp_name,is_home,stat,result
0,2002-11-14,2003,Alabama,Oklahoma,0,reb_diff,1.0
1,2002-11-14,2003,Alabama,Oklahoma,0,score_diff,1.0
2,2002-11-14,2003,Memphis,Syracuse,0,reb_diff,0.0
3,2002-11-14,2003,Memphis,Syracuse,0,score_diff,1.0
4,2002-11-14,2003,Oklahoma,Alabama,0,reb_diff,0.0
5,2002-11-14,2003,Oklahoma,Alabama,0,score_diff,0.0
6,2002-11-14,2003,Syracuse,Memphis,0,reb_diff,1.0
7,2002-11-14,2003,Syracuse,Memphis,0,score_diff,0.0
8,2002-11-15,2003,E Washington,Wisconsin,-1,reb_diff,0.0
9,2002-11-15,2003,E Washington,Wisconsin,-1,score_diff,0.0


In [75]:

class StatefulSystem:
    """Base class for rating systems that keep per-node state.

    Attributes
    ----------
    nodes
        Whatever container the caller passes in; players/teams as keys and
        ratings as values.
    history : list
        Starts empty; meant to hold the history of predictions and results.
    """

    def __init__(self, nodes):
        self.nodes, self.history = nodes, []

    def predict_1v1(self, player1, player2, **kwargs):
        """Predict a head-to-head outcome; child classes must override this."""
        raise NotImplementedError

    def update_1v1(self, player1, player2, result, **kwargs):
        """Apply a head-to-head result; child classes must override this."""
        raise NotImplementedError
        
        
class PlayerNode:
    """Minimal node that holds a single state value in ``vector``."""

    def __init__(self, vector):
        # Stored as given; no copy or conversion is made.
        self.vector = vector
        
class EloNode(PlayerNode):
    """Node for an Elo system: the rating is kept under ``rating`` and ``vector``."""

    def __init__(self, rating):
        self.rating = rating
        # The parent initialiser receives the same value, so both attributes
        # start out equal.
        super().__init__(rating)
        
class EloSystem(StatefulSystem):
    """Elo rating system with optional additive adjustments from match metadata.

    Parameters
    ----------
    nodes : mapping
        Node id -> node object. Nodes must expose a numeric ``rating``.
    k_factor : number
        Multiplier applied to (result - prediction) in ``update_1v1``.
    meta_functions : dict or None
        Maps a keyword name to a callable. In ``predict_1v1`` each callable is
        given the keyword argument of the same name and its return value is
        added to the rating difference.
    """

    def __init__(self, nodes, k_factor, meta_functions=None):
        # The base initialiser already stores `nodes` and creates `history`.
        super().__init__(nodes)
        self.k_factor = k_factor
        self.meta_functions = meta_functions

    def predict_1v1(self, player1, player2, **kwargs):
        """Return the expected score of ``player1`` against ``player2``.

        Meta information is passed as keyword arguments, e.g. ``is_home=1``.
        Every key in ``meta_functions`` must be supplied.

        Returns
        -------
        float
            ``1 / (1 + 10 ** (-rd / 400))`` where ``rd`` is the rating
            difference plus all meta adjustments.

        Raises
        ------
        KeyError
            If a key of ``meta_functions`` is missing from the keyword arguments.
        """
        rd = player1.rating - player2.rating
        if self.meta_functions is not None:
            ## add all adjustments for meta information
            for meta_key, meta_function in self.meta_functions.items():
                rd += meta_function(kwargs[meta_key])
        # Returned rather than printed so callers can feed it to update_1v1.
        return 1 / (1 + 10 ** (-rd / 400))

    def update_1v1(self, prediction, result):
        """Return the rating points exchanged for one match.

        NOTE(review): the parameters differ from ``StatefulSystem.update_1v1``
        (player1, player2, result, **kwargs); kept as originally declared.

        Parameters
        ----------
        prediction : float
            Expected score, as produced by ``predict_1v1``.
        result : float
            Observed outcome; presumably on the same 0-to-1 scale as
            ``prediction`` - confirm against the caller.

        Returns
        -------
        float
            ``k_factor * (result - prediction)``. This method does not modify
            any node.
        """
        points_exchanged = self.k_factor * (result - prediction)
        return points_exchanged

    def play_match(self, p1_id, p2_id, **kwargs):
        """Look up two nodes by id and return the prediction for their match.

        Copies of the stored nodes are used, as in the original draft.
        TODO: ratings are not updated and nothing is appended to ``history`` yet.

        Raises
        ------
        KeyError
            If either id is not present in ``nodes``.
        """
        p1_node = copy(self.nodes[p1_id])
        p2_node = copy(self.nodes[p2_id])
        return self.predict_1v1(p1_node, p2_node, **kwargs)

# Five demo nodes with ratings 200 points apart; they are keyed 0-4 in the system below.
p0 = EloNode(1900)
p1 = EloNode(1700)
p2 = EloNode(1500)
p3 = EloNode(1300)
p4 = EloNode(1100)
# The 'is_home' meta function adds 50 * is_home to the rating difference in predict_1v1.
elo = EloSystem(nodes={0:p0, 1:p1, 2:p2, 3:p3, 4:p4}, k_factor=25, meta_functions={'is_home':lambda x: x*50})




In [None]:
# class GlickoRatingSystem(StateSpaceRatingSystem):
#     def __init__(self):
#         super().__init__()

#     def predict(self, player1, player2, **kwargs):
#         # Implement Glicko prediction logic here
#         pass

#     def update(self, player1, player2, result, **kwargs):
#         # Implement Glicko rating update logic here
#         pass

Elo


In [38]:

def elo_score(x):
    """Identity score function: hand back the input unchanged."""
    return x
def elo_predict(rating_diff):
    """Map a rating difference to an expected score on the base-10, 400-point logistic curve."""
    exponent = -(rating_diff / 400)
    return 1 / (1 + 10 ** exponent)
def elo_update(result, prediction, **params):
    """Return the pair of rating changes for one match.

    The first element is ``params['k'] * (result - prediction)`` and the second
    is its negation. Raises KeyError when ``k`` is not supplied.
    """
    delta = params['k'] * (result - prediction)
    return delta, -1 * delta

class StateSpace():
    """Container for match data plus the functions that define a rating system.

    Parameters
    ----------
    data
        Table of matches. Only its column names are inspected here
        (``list(data)``); a pandas DataFrame satisfies this.
    rating_size, score_func, predict_func, update_func, params, priors, meta, directed
        Stored unchanged on the instance under the same names.

    Attributes
    ----------
    has_stat_col : bool
        True when ``data`` has a 'stat' column, i.e. when the rows may carry
        more than one statistic.

    Raises
    ------
    AssertionError
        From ``validation`` when a required column is missing.
    """

    def __init__(self,
                 data,
                 rating_size,
                 score_func,
                 predict_func,
                 update_func,
                 params,
                 priors=None,
                 meta=None,
                 directed=True
                ):

        self.data = data
        self.rating_size = rating_size
        self.score_func = score_func
        self.predict_func = predict_func
        self.update_func = update_func
        # Previously accepted but dropped; kept so update functions can read it.
        self.params = params
        self.priors = priors
        self.meta = meta
        self.directed = directed

        ## check for multiple stats
        # The original `if` had no body, which made the whole cell a syntax
        # error. Only the outcome of the check is recorded for now.
        self.has_stat_col = 'stat' in list(data)

        self.validation()

    def validation(self):
        """Check that ``data`` has a result column and both participant columns.

        Raises
        ------
        AssertionError
            If 'result' is missing, or if none of the accepted protagonist or
            antagonist column names is present. Raised explicitly rather than
            through ``assert`` so the checks still run under ``python -O``.
        """
        columns = list(self.data)

        if 'result' not in columns:
            raise AssertionError("Need a 'result' column in data")

        possible_protag_cols = ['team','team_name','team_id','home','home_team','home_team_name','home_team_id','player','player_id','player_name','protag','protagonist']
        if not any(name in columns for name in possible_protag_cols):
            raise AssertionError(f"Need one of {possible_protag_cols} in data")

        possible_antag_cols = ['opp','opponent','opp_name','opponent_name','opp_id','opponent_id','away_team','away','away_team_id','away_team_name']
        if not any(name in columns for name in possible_antag_cols):
            raise AssertionError(f"Need one of {possible_antag_cols} in data")
    
class Elo(StateSpace):
    """StateSpace preconfigured with the Elo score, predict and update functions.

    Parameters
    ----------
    data
        Match table; validated by the inherited ``validation``.
    rating_size : int, optional
        Defaults to 1.
    score_func, predict_func, update_func : callable, optional
        Default to ``elo_score``, ``elo_predict`` and ``elo_update``.
    params : dict, optional
        Keyword parameters for the update function; defaults to ``{'k': 20}``.
        A copy is stored, so neither the caller's dict nor the shared default
        is modified through the instance.
    priors, meta : optional
        Stored on the instance as given. They used to be overwritten with None.

    Raises
    ------
    AssertionError
        From ``validation`` when a required column is missing.
    """

    def __init__(
        self,
        data,
        rating_size=1,
        score_func=elo_score,
        predict_func=elo_predict,
        update_func=elo_update,
        params={'k':20},
        priors=None,
        meta=None):

        # Copy so that instances never share (or mutate) the default dict.
        params = dict(params)

        # Delegate to the base initialiser so every base attribute, including
        # `directed`, is set and validation runs in one place.
        super().__init__(
            data=data,
            rating_size=rating_size,
            score_func=score_func,
            predict_func=predict_func,
            update_func=update_func,
            params=params,
            priors=priors,
            meta=meta,
        )

        # Set explicitly so the parameters are on the instance whether or not
        # the base initialiser keeps them.
        self.params = params
    
    

# NOTE(review): this rebinds `elo`, which an earlier cell bound to an EloSystem instance.
elo = Elo(m_data)



In [12]:


# The original call was cut off at `rating_size=` (a syntax error) and left out
# StateSpace's required arguments. The values below mirror the defaults of Elo.
elo_system = StateSpace(
    data=m_data,
    rating_size=1,
    score_func=elo_score,
    predict_func=elo_predict,
    update_func=elo_update,
    params={'k': 20},
)



0.9467597847979775

In [10]:

# import numpy as np

# @jit(nopython=True)
# def sum_array(arr):
#     result = 0.0
#     for x in arr:
#         result += x
#     return result

# data = np.random.rand(10000)

# # Numba function
# %timeit sum_array(data) 

# # Built-in function
# %timeit np.sum(data)
