In [62]:
import copy
import json
import math
import scipy
import statistics
import random
import sklearn.model_selection

In [2]:
ranked_data_path = f"bigresponse.json"
ranked_data_file = open(ranked_data_path,encoding="utf-8")
ranked_data = json.load(ranked_data_file)

In [3]:
players = ranked_data["players"]
maps = ranked_data["maps"]
scores = ranked_data["scores"]
filtered_scores = [score for score in scores if score["accuracy"] < 100]

In [4]:
players_by_id = {player["id"] : player for player in players}
maps_by_id = {bmap["id"] : bmap for bmap in maps}

In [5]:
scores_by_player_id = {}
scores_by_map_id = {}

def add_score(score):
    player_id = score["playerId"]
    map_id = score["leaderboardId"]
    
    player_map = scores_by_player_id.get(player_id,{})
    player_map[map_id] = score
    
    scores_by_player_id[player_id] = player_map
    
    map_map = scores_by_map_id.get(map_id,{})
    map_map[player_id] = score
    
    scores_by_map_id[map_id] = map_map
    
#for score in scores:
for score in filtered_scores:
    add_score(score)

In [6]:
modifiers = {"SF":1.2,
             "GN":1,
             "SA":1.05,
             "PM":1,
             "IF":1,
             "NO":0.5,
             "BE":1,
             "SS":0.75,
             "FS":1.05,
             "NB":0.5,
             "SC":1,
             "OD":1,
             "DA":1,
             "CS":1,
             "NA":0.5,
             "OP":0.5}
"""
"SF" - Super Fast,
"GN" - Ghost Notes,
"SA" - Strict Angles,
"PM" - Pro Mode,
"IF" - 1 life,
"NO" - No obstacles (no walls),
"BE" - Battery Energy,
"SS" - Slow song,
"FS" - Fast song,
"NB" - No bombs,
"SC" - Small Notes,
"OD" - Old Dots,
"DA" - Disappearing Arrows,
"CS" - ???,
"NA" - No Arrows -50%,
"OP" - Out of platform -50% 
"""

def modified_score(score,score_modifiers):
    final_score = score
    for modifier,multiplier in modifiers.items():
        if modifier in score_modifiers:
            if multiplier > 1:
                final_score = 1 - (1 - final_score)*(2 - multiplier)
            else:
                final_score = final_score * multiplier
            
    return final_score

In [7]:
"""
This is a mapping on the probability space rather than the population space.
That is, from (0,1) to (0,1). A probability distribution on the (0,1) interval.
We can do this with the Beta distribution, though other distributions might work.

I approximate a value for alpha and beta using this: https://homepage.divms.uiowa.edu/~mbognar/applets/beta.html
Trying to get the median around 0.9 and the probability close to 0 for 0.5, but a little bit higher for 0.65ish.
Beat Saber scores are typically at least 65%, on median probably around 0.9. They also must go down as the value
approaches 1, so beta must be greater than 1.

alpha = 10 and beta = 1.25 seemed to work quite well.
"""
beta_alpha = 10
beta_beta = 1.25

def beta_value(perc_value, beta_alpha=beta_alpha, beta_beta=beta_beta):
    return scipy.stats.beta.cdf(perc_value, beta_alpha, beta_beta)

def perc_beta_value(beta_value,beta_alpha=beta_alpha, beta_beta=beta_beta):
    return scipy.stats.beta.ppf(beta_value, beta_alpha, beta_beta)

"""
After renormalizing with the beta distribution, we apply the probit value, which assumes it's centred around 0.5.

We renormalize this to a value that is more typical of what we usually understand in Beat Saber.
For example, similar to star values. This is not really meant to be accurate, though.
"""

probit_mean = 7
probit_sd = 3

def probit_value(perc_value, probit_mean=probit_mean, probit_sd = probit_sd):
    return scipy.stats.norm.ppf(beta_value(perc_value),loc=probit_mean,scale=probit_sd)

def perc_probit_value(probit_value, probit_mean=probit_mean, probit_sd = probit_sd):
    return perc_beta_value(scipy.stats.norm.cdf(probit_value,loc=probit_mean,scale=probit_sd))

In [11]:
"""
We take the beta renormalization of probabilities above, and instead of using a probit function,
we map it to (0,inf) using a lognorm distribution.

There isn't a strong justification for this (other than the support), but there are some arguments.
What we want is to transform the probabilities into positive values that we can then multiply and divide in a way
that behaves well. Multiplying two values in the lognorm support produces a new lognorm value that is
the result of combining the other two in an independent way.
The shape of the lognorm also matches the way that we expect these values to look.
"""

# This parameterization is NOT a mistake. Read up on lognorm parameters and try to figure out why I did this.
# It isn't something terribly meaningful.
#lognorm_mean = 7
lognorm_mean = 10
#lognorm_sd = 7
lognorm_sd = 10
log_lognorm_sd = math.log(math.log(lognorm_sd))

def lognorm_value(perc_value, lognorm_mean=lognorm_mean, lognorm_sd=lognorm_sd, log_lognorm_sd=log_lognorm_sd):
    return scipy.stats.lognorm.ppf(beta_value(perc_value),s=log_lognorm_sd,loc=0,scale=lognorm_mean)

def perc_lognorm_value(lognorm_value, lognorm_mean=lognorm_mean, lognorm_sd=lognorm_sd, log_lognorm_sd=log_lognorm_sd):
    return perc_beta_value(scipy.stats.lognorm.cdf(lognorm_value,s=log_lognorm_sd,loc=0,scale=lognorm_mean))

In [12]:
"""
Very similar idea to the above, but with a maximum value.

There are some issues with this, like that a 100% score could lower a player's skill level
if it is on a very easy map. But this is very unlikely with the numbers, and 100% scores
are exceedingly rare anyway. There are also ways to deal with that later after estimating
difficulties.
"""

# truncexp_max = 25
truncexp_max = 100
# truncexp_base_mean = 7
truncexp_base_mean = 10

def truncexp_value(perc_value,base_mean=truncexp_base_mean):
    return scipy.stats.truncexpon.ppf(beta_value(perc_value),b=truncexp_max,scale=base_mean)

def perc_truncexp_value(truncexp_value,base_mean=truncexp_base_mean):
    return perc_beta_value(scipy.stats.truncexpon.cdf(truncexp_value,b=truncexp_max,scale=base_mean))

In [13]:
"""
We normalize around a value of 7.
This means that a X star player on a 7 accability map achieves an X star score
and a 7 star player on an X accability map achieves an X star score.
"""

#linear_mean = 7
linear_mean = 10

def score_linear(skill,accability,linear_mean = linear_mean):
    return skill * accability / linear_mean

def accability_linear(skill,score,linear_mean = linear_mean):
    return score / skill * linear_mean

def skill_linear(accability,score,linear_mean = linear_mean):
    return score / accability * linear_mean

In [14]:
def abs_prop_error(value,evalue):        
    return abs((value-evalue)/evalue)

In [15]:
"""
Let's start with something simple: Just use the median instead of the average.
"""

def aggregation_median(scores,default_value=1):
    if scores == []:
        return default_value
    else:
        return statistics.median(scores)
    
def aggregation_median_f(default_value):
    def f(scores):
        return aggregation_median(scores,default_value)
    
    return f

In [16]:
"""
Do the average of the top percentage of scores.
"""
def aggregation_topscores(scores,perc=0.25,default_value=1):
    scores.sort()
    l = len(scores)
    n = math.floor(l*perc)
    
    if n == 0:
        return default_value
    else:
        return statistics.mean(scores[l-n:])
    
def aggregation_topscores_f(perc,default_value):
    def f(scores):
        return aggregation_topscores(scores,perc,default_value)
    
    return f

In [17]:
"""
Do the average of the bottom percentage of scores.
"""
def aggregation_bottomscores(scores,perc=0.25,default_value=1):
    scores.sort()
    l = len(scores)
    n = math.floor(l*perc)
    
    if n == 0:
        return default_value
    else:
        return statistics.mean(scores[0:n])
    
def aggregation_bottomscores_f(perc,default_value):
    def f(scores):
        return aggregation_bottomscores(scores,perc,default_value)
    
    return f

In [73]:
class BiPartiteStabilizer:
    """No description yet"""
    
    def __init__(self,
                 anodes_ratings,bnodes_ratings,
                 wdata,afun,bfun,wfun,
                 aggregation_fun=aggregation_median,
                 default_rating=1,
                 max_iter=50,
                 error_fun=abs_prop_error,
                 error_aggregation_fun=aggregation_bottomscores,
                 finish_early=True,
                 error_change_prop=0.001):
        """
        anodes_ratings and bnodes_ratings must be dictionaries with the node identifiers as keys
        and initial ratings as values.
        
        wdata must be a doubly indexed dictionary with anode identifiers and bnode identifiers as respective indexes,
        respectively, and weights as values.
        
        afun and bfun must be functions taking a value of the other node type as first argument
        and a weight as the second argument, that returns the corresponding value for the node.
        
        Similarly, wfun must be a function that takes the value of an anode and bnode and returns the
        correct weight.
        
        These functions must be such that:
        - wfun(afun(b,w),b) = w
        - wfun(a,bfun(a,w)) = w
        - afun(bfun(a,w),w) = a
        - afun(b,wfun(a,b)) = a
        - bfun(afun(b,w),w) = b
        - bfun(a,wfun(a,b)) = b
        
        error_fun must take two parameters (actual value, expected value) and return a number indicating the error for that
        particular edge
        """
        self.anodes_ratings = anodes_ratings
        self.bnodes_ratings = bnodes_ratings
        
        self.adata = wdata
        self.bdata = self.process_bdata(wdata)
        
        self.afun = afun
        self.bfun = bfun
        self.wfun = wfun
        
        self.aggregation_fun = aggregation_fun
        
        self.error_fun = error_fun
        self.error_aggregation_fun = error_aggregation_fun
        
        self.default_rating = default_rating
        
        self.average_error = math.inf
        
        self.iter = 0
        self.max_iter = max_iter
        
        # Finish early when error increases
        self.finish_early = finish_early
        self.last_average_error = math.inf
        self.last_anodes_ratings = anodes_ratings
        self.last_bnodes_ratings = bnodes_ratings       
    
        self.error_change_prop = error_change_prop
    
    def process_bdata(self, wdata):
        bdata = {}
        
        for anode_id, anode_data in wdata.items():
            for bnode_id, w in anode_data.items():
                bnode_data = bdata.get(bnode_id,{})
                bnode_data[anode_id] = w
                bdata[bnode_id] = bnode_data
        
        return bdata       
        
    def acycle(self):
        calc_values = []
        for anode_id in self.anodes_ratings:
            if anode_id in self.adata:
                calc_values.append(self.anode_process(anode_id))
            
        self.average_error = self.error_aggregation_fun(calc_values)
    
    def anode_process(self,anode_id):
        anode_data = self.adata.get(anode_id,False)
                
        calc_values = []
        for bnode_id, w in anode_data.items():
            bnode_value = self.bnodes_ratings.get(bnode_id,self.default_rating)
            # asum += clamp(self.afun(bnode_value,w))
            # asum += self.afun(bnode_value,w)
            calc_values.append(self.afun(bnode_value,w))
        
        avg = self.aggregation_fun(calc_values)
        
        # self.anodes_ratings[anode_id] = clamp(avg)
        self.anodes_ratings[anode_id] = avg
                                              
        return self.anode_error(anode_id)
        
    def anode_error(self,anode_id):
        anode_data = self.adata[anode_id]
        anode_value = self.anodes_ratings.get(anode_id,self.default_rating)
        
        calc_values = []
        for bnode_id, w in anode_data.items():
            bnode_value = self.bnodes_ratings.get(bnode_id,self.default_rating)
            # calculated_w = clamp(self.wfun(anode_value,bnode_value))
            calculated_w = self.wfun(anode_value,bnode_value)
            # error = clamp(self.error_fun(calculated_w,w))
            error = self.error_fun(calculated_w,w)
            calc_values.append(error)
            
        return self.error_aggregation_fun(calc_values)
    
    def bcycle(self):
        for bnode_id in self.bnodes_ratings:
            if bnode_id in self.bdata:
                self.bnode_process(bnode_id)                
    
    def bnode_process(self,bnode_id):
        bnode_data = self.bdata[bnode_id]
        
        calc_values = []
        for anode_id, w in bnode_data.items():
            anode_value = self.anodes_ratings.get(anode_id,self.default_rating)
            # bsum += clamp(self.bfun(anode_value,w))
            calc_values.append(self.bfun(anode_value,w))
        
        avg = self.aggregation_fun(calc_values)
        
        # self.bnodes_ratings[bnode_id] = clamp(avg)
        self.bnodes_ratings[bnode_id] = avg
    
    def save_last(self):
        self.last_anodes_ratings = self.anodes_ratings
        self.last_bnodes_ratings = self.bnodes_ratings
        self.last_average_error = self.average_error
        
    def restore_last(self):
        self.anodes_ratings = self.last_anodes_ratings
        self.bnodes_ratings = self.last_bnodes_ratings
        self.average_error = self.last_average_error
    
    def iterate(self):
        self.save_last()
        
        self.bcycle()
        self.acycle()
        
        self.iter += 1        
        print(f"Iteration {self.iter}, Average error: {self.average_error}")         
        
    def run(self):
        while (self.iter <= self.max_iter):            
            self.iterate()
            
            # Finish early with previous result if error went up
            if self.finish_early and (self.average_error > self.last_average_error):
                self.restore_last()
                print(f"Finishing early due to increased average error. Restoring previous values.")
                break
                
            # Finish if the error did not change by more than the error_change_prop
            if self.last_average_error != math.inf:
                abs_error_change = abs(self.average_error - self.last_average_error)
                if (abs_error_change / self.last_average_error) < self.error_change_prop:
                    print(f"Finishing due to change in average error less than {self.error_change_prop} (proportional)")
                    break
                
        return (self.anodes_ratings,self.bnodes_ratings)
    
    def measure_error(self,wdata_test):
        calc_values = []
        for anode_id in wdata_test:
            calc_values.append(self.test_node_error(wdata_test,anode_id))
            
        test_average_error = self.error_aggregation_fun(calc_values)
        
        return test_average_error
        
    def test_node_error(self,wdata_test,anode_id):
        anode_data = wdata_test[anode_id]
        anode_value = self.anodes_ratings.get(anode_id,self.default_rating)
        
        calc_values = []
        for bnode_id, w in anode_data.items():
            bnode_value = self.bnodes_ratings.get(bnode_id,self.default_rating)
            # calculated_w = clamp(self.wfun(anode_value,bnode_value))
            calculated_w = self.wfun(anode_value,bnode_value)
            # error = clamp(self.error_fun(calculated_w,w))
            error = self.error_fun(calculated_w,w)
            calc_values.append(error)
            
        return self.error_aggregation_fun(calc_values)
        
    
    def test(self):
        return

In [19]:
#scores_doubly_indexed = {map_id : {player_id : clamp(lognorm_value(scores_by_map_id[map_id][player_id]["accuracy"])) for player_id in scores_by_map_id[map_id]} for map_id in scores_by_map_id}
scores_doubly_indexed = {map_id : {player_id : truncexp_value(modified_score(scores_by_map_id[map_id][player_id]["accuracy"],scores_by_map_id[map_id][player_id]["modifiers"])) for player_id in scores_by_map_id[map_id]} for map_id in scores_by_map_id}

In [65]:
test_set_size = 0.2

scores_doubly_indexed_training_items, scores_doubly_indexed_test_items = sklearn.model_selection.train_test_split(list(scores_doubly_indexed.items()),test_size=test_set_size)
scores_doubly_indexed_training = dict(scores_doubly_indexed_training_items)
scores_doubly_indexed_test = dict(scores_doubly_indexed_test_items)

In [74]:
#default_rating = 7
default_rating = 10

map_ratings = {map_id : default_rating for map_id in scores_by_map_id}
player_ratings = {player_id: default_rating for player_id in scores_by_player_id}

# aggregation_fun = aggregation_median_f(default_rating)
aggregation_topscores_p = 0.25
error_aggregation_bottomscores_p = 0.5
aggregation_fun = aggregation_topscores_f(aggregation_topscores_p,default_rating)
error_aggregation_fun=aggregation_bottomscores_f(error_aggregation_bottomscores_p,0)

#bps = BiPartiteStabilizer(map_ratings,player_ratings,scores_doubly_indexed,
bps = BiPartiteStabilizer(map_ratings,player_ratings,scores_doubly_indexed_training,
                          accability_linear,skill_linear,score_linear,
                          aggregation_fun=aggregation_fun,
                          default_rating = default_rating,
                          error_aggregation_fun=error_aggregation_fun,
                          max_iter=500,
                          finish_early=False,
                          error_change_prop=0.001)

#bps.iterate()
#(maps_1,players_1) = (copy.deepcopy(bps.anodes_ratings), copy.deepcopy(bps.bnodes_ratings))
#maps_1_errors = {key:bps.anode_error(key) for key in maps_1}
#bps.iterate()
#(maps_2,players_2) = (copy.deepcopy(bps.anodes_ratings), copy.deepcopy(bps.bnodes_ratings))

(map_ratings,player_ratings) = bps.run()

Iteration 1, Average error: 0.1688684366864006
Iteration 2, Average error: 0.1313656729784262
Iteration 3, Average error: 0.1263274060276248
Iteration 4, Average error: 0.12732447721863638
Iteration 5, Average error: 0.12927804445580648
Iteration 6, Average error: 0.1310664424220803
Iteration 7, Average error: 0.1324905372060132
Iteration 8, Average error: 0.133562785256775
Iteration 9, Average error: 0.1343596604091077
Iteration 10, Average error: 0.13494814314198314
Iteration 11, Average error: 0.13538413845044528
Iteration 12, Average error: 0.13570695164603364
Iteration 13, Average error: 0.13594641562210566
Iteration 14, Average error: 0.13612362770272193
Iteration 15, Average error: 0.1362547643585967
Finishing due to change in average error less than 0.001 (proportional)


In [75]:
print(bps.measure_error(scores_doubly_indexed_test))

0.14514429911720494


In [None]:
maps_1_errors_outliers = {map_id:error for (map_id,error) in maps_1_errors.items() if error > 500}
print(maps_1_errors_outliers)

In [None]:
def errors_in_map(map_id,map_ratings,player_ratings):
    result = {}
    map_rating = map_ratings[map_id]
    for player_id,score in scores_doubly_indexed[map_id].items():
        player_rating = player_ratings[player_id]
        calculated_w = bps.wfun(map_rating,player_rating)
        error = bps.error_fun(calculated_w,score)
        result[player_id] = {"calculated_w":calculated_w,"actual_score":score,"error":error}
        
    return result

In [None]:
errors_this_map = errors_in_map("1442791",maps_1,players_1)
error_outliers = {player_id:error for (player_id,error) in errors_this_map.items() if error["error"] < 1}
print(error_outliers)

In [None]:
scores_by_map_id["2c803x91"]["76561198279631500"]

In [None]:
hard_maps = {map_id : rating for (map_id,rating) in maps_1.items() if rating < 2}
print(hard_maps)

In [44]:
map_outliers = {map_id : rating for (map_id,rating) in map_ratings.items() if rating < 3}
player_outliers = {player_id : rating for (player_id,rating) in player_ratings.items() if rating > 30}

In [45]:
print(map_outliers)
print(player_outliers)

{'3aa79xxxxxxxxx91': 2.9983917108433134}
{'3225556157461414': 33.171761349303125, '76561198153101808': 30.003939384760965, '76561198180044686': 31.456065397390393, '1922350521131465': 36.814033827785586, '76561198960449289': 30.20899468238246, '2169974796454690': 32.43384207023366, '76561198988695829': 33.78586499399944, '76561199104169308': 31.669039542578826, '76561199085118735': 30.972847109127706, '2085408448198355': 30.172603258615307, '76561198166061709': 33.45126583852632, '2769016623220259': 30.502264978763137, '76561198404774259': 32.52203161393578, '76561198313983208': 30.243132586166997, '76561198027277296': 30.902036435207542, '76561198333869741': 31.790193224184744, '76561199465530115': 31.61019030869898}


In [48]:
hard_maps = {map_id : rating for (map_id,rating) in map_ratings.items() if rating < 3.5}
print(hard_maps)

{'2dd6cxx92': 3.429056313784843, '36bce91': 3.459483467311768, '3442cxxx91': 3.4613813138465224, '100991': 3.495069364992336, '3697axxxx91': 3.142978265451161, '3aa79xxxxxxxxx91': 2.9983917108433134, '2c803x91': 3.24554357595033, '3b2bcxxxx91': 3.0955528094955755, '387a0xxxx91': 3.10844027959186}


In [49]:
easy_maps = {map_id : rating for (map_id,rating) in map_ratings.items() if rating > 25}
print(easy_maps)

{'37090x11': 26.395957859123858, '2713cx11': 25.41289796756997, '2a57111': 30.637709200160515, '33bd7xxxx11': 26.158319175785387, '3bb99x11': 28.557325769783343, '1e0b511': 26.79123865958854, '7e8f11': 31.013547175227338, '7a631': 25.666838990473547, 'd90a11': 26.899847909811}


In [50]:
good_players = {player_id : rating for (player_id,rating) in player_ratings.items() if rating > 30}
print(good_players)

{'3225556157461414': 33.171761349303125, '76561198153101808': 30.003939384760965, '76561198180044686': 31.456065397390393, '1922350521131465': 36.814033827785586, '76561198960449289': 30.20899468238246, '2169974796454690': 32.43384207023366, '76561198988695829': 33.78586499399944, '76561199104169308': 31.669039542578826, '76561199085118735': 30.972847109127706, '2085408448198355': 30.172603258615307, '76561198166061709': 33.45126583852632, '2769016623220259': 30.502264978763137, '76561198404774259': 32.52203161393578, '76561198313983208': 30.243132586166997, '76561198027277296': 30.902036435207542, '76561198333869741': 31.790193224184744, '76561199465530115': 31.61019030869898}


In [59]:
#print(scores_doubly_indexed["7e8f11"])
#print(maps_by_id["100991"])
print(players_by_id["1922350521131465"])

{'name': 'oermergeesh', 'country': 'US', 'id': '1922350521131465', 'avatar': 'https://cdn.assets.beatleader.xyz/1922350521131465R43.png'}


In [None]:
print(player_ratings["76561199108348236"])

In [None]:
print(scores_by_player_id["76561199108348236"])

In [None]:
print(map_ratings["38419x92"])

In [None]:
filtered_map_ratings = {map_id : rating for (map_id,rating) in map_ratings.items() if rating < 3.69 and rating > 3.68}
print(filtered_map_ratings)

In [None]:
print(len(scores))
print(len(filtered_scores))

In [None]:
print(scores_by_player_id["76561198167588802"])

In [None]:
print(truncexp_value(0.94))
print(perc_truncexp_value(12))

In [None]:
print(skill_probspace_truncexp(0,25))

In [None]:
print(score_probspace_truncexp(68,68))

In [None]:
print(skill_linear(1,25))

In [None]:
print(aggregation_topscores([1,2,3,4,5,6,7,8,9,10,11,12,13,14,100]))

In [None]:
[1,2,3,4][0:2]

In [None]:
a = [3,1,2,4]
a.sort()
print(a)

In [None]:
error_aggregation_fun([0.1,0.2,0.3,0.4,100])

In [None]:
modified_scores = [score for score in scores if score["modifiers"] != "" and score["accuracy"] < 100]

In [None]:
modifier_combos = set([score["modifiers"] for score in modified_scores])
print(modifier_combos)

In [None]:
print(modified_score(0.95,"DA,SF,NO"))

In [None]:
modifiers = ["SF","GN","SA","PM","IF","NO","BE","SS","FS","NB","SC","OD","DA","CS","NA","OP"]
remaining_modifier_combos = modifier_combos
for modifier in modifiers:
    remaining_modifier_combos = [modifier_combo.replace(modifier+",","") for modifier_combo in remaining_modifier_combos]
    remaining_modifier_combos = [modifier_combo.replace(modifier,"") for modifier_combo in remaining_modifier_combos]
    remaining_modifier_combos = set(remaining_modifier_combos)    
    if "" in remaining_modifier_combos:
        remaining_modifier_combos.remove("")    
    
print(remaining_modifier_combos)