In [344]:
import copy
import json
import math
import scipy

In [5]:
ranked_data_path = f"bigresponse.json"
ranked_data_file = open(ranked_data_path,encoding="utf-8")
ranked_data = json.load(ranked_data_file)

In [437]:
players = ranked_data["players"]
maps = ranked_data["maps"]
scores = ranked_data["scores"]
filtered_scores = [score for score in scores if score["modifiers"] == "" and score["accuracy"] != 100]
# SHOULD FILTER TO SCORES WITH NO MODIFIERS AND/OR REWEIGHT THE SCORES

In [27]:
players_by_id = {player["id"] : player for player in players}
maps_by_id = {bmap["id"] : bmap for bmap in maps}

In [438]:
scores_by_player_id = {}
scores_by_map_id = {}

def add_score(score):
    player_id = score["playerId"]
    map_id = score["leaderboardId"]
    
    player_map = scores_by_player_id.get(player_id,{})
    player_map[map_id] = score
    
    scores_by_player_id[player_id] = player_map
    
    map_map = scores_by_map_id.get(map_id,{})
    map_map[player_id] = score
    
    scores_by_map_id[map_id] = map_map
    
#for score in scores:
for score in filtered_scores:
    add_score(score)

In [435]:
def clamp(v,vmin=0,vmax=30):
    return min(max(v,vmin),vmax)

In [74]:
linear_value_max = 10000
def linear_value(perc_value):
    """
    Turn [0,1) score into a more linear scale, to some degree. Just an approximation.
    Reducing distance from 100% by a certain proportion increases the linear score by that same proportion.
    """
    if (perc_value >= ((linear_value_max-1)/linear_value_max)):
        return linear_value_max
    
    if (perc_value <= 0):
        return 1
    
    return 1/(1-perc_value)

# Inverse of the above.
def perc_value(value):
    return (value-1)/value

In [263]:
"""
THE ARGUMENT BELOW IS WRONG IN THAT I DO NOT WANT (-inf,inf) BUT (0,inf), BUT I'M KEEPING IT FOR REFERENCE.
SEE BELOW FOR FIXING IT.

The issue with the above is the minimum value of 1. Ideally, I want to map (0,1) to (-inf,inf).
This is exactly what quantile functions do. So I can try some quantile functions that make sense.
(Renormalized) logit makes sense because sigmoid behaviours make some sense.
But perhaps what makes even more sense is probit because it's the quantile function of the normal distribution.
It makes sense to think of players, accability, and scores as normally distributed after re-scaling.

However, we need to find an adequate mean and standard deviation for this. Ideally, we set a percentage value
that is "standard" and a distance (in one direction) that is "about the standard deviation", and calculate
the reverse values backwards.

THIS PART IS ACCURATE, THOUGH.

But note that this is a mapping on the probability space rather than the population space.
That is, from (0,1) to (0,1). A probability distribution on the (0,1) interval.
We can do this with the Beta distribution, though other distributions might work.

I approximate a value for alpha and beta using this: https://homepage.divms.uiowa.edu/~mbognar/applets/beta.html
Trying to get the median around 0.9 and the probability close to 0 for 0.5, but a little bit higher for 0.65ish.
Beat Saber scores are typically at least 65%, on median probably around 0.9. They also must go down as the value
approaches 1, so beta must be greater than 1.

alpha = 10 and beta = 1.25 seemed to work quite well.
"""
beta_alpha = 10
beta_beta = 1.25

def beta_value(perc_value, beta_alpha=beta_alpha, beta_beta=beta_beta):
    return scipy.stats.beta.cdf(perc_value, beta_alpha, beta_beta)

def perc_beta_value(beta_value,beta_alpha=beta_alpha, beta_beta=beta_beta):
    return scipy.stats.beta.ppf(beta_value, beta_alpha, beta_beta)

"""
After renormalizing with the beta distribution, we apply the probit value, which assumes it's centred around 0.5.

We renormalize this to a value that is more typical of what we usually understand in Beat Saber.
For example, similar to star values. This is not really meant to be accurate, though.
"""

probit_mean = 7
probit_sd = 3

def probit_value(perc_value, probit_mean=probit_mean, probit_sd = probit_sd):
    return scipy.stats.norm.ppf(beta_value(perc_value),loc=probit_mean,scale=probit_sd)

def perc_probit_value(probit_value, probit_mean=probit_mean, probit_sd = probit_sd):
    return perc_beta_value(scipy.stats.norm.cdf(probit_value,loc=probit_mean,scale=probit_sd))

In [264]:
"""
We take the beta renormalization of probabilities above, and instead of using a probit function,
we map it to (0,inf) using a lognorm distribution.

There isn't a strong justification for this (other than the support), but there are some arguments.
What we want is to transform the probabilities into positive values that we can then multiply and divide in a way
that behaves well. Multiplying two values in the lognorm support produces a new lognorm value that is
the result of combining the other two in an independent way.
The shape of the lognorm also matches the way that we expect these values to look.
"""

# This parameterization is NOT a mistake. Read up on lognorm parameters and try to figure out why I did this.
# It isn't something terribly meaningful.
lognorm_mean = 7
lognorm_sd = 7
log_lognorm_sd = math.log(math.log(lognorm_sd))

def lognorm_value(perc_value, lognorm_mean=lognorm_mean, lognorm_sd=lognorm_sd, log_lognorm_sd=log_lognorm_sd):
    return scipy.stats.lognorm.ppf(beta_value(perc_value),s=log_lognorm_sd,loc=0,scale=lognorm_mean)

def perc_lognorm_value(lognorm_value, lognorm_mean=lognorm_mean, lognorm_sd=lognorm_sd, log_lognorm_sd=log_lognorm_sd):
    return perc_beta_value(scipy.stats.lognorm.cdf(lognorm_value,s=log_lognorm_sd,loc=0,scale=lognorm_mean))

In [380]:
"""
From here onwards we assume score is linearized by using linear_score above.

We normalize around a value of 7.
This means that a X star player on a 7 accability map achieves an X star score
and a 7 star player on an X accability map achieves an X star score.
"""

linear_mean = 7
min_value = 0.00001

def score_linear(skill,accability,linear_mean = linear_mean):
    return skill * accability / linear_mean

def accability_linear(skill,score,linear_mean = linear_mean):
    if (skill < min_value):
        vskill = min_value
    else:
        vskill = skill
    return score / vskill * linear_mean

def skill_linear(accability,score,linear_mean = linear_mean):
    if (accability < min_value):
        vaccability = min_value
    else:
        vaccability = accability
    return score / vaccability * linear_mean

In [102]:
"""
The main idea here is that the score of a player with skill level x on a map with accability x will be x.
In other words, the skill and accability are on the same scale as the score.
A player with skill level x will obtain score x on a map of their corresponding accability. Etc.

PROBLEMS:
The idea is misconstrued. Better players shine at less accable maps, so the equalization is to a large degree fake,
meaning the scaling is broken.
"""
def score_equalized(skill,accability):
    return math.sqrt(skill * accability)

def accability_equalized(skill,score):
    return score*score / skill

def skill_linear(accability,score):
    return score*score / accability

In [379]:
def abs_prop_error(value,evalue):        
    if (value == 0):
        vvalue = 0.0001
    else:
        vvalue = value
    return abs((vvalue-evalue)/vvalue)

In [439]:
class BiPartiteStabilizer:
    """No description yet"""
    
    def __init__(self,anodes_ratings,bnodes_ratings,wdata,afun,bfun,wfun,default_rating=1,max_iter=50,error_fun=abs_prop_error):
        """
        anodes_ratings and bnodes_ratings must be dictionaries with the node identifiers as keys
        and initial ratings as values.
        
        wdata must be a doubly indexed dictionary with anode identifiers and bnode identifiers as respective indexes,
        respectively, and weights as values.
        
        afun and bfun must be functions taking a value of the other node type as first argument
        and a weight as the second argument, that returns the corresponding value for the node.
        
        Similarly, wfun must be a function that takes the value of an anode and bnode and returns the
        correct weight.
        
        These functions must be such that:
        - wfun(afun(b,w),b) = w
        - wfun(a,bfun(a,w)) = w
        - afun(bfun(a,w),w) = a
        - afun(b,wfun(a,b)) = a
        - bfun(afun(b,w),w) = b
        - bfun(a,wfun(a,b)) = b
        
        error_fun must take two parameters (actual value, expected value) and return a number indicating the error for that
        particular edge
        """
        self.anodes_ratings = anodes_ratings
        self.bnodes_ratings = bnodes_ratings
        
        self.adata = wdata
        self.bdata = self.process_bdata(wdata)
        
        self.afun = afun
        self.bfun = bfun
        self.wfun = wfun
        
        self.error_fun = error_fun
        
        self.default_rating = default_rating
        
        self.average_error = math.inf
        
        self.iter = 0
        self.max_iter = max_iter
    
    def process_bdata(self, wdata):
        bdata = {}
        
        for anode_id, anode_data in wdata.items():
            for bnode_id, w in anode_data.items():
                bnode_data = bdata.get(bnode_id,{})
                bnode_data[anode_id] = w
                bdata[bnode_id] = bnode_data
        
        return bdata       
        
    def acycle(self):
        aerror = 0
        n = len(self.anodes_ratings)
        for anode_id in self.anodes_ratings:
            aerror += self.anode_process(anode_id)
            
        if (n > 0):
            self.average_error = clamp(aerror/n)
        else:
            self.average_error = 0
    
    def anode_process(self,anode_id):
        anode_data = self.adata[anode_id]
        
        asum = 0
        n = len(anode_data)
        for bnode_id, w in anode_data.items():
            bnode_value = self.bnodes_ratings.get(bnode_id,self.default_rating)
            asum += clamp(self.afun(bnode_value,w))
        
        if (n > 0):
            avg = asum/n
        else:
            avg = 0
        
        self.anodes_ratings[anode_id] = clamp(avg)
                                              
        return self.anode_error(anode_id)
        
    def anode_error(self,anode_id):
        anode_data = self.adata[anode_id]
        anode_value = self.anodes_ratings.get(anode_id,self.default_rating)
        
        aerror = 0
        n = len(anode_data)
        for bnode_id, w in anode_data.items():
            bnode_value = self.bnodes_ratings.get(bnode_id,self.default_rating)
            calculated_w = clamp(self.wfun(anode_value,bnode_value))
            error = clamp(self.error_fun(calculated_w,w))
            aerror += error
            
        if (n > 0):
            return clamp(aerror/n)
        else:
            return 0

    def bcycle(self):
        for bnode_id in self.bnodes_ratings:
            self.bnode_process(bnode_id)
    
    def bnode_process(self,bnode_id):
        bnode_data = self.bdata[bnode_id]
        
        bsum = 0
        n = len(bnode_data)
        for anode_id, w in bnode_data.items():
            anode_value = self.anodes_ratings.get(anode_id,self.default_rating)
            bsum += clamp(self.bfun(anode_value,w))
        
        if (n > 0):
            avg = bsum/n
        else:
            avg = 0
        
        self.bnodes_ratings[bnode_id] = clamp(avg)
    
    def iterate(self):
        self.bcycle()
        self.acycle()
        
        self.iter += 1
        
        print(f"Iteration {self.iter}, Average error: {self.average_error}")
        
    def run(self):
        while (self.iter <= self.max_iter):
            self.iterate()
        
        return (self.anodes_ratings,self.bnodes_ratings)
    
    def test(self):
        return

In [440]:
scores_doubly_indexed = {map_id : {player_id : clamp(lognorm_value(scores_by_map_id[map_id][player_id]["accuracy"])) for player_id in scores_by_map_id[map_id]} for map_id in scores_by_map_id}

In [441]:
default_rating = 7

map_ratings = {map_id : default_rating for map_id in scores_by_map_id}
player_ratings = {player_id: default_rating for player_id in scores_by_player_id}

bps = BiPartiteStabilizer(map_ratings,player_ratings,scores_doubly_indexed,accability_linear,skill_linear,score_linear,max_iter=7)

#bps.iterate()
#(maps_1,players_1) = (copy.deepcopy(bps.anodes_ratings), copy.deepcopy(bps.bnodes_ratings))
#bps.iterate()
#(maps_2,players_2) = (copy.deepcopy(bps.anodes_ratings), copy.deepcopy(bps.bnodes_ratings))
(map_ratings,player_ratings) = bps.run()

Iteration 1, Average error: 0.18907809734705544
Iteration 2, Average error: 0.1699605772916541
Iteration 3, Average error: 0.16513725817019004
Iteration 4, Average error: 0.16384153536419413
Iteration 5, Average error: 0.1635329070284382
Iteration 6, Average error: 0.16349535768880627
Iteration 7, Average error: 0.16352111991852028
Iteration 8, Average error: 0.1635525845257522


In [442]:
map_outliers = {map_id : rating for (map_id,rating) in map_ratings.items() if rating > 20}
player_outliers = {player_id : rating for (player_id,rating) in player_ratings.items() if rating > 20}

In [443]:
print(map_outliers)
print(player_outliers)

{}
{}


In [444]:
hard_maps = {map_id : rating for (map_id,rating) in map_ratings.items() if rating < 3}
print(hard_maps)

{'3697axxxx91': 2.8624011062297696, '3aa79xxxxxxxxx91': 2.8031388612150603, '2c803x91': 2.8821235746592313, '3b2bcxxxx91': 2.9767392121042957}


In [445]:
easy_maps = {map_id : rating for (map_id,rating) in map_ratings.items() if rating > 14}
print(easy_maps)

{'35d5bx51': 14.158341327679167, '2a57111': 14.977812110383827, '3bb99x11': 14.117199992125041, 'd90a11': 14.172726101941393}


In [446]:
good_players = {player_id : rating for (player_id,rating) in player_ratings.items() if rating > 18}
print(good_players)

{'76561198988695829': 18.990135386663926, '76561199085118735': 18.717485485516477, '76561198166061709': 18.610395549466286, '2769016623220259': 18.254324153594656, '76561198404774259': 18.02911657143716, '76561198333869741': 18.52770458659781}


In [482]:
#print(scores_doubly_indexed["7e8f11"])
print(maps_by_id["3aa79xxxxxxxxx91"])
#print(players_by_id["76561198333869741"])

{'hash': '7c9e0a7c523395c7ef9d79006b9d42dc6ab8b44a', 'name': 'Gleodream', 'id': '3aa79xxxxxxxxx91', 'songId': '3aa79xxxxxxxxx', 'modeName': 'Standard', 'difficultyName': 'ExpertPlus', 'accRating': 13.423407, 'passRating': 11.142719, 'techRating': 13.089587, 'predictedAcc': 0.9590463, 'modifiersRating': {'id': 1201606, 'fsPredictedAcc': 0.9549841, 'fsPassRating': 14.057323, 'fsAccRating': 14.010982, 'fsTechRating': 13.793677, 'fsStars': 16.892323, 'ssPredictedAcc': 0.9620437, 'ssPassRating': 9.337413, 'ssAccRating': 12.964377, 'ssTechRating': 12.825914, 'ssStars': 13.682217, 'sfPredictedAcc': 0.94866556, 'sfPassRating': 18.739632, 'sfAccRating': 14.878941, 'sfTechRating': 14.6978245, 'sfStars': 20.363726}}


In [473]:
print(map_ratings["3b5a9xxxx92"])

3.681517361588999


In [474]:
filtered_map_ratings = {map_id : rating for (map_id,rating) in map_ratings.items() if rating < 3.69 and rating > 3.68}
print(filtered_map_ratings)

{'340c0xxxxx91': 3.684381639202773, '1b49e91': 3.6856939849638377, '3b5a9xxxx92': 3.681517361588999}
