# Reverse engineer the rank algorithm

Adapted from [calculateRank.js][1]

[1]: https://github.com/anuraghazra/github-readme-stats/blob/f83080601e87dfe76b003cd08c675cbb2f6204e9/src/calculateRank.js#L2

In [1]:
from math import sqrt, exp
from functools import partial

### Define enums and stats class

In [2]:
class Offset:
    """Per-statistic weights used when turning raw counts into a score."""
    # compute_score() multiplies each count by the matching weight here,
    # and sum_enum('offset') adds all of these weights together.
    COMMITS = 1.65
    CONTRIBS = 1.65
    ISSUES = 1
    STARS = 0.75
    PRS = 0.5
    FOLLOWERS = 0.45
    REPOS = 1


class Rank:
    """Normalized-score thresholds that separate the rank labels."""
    # compute_rank() uses strict "<" comparisons against these values, so a
    # lower normalized score means a better rank. sum_enum('rank') adds them up.
    S = 1  # normalized score below this -> 'S+'
    DOUBLE_A = 25  # below this -> 'S'
    A2 = 45  # below this -> 'A++'
    A3 = 60  # below this -> 'A+'
    B = 100  # below this -> 'B+'; otherwise 'B'

In [3]:
class Stats:
    """Read-only view over the activity counts that feed the score.

    The constructor takes a mapping with any of the keys ``commits``,
    ``contribs``, ``issues``, ``stars``, ``prs``, ``followers`` and
    ``repos``. Keys that are absent count as 0. Values are captured at
    construction time and exposed through properties without setters.
    """

    def __init__(self, counts):
        """Snapshot the known counts out of ``counts`` (missing keys -> 0)."""
        known_fields = ('commits', 'contribs', 'issues', 'stars',
                        'prs', 'followers', 'repos')
        self.__snapshot = {field: counts.get(field, 0)
                           for field in known_fields}

    @property
    def commits(self):
        """Number of commits."""
        return self.__snapshot['commits']

    @property
    def contribs(self):
        """Number of contributions."""
        return self.__snapshot['contribs']

    @property
    def issues(self):
        """Number of issues."""
        return self.__snapshot['issues']

    @property
    def stars(self):
        """Number of stars."""
        return self.__snapshot['stars']

    @property
    def prs(self):
        """Number of pull requests."""
        return self.__snapshot['prs']

    @property
    def followers(self):
        """Number of followers."""
        return self.__snapshot['followers']

    @property
    def repos(self):
        """Number of repositories."""
        return self.__snapshot['repos']

### Define helper functions

In [4]:
def sum_enum(key):
    """Return the total of one group of constants.

    ``'offset'`` yields the sum of every ``Offset`` weight and ``'rank'``
    the sum of every ``Rank`` threshold. Any other key yields 0.
    """
    groups = {
        'offset': (Offset.COMMITS, Offset.CONTRIBS, Offset.ISSUES,
                   Offset.STARS, Offset.PRS, Offset.FOLLOWERS, Offset.REPOS),
        'rank': (Rank.S, Rank.DOUBLE_A, Rank.A2, Rank.A3, Rank.B),
    }
    totals = {}
    for group, members in groups.items():
        # Add left to right, one value at a time, so float rounding is
        # the same as a plain chained "+" expression.
        running = members[0]
        for member in members[1:]:
            running = running + member
        totals[group] = running
    return totals.get(key, 0)

In [5]:
def normalcdf(mean, sigma, to):
    """Approximate the normal CDF: P(X <= ``to``) for X ~ N(mean, sigma).

    The error function is evaluated with a five-term polynomial
    approximation (the coefficients match Abramowitz & Stegun 7.1.26)
    rather than an exact ``erf``, so results carry a small
    approximation error.
    """
    z = (to - mean) / sqrt(2 * sigma * sigma)
    t = 1 / (1 + 0.3275911 * abs(z))

    # Horner evaluation of the polynomial in t, highest-order term first.
    poly = 1.061405429
    for coefficient in (-1.453152027, 1.421413741,
                        -0.284496736, 0.254829592):
        poly = poly * t + coefficient

    # The approximation is for erf(|z|); erf is odd, so flip the sign
    # for negative z.
    erf_value = 1 - poly * t * exp(-z * z)
    if z < 0:
        erf_value = -erf_value
    return (1 + erf_value) / 2

### Define compute functions

In [6]:
def compute_score(stats):
    """Return the raw score for ``stats``.

    Each count is multiplied by its ``Offset`` weight, the products are
    added together, and the total is divided by 100.
    """
    weighted_counts = (
        stats.commits * Offset.COMMITS,
        stats.contribs * Offset.CONTRIBS,
        stats.issues * Offset.ISSUES,
        stats.stars * Offset.STARS,
        stats.prs * Offset.PRS,
        stats.followers * Offset.FOLLOWERS,
        stats.repos * Offset.REPOS,
    )
    # Accumulate left to right so float rounding matches a chained "+".
    total = weighted_counts[0]
    for term in weighted_counts[1:]:
        total = total + term
    return total / 100

In [7]:
def compute_normalized_score(score):
    """Convert a raw score into the 0-100 value that is compared to ``Rank``.

    The value is 100 times the normal CDF evaluated at the summed
    ``Offset`` weights, for a distribution whose mean is ``score`` and
    whose sigma is the summed ``Rank`` thresholds. A higher raw score
    therefore gives a lower normalized score.
    """
    cdf_value = normalcdf(mean=score,
                          sigma=sum_enum('rank'),
                          to=sum_enum('offset'))
    return cdf_value * 100

In [8]:
def compute_rank(normalized_score):
    """Return the rank label for a normalized score (lower is better).

    The thresholds are checked from the strictest upwards and the first
    one that the score is strictly below decides the label. A score
    that is below none of them is ranked 'B'.
    """
    if normalized_score < Rank.S:
        return 'S+'
    if normalized_score < Rank.DOUBLE_A:
        return 'S'
    if normalized_score < Rank.A2:
        return 'A++'
    if normalized_score < Rank.A3:
        return 'A+'
    if normalized_score < Rank.B:
        return 'B+'
    return 'B'

### Sanity check with my GitHub stats

In [9]:
# Hand-entered counts for one profile; the keys are the ones Stats.__init__ reads.
my_profile = {'repos': 33,
              'commits': 880, 
              'contribs': 7,
              'followers': 5,
              'prs': 30,
              'issues': 41,
              'stars': 1}
# Full pipeline: counts -> raw score -> normalized score -> rank label.
my_stats = Stats(my_profile)
my_score = compute_score(my_stats)
my_normalized_score = compute_normalized_score(my_score)
my_rank = compute_rank(my_normalized_score)
print(f"score = {my_score} rank = {my_rank}")

score = 15.5555 rank = A+


### Everyone gets an A+!

In [10]:
# A raw score of 0 is what compute_score() returns when every count is 0.
zero_normalized_score = compute_normalized_score(0)
zero_rank = compute_rank(zero_normalized_score)
# Bare expression so the notebook displays the resulting label.
zero_rank

'A+'

### Find the rank distribution
There is probably an O(1) way to do this (for example, by inverting the normal CDF), but a binary search over scores is simple enough here.

In [11]:
def binary_search(function, target, lower=0, upper=1e4):
    """Find the first point at which ``function`` drops below ``target``.

    ``function`` is assumed to be non-increasing on ``[lower, upper]``.
    The interval is bisected with floor division until the bracket is at
    most one unit wide.

    Parameters:
        function: callable taking one number and returning a comparable value.
        target: value that ``function(x)`` must fall strictly below.
        lower: start of the search interval.
        upper: end of the search interval. The default is a float, so
            results are floats (e.g. ``37.0``) unless integer bounds are
            passed.

    Returns:
        The smallest probed ``x`` with ``function(x) < target``.
        Returns ``-1`` when the interval is too narrow to probe
        (``lower + 1 >= upper``). If ``function`` never drops below
        ``target`` inside the interval, ``upper`` is returned unchanged,
        so callers who need to detect that case should check
        ``function(result) < target`` themselves.
    """
    if not lower + 1 < upper:
        # Nothing to probe; keep the historical sentinel value.
        return -1
    while lower + 1 < upper:
        mid = (lower + upper) // 2
        if function(mid) >= target:
            lower = mid
        else:
            upper = mid
    # `upper` is the tightest point known to satisfy function(x) < target.
    # Returning the last midpoint instead would give `lower` (one short of
    # the threshold) whenever the final step moved the lower bound.
    return upper


# Pre-bind the function being searched. Because `function` is bound by
# keyword, callers must pass `target` (and any bounds) as keyword arguments;
# a positional argument would collide with the bound `function`.
find_distribution = partial(binary_search, function=compute_normalized_score)

In [12]:
# Thresholds to search for, paired index-by-index with the label that
# compute_rank() gives to scores below each one.
reachables = (Rank.A2, Rank.DOUBLE_A, Rank.S)
names = ('A++', 'S', 'S+')
# Maps rank label -> raw score found by the search; read by later cells.
thresholds = {}
for reachable, name in zip(reachables, names):
    result = find_distribution(target=reachable)
    print(f"Score needed to reach {name}: {result}")
    thresholds[name] = result

Score needed to reach A++: 37.0
Score needed to reach S: 163.0
Score needed to reach S+: 545.0


### How many commits until I reach the next rank?

In [13]:
def calculate_commits(current, next):
    """Print how many commits close the gap between two raw scores.

    ``current`` and ``next`` are raw scores as returned by
    ``compute_score``; ``current`` must be strictly below ``next``. The
    estimate assumes the whole gap is covered by commits alone. Nothing
    is returned; the result is printed.
    """
    assert current < next
    # compute_score() divides by 100, so scale the gap back up before
    # converting it to commits at Offset.COMMITS points each.
    commits = round((next - current) * 100 / Offset.COMMITS)
    per_day = round(commits / 365, 1)
    message = (f"You need {commits} commits, or around "
               f"{per_day} commits a day for the next 365 days.")
    print(message)

In [14]:
# Gap between the sanity-check score above and the score found for 'A++'.
calculate_commits(my_score, thresholds['A++'])

You need 1300 commits, or around 3.6 commits a day for the next 365 days.
