In [6]:
from math import sqrt

"""
https://medium.com/hacking-and-gonzo/how-reddit-ranking-algorithms-work-ef111e33d0d9#.1g1qoyh5o

https://en.wikipedia.org/wiki/Binomial_proportion_confidence_interval#Wilson_score_interval

https://en.wikipedia.org/wiki/Binomial_proportion_confidence_interval#Wilson_score_interval

http://stats.stackexchange.com/questions/82475/calculate-the-confidence-interval-for-the-mean-of-a-beta-distribution

In the Wilson score formula referenced above, the parameters are defined as follows:
p is the observed fraction of positive ratings
n is the total number of ratings
z_{α/2} is the (1-α/2) quantile of the standard normal distribution
To summarize:
The confidence sort treats the vote count as a statistical sampling of a hypothetical full vote by everyone
The confidence sort gives a comment a provisional ranking that it is 85% sure it will get to
The more votes, the closer the 85% confidence score gets to the actual score
Wilson’s interval has good properties for a small number of trials and/or an extreme probability


http://www.sigmazone.com/binomial_confidence_interval.htm
"""

def _confidence(ups, downs):
    n = ups + downs

    if n == 0:
        return 0

    z = 1.281551565545
    p = float(ups) / n

    left = p + 1/(2*n)*z*z
    right = z*sqrt(p*(1-p)/n + z*z/(4*n*n))
    under = 1+1/n*z*z

    return (left - right) / under

def confidence(ups, downs):
    """Return the confidence score for a tally of ``ups`` and ``downs``.

    Yields ``0`` when the two counts sum to zero; in every other case the
    result is whatever ``_confidence(ups, downs)`` returns.
    """
    total_votes = ups + downs
    return 0 if total_votes == 0 else _confidence(ups, downs)

In [2]:
confidence(1,1)

0.16425172002984176

In [3]:
confidence(1,2)

0.10577563612168649

In [5]:
confidence(5,2)

0.4724628165565235

In [7]:
confidence(50,20)

0.6407975973186971

In [8]:
confidence(500,200)

0.6919218926264943