# Import Necessary Libraries

In [None]:
import numpy as np
import pandas as pd
import scipy.stats as st
import math

# Notebook-wide pandas display settings: never truncate columns or rows,
# allow wide output lines, and render floats with two decimal places.
pd.set_option("display.max_columns", None)
pd.set_option("display.max_rows", None)
pd.set_option("display.width", 500)
pd.set_option("display.float_format", "{:.2f}".format)

# Import Dataset

In [None]:
# Raw vote counts: position i in each list belongs to the same review.
up = [
    15, 70, 14, 4, 2, 5, 8, 37, 21, 52, 28,
    147, 61, 30, 23, 40, 37, 61, 54, 18, 12, 68,
]
down = [
    0, 2, 2, 2, 15, 2, 6, 5, 23, 8, 12,
    2, 1, 1, 5, 1, 2, 6, 2, 0, 2, 2,
]

# One row per review, one column per vote type.
reviews = pd.DataFrame(dict(up=up, down=down))
reviews

Unnamed: 0,up,down
0,15,0
1,70,2
2,14,2
3,4,2
4,2,15
5,5,2
6,8,6
7,37,5
8,21,23
9,52,8


# Up-Down Difference Score

up(like) - down(dislike)

In [None]:
def up_down_diff_score(up, down):
    """Return the net vote count, i.e. likes minus dislikes.

    Args:
        up: Number of up (like) votes; a scalar or anything supporting ``-``
            (the notebook also passes whole pandas Series).
        down: Number of down (dislike) votes, same kind as ``up``.

    Returns:
        ``up - down``.
    """
    net_votes = up - down
    return net_votes

In [None]:
# Vectorised call: subtract the two vote columns as whole Series.
up_down_diff_score(up=reviews["up"], down=reviews["down"])

0      15
1      68
2      12
3       2
4     -13
5       3
6       2
7      32
8      -2
9      44
10     16
11    145
12     60
13     29
14     18
15     39
16     35
17     55
18     52
19     18
20     10
21     66
dtype: int64

In [None]:
# Row-wise variant of the same computation. The apply is restricted to the
# two integer vote columns: with axis=1 each row is handed over as a Series
# with a single dtype, so once `reviews` also holds float score columns the
# whole row is upcast to float64 and the differences come back as floats.
reviews[["up", "down"]].apply(lambda row: up_down_diff_score(row["up"], row["down"]), axis=1)

0     15.00
1     68.00
2     12.00
3      2.00
4    -13.00
5      3.00
6      2.00
7     32.00
8     -2.00
9     44.00
10    16.00
11   145.00
12    60.00
13    29.00
14    18.00
15    39.00
16    35.00
17    55.00
18    52.00
19    18.00
20    10.00
21    66.00
dtype: float64

# Score Average Rating

up / (up+down)

In [None]:
def score_average_rating(up, down):
    """Return the share of up votes among all votes.

    Args:
        up: Number of up (like) votes.
        down: Number of down (dislike) votes.

    Returns:
        ``up / (up + down)``, or ``0`` when there are no votes at all
        (avoids a division by zero).
    """
    total = up + down
    return 0 if total == 0 else up / total

In [None]:
# Average rating per review, computed one row at a time.
reviews.apply(lambda row: score_average_rating(up=row["up"], down=row["down"]), axis=1)

0    1.00
1    0.97
2    0.88
3    0.67
4    0.12
5    0.71
6    0.57
7    0.88
8    0.48
9    0.87
10   0.70
11   0.99
12   0.98
13   0.97
14   0.82
15   0.98
16   0.95
17   0.91
18   0.96
19   1.00
20   0.86
21   0.97
dtype: float64

# Wilson Lower Bound Score

$$ \text{Wilson lower bound} = \frac{p + \frac{z^2}{2n} - z \sqrt{\frac{p(1-p)}{n} + \frac{z^2}{4n^2}}}{1 + \frac{z^2}{n}} $$

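Where:

- $p$: observed proportion of positive (up) votes, `up / n`
- $n$: total number of votes, `up + down`
- $z$: standard-normal quantile for the chosen confidence level (about 1.96 for 95%)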

In [None]:
def wilson_lower_bound(up, down, confidence=0.95):
    """Lower bound of the Wilson score interval for the up-vote proportion.

    Args:
        up: Number of up (like) votes.
        down: Number of down (dislike) votes.
        confidence: Two-sided confidence level of the interval (default 0.95).

    Returns:
        The lower end of the Wilson interval for ``up / (up + down)``,
        or ``0`` when there are no votes.
    """
    n = up + down
    if n == 0:
        return 0

    # Two-sided interval: split the remaining probability mass between both tails.
    z = st.norm.ppf(1 - (1 - confidence) / 2)
    z_sq = z * z
    phat = 1.0 * up / n

    centre = phat + z_sq / (2 * n)
    spread = z * math.sqrt((phat * (1 - phat) + z_sq / (4 * n)) / n)
    denominator = 1 + z_sq / n
    return (centre - spread) / denominator

In [None]:
# Wilson lower bound per review at the default 95% confidence level.
reviews.apply(lambda row: wilson_lower_bound(up=row["up"], down=row["down"]), axis=1)

0    0.80
1    0.90
2    0.64
3    0.30
4    0.03
5    0.36
6    0.33
7    0.75
8    0.34
9    0.76
10   0.55
11   0.95
12   0.91
13   0.84
14   0.64
15   0.87
16   0.83
17   0.82
18   0.88
19   0.82
20   0.60
21   0.90
dtype: float64

# Apply All Methods

* up-down diff score
* average rating score
* wilson lower bound score

In [None]:
# Score from a view holding only the two vote columns. A row-wise apply over
# the full frame hands each row over as a single-dtype Series, so once the
# float score columns below exist a re-run of this cell would upcast the vote
# counts to float64 and silently turn the integer difference into floats.
vote_counts = reviews[["up", "down"]]

# Plain column arithmetic: stays int64 and needs no per-row Python call.
reviews["up_down_diff_score"] = up_down_diff_score(vote_counts["up"], vote_counts["down"])

# These two scorers branch on a scalar total, so they are applied row by row.
reviews["average_rating_score"] = vote_counts.apply(
    lambda row: score_average_rating(row["up"], row["down"]), axis=1
)
reviews["wilson_lower_bound_score"] = vote_counts.apply(
    lambda row: wilson_lower_bound(row["up"], row["down"]), axis=1
)

In [None]:
# Display the frame, which now carries the three score columns added in the previous cell.
reviews

Unnamed: 0,up,down,up_down_diff_score,average_rating_score,wilson_lower_bound_score
0,15,0,15,1.0,0.8
1,70,2,68,0.97,0.9
2,14,2,12,0.88,0.64
3,4,2,2,0.67,0.3
4,2,15,-13,0.12,0.03
5,5,2,3,0.71,0.36
6,8,6,2,0.57,0.33
7,37,5,32,0.88,0.75
8,21,23,-2,0.48,0.34
9,52,8,44,0.87,0.76


In [None]:
# Rank reviews from highest to lowest Wilson lower bound.
reviews.sort_values(by="wilson_lower_bound_score", ascending=False)

Unnamed: 0,up,down,up_down_diff_score,average_rating_score,wilson_lower_bound_score
11,147,2,145,0.99,0.95
12,61,1,60,0.98,0.91
1,70,2,68,0.97,0.9
21,68,2,66,0.97,0.9
18,54,2,52,0.96,0.88
15,40,1,39,0.98,0.87
13,30,1,29,0.97,0.84
16,37,2,35,0.95,0.83
19,18,0,18,1.0,0.82
17,61,6,55,0.91,0.82
