# **Sorting reviews using the data collected from an e-commerce company**

# Importing the libraries

In [1]:
import pandas as pd
import math
import scipy.stats as st

# Preparing a dataframe of up/down vote counts from an e-commerce company

In [2]:
# Up-vote and down-vote counts; position i in both lists refers to the same row.
up = [15, 70, 14, 4, 2, 5, 8, 37, 21, 52, 28, 147, 61, 30, 23, 40, 37, 61, 54, 18, 12, 68]
down = [0, 2, 2, 2, 15, 2, 6, 5, 23, 8, 12, 2, 1, 1, 5, 1, 2, 6, 2, 0, 2, 2]

# Pair the counts up row by row and label the two resulting columns.
df = pd.DataFrame(list(zip(up, down)), columns=["up", "down"])
df

Unnamed: 0,up,down
0,15,0
1,70,2
2,14,2
3,4,2
4,2,15
5,5,2
6,8,6
7,37,5
8,21,23
9,52,8


# Adding the 'scores_pos_ned_diff' variable: the difference between the up (positive) and down (negative) vote counts

In [3]:
def scores_pos_ned_diff(up, down):
    """Return the net vote score: up votes minus down votes.

    The result is negative when the down votes outnumber the up votes.
    """
    net_votes = up - down
    return net_votes

# scores_pos_ned_diff is a plain subtraction, so it can take the two columns
# directly and compute every row at once; a row-wise apply would call a Python
# lambda once per row to produce the same values.
df['scores_pos_ned_diff'] = scores_pos_ned_diff(df['up'], df['down'])
df

Unnamed: 0,up,down,scores_pos_ned_diff
0,15,0,15
1,70,2,68
2,14,2,12
3,4,2,2
4,2,15,-13
5,5,2,3
6,8,6,2
7,37,5,32
8,21,23,-2
9,52,8,44


# Adding the 'score_average_rating' variable using the up-down values

In [4]:
def score_average_rating(up, down):
    """Return the share of all votes that are up votes.

    Returns 0 when there are no votes at all, so the division is never
    attempted with a zero denominator.
    """
    total_votes = up + down
    return up / total_votes if total_votes != 0 else 0

# score_average_rating works on scalar counts (it branches on the vote total),
# so it is evaluated once per row.
average_ratings = df.apply(
    lambda review: score_average_rating(review['up'], review['down']),
    axis=1,
)
df['score_average_rating'] = average_ratings
df

Unnamed: 0,up,down,scores_pos_ned_diff,score_average_rating
0,15,0,15,1.0
1,70,2,68,0.972222
2,14,2,12,0.875
3,4,2,2,0.666667
4,2,15,-13,0.117647
5,5,2,3,0.714286
6,8,6,2,0.571429
7,37,5,32,0.880952
8,21,23,-2,0.477273
9,52,8,44,0.866667


# Adding the 'wilson_lower_bound' variable (the lower bound of the Wilson score confidence interval for the up-vote ratio) using the up-down values

In [5]:
def wilson_lower_bound(up, down, confidence=0.95):
    """Return the lower bound of the Wilson score interval for the up-vote ratio.

    Parameters
    ----------
    up : number of up votes.
    down : number of down votes.
    confidence : two-sided confidence level used to pick the normal
        quantile (default 0.95).

    Returns
    -------
    The lower bound of the interval, or 0 when there are no votes.
    """
    n = up + down
    if n == 0:
        return 0

    # Two-sided normal quantile for the requested confidence level.
    alpha = 1 - confidence
    z = st.norm.ppf(1 - alpha / 2)
    z_squared = z * z

    # Observed proportion of up votes.
    phat = float(up) / n

    # Wilson interval: (centre - margin) / denominator gives the lower end.
    centre = phat + z_squared / (2 * n)
    margin = z * math.sqrt((phat * (1 - phat) + z_squared / (4 * n)) / n)
    denominator = 1 + z_squared / n
    return (centre - margin) / denominator

# wilson_lower_bound takes scalar counts (it branches on the vote total and
# uses math.sqrt), so it is evaluated once per row with the default confidence.
wilson_scores = df.apply(
    lambda review: wilson_lower_bound(review['up'], review['down']),
    axis=1,
)
df['wilson_lower_bound'] = wilson_scores
df

Unnamed: 0,up,down,scores_pos_ned_diff,score_average_rating,wilson_lower_bound
0,15,0,15,1.0,0.796117
1,70,2,68,0.972222,0.904258
2,14,2,12,0.875,0.639772
3,4,2,2,0.666667,0.299993
4,2,15,-13,0.117647,0.03288
5,5,2,3,0.714286,0.358934
6,8,6,2,0.571429,0.325906
7,37,5,32,0.880952,0.749996
8,21,23,-2,0.477273,0.33755
9,52,8,44,0.866667,0.758348


# Sorting the dataframe by 'wilson_lower_bound' scores

In [6]:
# Highest Wilson lower bound first; returns a sorted copy and leaves df itself unchanged.
df.sort_values(by='wilson_lower_bound', ascending=False)

Unnamed: 0,up,down,scores_pos_ned_diff,score_average_rating,wilson_lower_bound
11,147,2,145,0.986577,0.952384
12,61,1,60,0.983871,0.914133
1,70,2,68,0.972222,0.904258
21,68,2,66,0.971429,0.901677
18,54,2,52,0.964286,0.878812
15,40,1,39,0.97561,0.874049
13,30,1,29,0.967742,0.838059
16,37,2,35,0.948718,0.831144
19,18,0,18,1.0,0.824121
17,61,6,55,0.910448,0.818072


### As a result, we obtained a more reliable ranking of the reviews made on the products.

# **Thanks for checking my notebook!**