In [1]:
import pandas as pd
import math
import scipy.stats as st
from sklearn.preprocessing import MinMaxScaler
import itertools
import statsmodels.stats.api as sms
from scipy.stats import ttest_1samp, shapiro, levene, ttest_ind, mannwhitneyu, \
    pearsonr, spearmanr, kendalltau, f_oneway, kruskal
from statsmodels.stats.proportion import proportions_ztest


In [2]:
pd.set_option('display.max_rows', None)  # Tüm satırları göster
pd.set_option('display.max_columns', None)

df_ = pd.read_csv("/kaggle/input/amozon-review/amazon_review.csv")
df = df_.copy()
df.head(10)
df.shape
df.dtypes
df.nunique()
df['overall'].value_counts()

overall
5.0    3922
4.0     527
1.0     244
3.0     142
2.0      80
Name: count, dtype: int64

In [3]:
def time_based_weighted_average(dateframe, w1=0.3, w2=0.28, w3=0.23, w4=0.19):
    # Calculate the weighted average based on day_diff intervals
    avg_recent = dateframe.loc[dateframe['day_diff'] <= 30, "overall"].mean() * w1
    avg_30_90 = dateframe.loc[(dateframe['day_diff'] > 30) & (dateframe['day_diff'] <= 90), "overall"].mean() * w2
    avg_90_180 = dateframe.loc[(dateframe['day_diff'] > 90) & (dateframe['day_diff'] <= 180), "overall"].mean() * w3
    avg_older = dateframe.loc[dateframe['day_diff'] > 180, "overall"].mean() * w4

    time_based_weighted_average = avg_recent + avg_30_90 + avg_90_180 + avg_older

    return time_based_weighted_average

time_based_weighted_average(df)

4.705931537954305

In [4]:
df['helpful_no'] = df["total_vote"] - df["helpful_yes"]

df['score_pos_neg_diff'] = df["helpful_yes"] - df['helpful_no']

def calculate_score(row):
    if row["total_vote"] == 0:
        return 0
    else:
        return row["helpful_yes"] / row["total_vote"]

df['score_average_rating'] = df.apply(calculate_score, axis=1)

df.sample(100)

Unnamed: 0,reviewerID,asin,reviewerName,helpful,reviewText,overall,summary,unixReviewTime,reviewTime,day_diff,helpful_yes,total_vote,helpful_no,score_pos_neg_diff,score_average_rating
758,A2UNWZVZWZCNJN,B007WTAJTO,Bruce,"[0, 0]",Also fast .. Not much more I can say. I bought...,5.0,Great card,1381881600,2013-10-16,418,0,0,0,0,0.0
4537,A3VZSX37QLKF68,B007WTAJTO,Tim Green,"[0, 0]",I use this in my Hero3 white edition camera fo...,5.0,Used in Hero 3 white edition,1369094400,2013-05-21,566,0,0,0,0,0.0
1209,A1H6MKRB6VZUJC,B007WTAJTO,David D. Gali II,"[0, 0]","Hey, it's memory. Everybody needs it, and you ...",5.0,Great,1379808000,2013-09-22,442,0,0,0,0,0.0
3520,ANP7CNK44DLNX,B007WTAJTO,"Oscar Rodriguez ""Oscar Rodz""","[0, 0]",I was able to add 10 full length movies at 720...,5.0,Great and fast memory,1394150400,2014-07-03,158,0,0,0,0,0.0
4883,A1YEPFLLH42OU1,B007WTAJTO,"Yu-jin &amp; Tracy Chia ""Yu-Jin Chia""","[6, 8]","This is a class 10 microSD card, and it works ...",4.0,"Good memory, adapter = yikes",1354147200,2012-11-29,739,6,8,2,4,0.75
821,A3OAX0WCV109IQ,B007WTAJTO,Capt. John,"[1, 1]","Do not buy, card failed on the third usage. K...",1.0,Dead Card,1368144000,2013-10-05,429,1,1,0,1,1.0
2952,A24WMBO3LUVUZR,B007WTAJTO,Manuel Mendoza,"[0, 0]",works like a charm...expand my tablet capacity...,5.0,works like a charm,1386633600,2013-10-12,422,0,0,0,0,0.0
3151,A38JHDKF2MC25Q,B007WTAJTO,Michael,"[0, 0]",for the price this memory card works well and ...,5.0,good,1363824000,2013-03-21,627,0,0,0,0,0.0
2184,AP7PULMWAXXLB,B007WTAJTO,"Jay_S ""jay_s""","[0, 0]","There isn't much more to be said.. First, ther...",5.0,Sandisk quality means a lot...,1388620800,2014-02-01,310,0,0,0,0,0.0
2271,A1JEE8I76DYW2Q,B007WTAJTO,"Jess Baidwan ""Mr. Tidbit""","[0, 0]",It is so painful when you just buy that new ga...,5.0,Keep your smart phone or tablet happy,1358208000,2013-01-15,692,0,0,0,0,0.0


In [5]:
def wilson_lower_bound(up, down, confidence = 0.95):

    n = up + down
    if n == 0:
        return 0
    z = st.norm.ppf(1 - confidence /2)
    phat = 1.0 * up / n
    return (phat + z*z / (2 * n) - z*math.sqrt((phat *(1-phat) + z*z / (4*n)) /n)) / (1 + z*z /n)

df['wilson_lower_bound'] = df.apply(lambda row: wilson_lower_bound(row['helpful_yes'], row['total_vote'] - row['helpful_yes']), axis=1)

df_sorted = df.sort_values(by='wilson_lower_bound', ascending=False)
df_sorted.head(20)

Unnamed: 0,reviewerID,asin,reviewerName,helpful,reviewText,overall,summary,unixReviewTime,reviewTime,day_diff,helpful_yes,total_vote,helpful_no,score_pos_neg_diff,score_average_rating,wilson_lower_bound
1465,A6I8KXYK24RTB,B007WTAJTO,D. Stein,"[7, 7]","I for one have not bought into Google's, or an...",4.0,Finally.,1397433600,2014-04-14,238,7,7,0,7,1.0,0.999439
1609,A2TPXOZSU1DACQ,B007WTAJTO,Eskimo,"[7, 7]",I have always been a sandisk guy. This cards ...,5.0,Bet you wish you had one of these,1395792000,2014-03-26,257,7,7,0,7,1.0,0.999439
4072,A22GOZTFA02O2F,B007WTAJTO,"sb21 ""sb21""","[6, 6]",I used this for my Samsung Galaxy Tab 2 7.0 . ...,5.0,Used for my Samsung Galaxy Tab 2 7.0,1347321600,2012-11-09,759,6,6,0,6,1.0,0.999345
121,A2Z4VVF1NTJWPB,B007WTAJTO,A. Lee,"[5, 5]",Update: providing an update with regard to San...,5.0,ready for use on the Galaxy S3,1346803200,2012-05-09,943,5,5,0,5,1.0,0.999214
2583,A3MEPYZVTAV90W,B007WTAJTO,J. Wong,"[5, 5]",I bought this Class 10 SD card for my GoPro 3 ...,5.0,Works Great with a GoPro 3 Black!,1370649600,2013-08-06,489,5,5,0,5,1.0,0.999214
1753,ALPLKR59QMBUX,B007WTAJTO,G. Becker,"[5, 5]",Puchased this card right after I received my S...,5.0,Use Nothing Other Than the Best,1350864000,2012-10-22,777,5,5,0,5,1.0,0.999214
1072,A2O96COBMVY9C4,B007WTAJTO,Crysis Complex,"[5, 5]",What more can I say? The 64GB micro SD works f...,5.0,Works wonders for the Galaxy Note 2!,1349395200,2012-05-10,942,5,5,0,5,1.0,0.999214
1142,A1PLHPPAJ5MUXG,B007WTAJTO,Daniel Pham(Danpham_X @ yahoo. com),"[5, 5]",As soon as I saw that this card was announced ...,5.0,Great large capacity card,1396396800,2014-02-04,307,5,5,0,5,1.0,0.999214
1006,A1GDY4MP5QVXPD,B007WTAJTO,cmoisuperlea,"[4, 4]",I own a couple of these and I've tested them o...,5.0,Works as described.,1388361600,2013-12-30,343,4,4,0,4,1.0,0.999018
1739,A1KJZ6NPUYA22D,B007WTAJTO,Gary A. Schmitt,"[4, 4]",I was a bit concerned when I first ordered thi...,5.0,64GB sdxc - works perfectly with Samsung Galax...,1372291200,2013-06-27,529,4,4,0,4,1.0,0.999018
