In [1]:
import pandas as pd
import math
import scipy.stats as st
from sklearn.preprocessing import MinMaxScaler
import itertools
import statsmodels.stats.api as sms
from scipy.stats import ttest_1samp, shapiro, levene, ttest_ind, mannwhitneyu, \
    pearsonr, spearmanr, kendalltau, f_oneway, kruskal
from statsmodels.stats.proportion import proportions_ztest


In [2]:
pd.set_option('display.max_rows', None)  # Tüm satırları göster
pd.set_option('display.max_columns', None)

df_ = pd.read_csv("/kaggle/input/amozon-review/amazon_review.csv")
df = df_.copy()
df.head(10)
df.shape
df.dtypes
df.nunique()
df['overall'].value_counts()

overall
5.0    3922
4.0     527
1.0     244
3.0     142
2.0      80
Name: count, dtype: int64

In [3]:
def time_based_weighted_average(dateframe, w1=0.3, w2=0.28, w3=0.23, w4=0.19):
    # Calculate the weighted average based on day_diff intervals
    avg_recent = dateframe.loc[dateframe['day_diff'] <= 30, "overall"].mean() * w1
    avg_30_90 = dateframe.loc[(dateframe['day_diff'] > 30) & (dateframe['day_diff'] <= 90), "overall"].mean() * w2
    avg_90_180 = dateframe.loc[(dateframe['day_diff'] > 90) & (dateframe['day_diff'] <= 180), "overall"].mean() * w3
    avg_older = dateframe.loc[dateframe['day_diff'] > 180, "overall"].mean() * w4

    time_based_weighted_average = avg_recent + avg_30_90 + avg_90_180 + avg_older

    return time_based_weighted_average

time_based_weighted_average(df)

4.705931537954305

In [4]:
df['helpful_no'] = df["total_vote"] - df["helpful_yes"]

df['score_pos_neg_diff'] = df["helpful_yes"] - df['helpful_no']

def calculate_score(row):
    if row["total_vote"] == 0:
        return 0
    else:
        return row["helpful_yes"] / row["total_vote"]

df['score_average_rating'] = df.apply(calculate_score, axis=1)

df.sample(100)

Unnamed: 0,reviewerID,asin,reviewerName,helpful,reviewText,overall,summary,unixReviewTime,reviewTime,day_diff,helpful_yes,total_vote,helpful_no,score_pos_neg_diff,score_average_rating
1000,A2XQRFICOZW7QN,B007WTAJTO,C-LoS,"[0, 0]",Great price! works flawless with my note 3. I ...,5.0,Works great with Note 3,1386806400,2013-12-12,361,0,0,0,0,0.0
1108,A2HIW1UEUBZRH5,B007WTAJTO,Dale,"[0, 0]","Use it in my Ainol Spark, and it gets great re...",5.0,"Great disk, no issue with speeds.",1370995200,2013-12-06,367,0,0,0,0,0.0
4464,A2IQ6DRQ2I1ZVI,B007WTAJTO,terry gonzalez,"[0, 0]",It is whats it is. It does it very well. No hi...,5.0,Memory Card,1392681600,2014-02-18,293,0,0,0,0,0.0
2367,AF7R7E8N3BEEH,B007WTAJTO,Joe,"[0, 0]",SanDisk makes some of the best storage media o...,5.0,Great SD card,1388361600,2013-12-30,343,0,0,0,0,0.0
4729,AD78KCAW3LUDB,B007WTAJTO,Vince52,"[0, 0]",I use it in my Sansa Clip Zip 8GB MP3 player a...,5.0,Used for MP3 player to gain more space.,1373760000,2013-07-14,512,0,0,0,0,0.0
4532,A39GIYMC5BD5I3,B007WTAJTO,"Ticozolano ""Guille""","[0, 0]",What can I say that hasn't said already for th...,5.0,Perfect,1382918400,2013-10-28,406,0,0,0,0,0.0
4782,A1Z3BV3D14NBFR,B007WTAJTO,Wesley Haley,"[1, 1]",Bought this for my Note 10.1 for some extra sp...,5.0,Works great in my Note 10.1,1363910400,2013-03-22,626,1,1,0,1,1.0
595,A3P0ZOIUOUFPBT,B007WTAJTO,B. HILL,"[1, 2]",Worked for awhile and then android would blow ...,3.0,Two failed cards. Did not work in HTC EVO LTE...,1355270400,2012-12-12,726,1,2,1,0,0.5
694,AYXYB6KVF0C6K,B007WTAJTO,Brandon Dees,"[0, 0]",No complaints about this card. It's much faste...,5.0,Good value for a fast microSD,1391644800,2014-06-02,189,0,0,0,0,0.0
418,A361M14PU2GUEG,B007WTAJTO,"Angry Ryan ""Ryan A. Forrest""","[1, 2]",I bought a class 10 Samsung and returned it af...,5.0,SanDisk Micro Class 10 card,1342137600,2012-07-13,878,1,2,1,0,0.5


In [5]:
def wilson_lower_bound(up, down, confidence = 0.95):

    n = up + down
    if n == 0:
        return 0
    z = st.norm.ppf(1 - confidence /2)
    phat = 1.0 * up / n
    return (phat + z*z / (2 * n) - z*math.sqrt((phat *(1-phat) + z*z / (4*n)) /n)) / (1 + z*z /n)

df['wilson_lower_bound'] = df.apply(lambda row: wilson_lower_bound(row['helpful_yes'], row['total_vote'] - row['helpful_yes']), axis=1)

df_sorted = df.sort_values(by='wilson_lower_bound', ascending=False)
df_sorted.head(20)

Unnamed: 0,reviewerID,asin,reviewerName,helpful,reviewText,overall,summary,unixReviewTime,reviewTime,day_diff,helpful_yes,total_vote,helpful_no,score_pos_neg_diff,score_average_rating,wilson_lower_bound
1465,A6I8KXYK24RTB,B007WTAJTO,D. Stein,"[7, 7]","I for one have not bought into Google's, or an...",4.0,Finally.,1397433600,2014-04-14,238,7,7,0,7,1.0,0.999439
1609,A2TPXOZSU1DACQ,B007WTAJTO,Eskimo,"[7, 7]",I have always been a sandisk guy. This cards ...,5.0,Bet you wish you had one of these,1395792000,2014-03-26,257,7,7,0,7,1.0,0.999439
4072,A22GOZTFA02O2F,B007WTAJTO,"sb21 ""sb21""","[6, 6]",I used this for my Samsung Galaxy Tab 2 7.0 . ...,5.0,Used for my Samsung Galaxy Tab 2 7.0,1347321600,2012-11-09,759,6,6,0,6,1.0,0.999345
121,A2Z4VVF1NTJWPB,B007WTAJTO,A. Lee,"[5, 5]",Update: providing an update with regard to San...,5.0,ready for use on the Galaxy S3,1346803200,2012-05-09,943,5,5,0,5,1.0,0.999214
2583,A3MEPYZVTAV90W,B007WTAJTO,J. Wong,"[5, 5]",I bought this Class 10 SD card for my GoPro 3 ...,5.0,Works Great with a GoPro 3 Black!,1370649600,2013-08-06,489,5,5,0,5,1.0,0.999214
1753,ALPLKR59QMBUX,B007WTAJTO,G. Becker,"[5, 5]",Puchased this card right after I received my S...,5.0,Use Nothing Other Than the Best,1350864000,2012-10-22,777,5,5,0,5,1.0,0.999214
1072,A2O96COBMVY9C4,B007WTAJTO,Crysis Complex,"[5, 5]",What more can I say? The 64GB micro SD works f...,5.0,Works wonders for the Galaxy Note 2!,1349395200,2012-05-10,942,5,5,0,5,1.0,0.999214
1142,A1PLHPPAJ5MUXG,B007WTAJTO,Daniel Pham(Danpham_X @ yahoo. com),"[5, 5]",As soon as I saw that this card was announced ...,5.0,Great large capacity card,1396396800,2014-02-04,307,5,5,0,5,1.0,0.999214
1006,A1GDY4MP5QVXPD,B007WTAJTO,cmoisuperlea,"[4, 4]",I own a couple of these and I've tested them o...,5.0,Works as described.,1388361600,2013-12-30,343,4,4,0,4,1.0,0.999018
1739,A1KJZ6NPUYA22D,B007WTAJTO,Gary A. Schmitt,"[4, 4]",I was a bit concerned when I first ordered thi...,5.0,64GB sdxc - works perfectly with Samsung Galax...,1372291200,2013-06-27,529,4,4,0,4,1.0,0.999018
