In [1]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd

# File to Load (Remember to change these)
vine_review_data = "Resources/vine_table.csv"

# Read the Data
df = pd.read_csv(vine_review_data)
df

Unnamed: 0,review_id,star_rating,helpful_votes,total_votes,vine,verified_purchase
0,RTIS3L2M1F5SM,5,0,0,N,Y
1,R1ZV7R40OLHKD,5,0,0,N,Y
2,R3BH071QLH8QMC,1,0,1,N,Y
3,R127K9NTSXA2YH,3,0,0,N,Y
4,R32ZWUXDJPW27Q,4,0,0,N,Y
...,...,...,...,...,...,...
1785992,RPC430LWZJ60T,5,1,1,N,N
1785993,R347MZT5FH6HRJ,5,2,2,N,N
1785994,RJ5BETZP0VIUS,1,0,2,N,N
1785995,R85QTDO2KZMGO,1,1,3,N,N


In [2]:
#filter df to reviews that have more than 20 total_votes
df=df[df['total_votes'] > 20]
df

Unnamed: 0,review_id,star_rating,helpful_votes,total_votes,vine,verified_purchase
55,R4PKAZRQJJX14,1,21,34,N,N
74,R2CI0Y288CC7E2,1,21,35,N,Y
209,R127WEQY2FM1T3,1,147,175,N,Y
289,R3EZ0EPYLDA34S,1,14,31,N,Y
483,R2FJ94555FZH32,2,55,60,N,N
...,...,...,...,...,...,...
1785641,RCMDCDJR16IKW,4,27,30,N,N
1785688,R3GYUWLD9FWCPS,5,30,30,N,N
1785714,R26KS4Q9G04FIV,2,8,21,N,N
1785788,RU0J1ZMBCLD27,5,27,29,N,N


In [3]:
#filter df to reviews to rows where the number of helpful_votes divided by total_votes is equal to or greater than 50%
df["helpful_vote_rate"] = (df["helpful_votes"]/df['total_votes'])
df=df[df['helpful_vote_rate'] >= 0.5]
df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


Unnamed: 0,review_id,star_rating,helpful_votes,total_votes,vine,verified_purchase,helpful_vote_rate
55,R4PKAZRQJJX14,1,21,34,N,N,0.617647
74,R2CI0Y288CC7E2,1,21,35,N,Y,0.600000
209,R127WEQY2FM1T3,1,147,175,N,Y,0.840000
483,R2FJ94555FZH32,2,55,60,N,N,0.916667
537,R1U3AR67RE273L,1,51,65,N,Y,0.784615
...,...,...,...,...,...,...,...
1785614,R31UKJUAJNX7XX,5,35,50,N,N,0.700000
1785641,RCMDCDJR16IKW,4,27,30,N,N,0.900000
1785688,R3GYUWLD9FWCPS,5,30,30,N,N,1.000000
1785788,RU0J1ZMBCLD27,5,27,29,N,N,0.931034


In [4]:
#Filter to rows that are vine program participants
df_in_vine=df[df['vine'] == "Y"]
df_in_vine

Unnamed: 0,review_id,star_rating,helpful_votes,total_votes,vine,verified_purchase,helpful_vote_rate
32611,R3KKUSGFZWSUIY,5,56,63,Y,N,0.888889
33112,R10FO5UKKVZBK2,3,23,23,Y,N,1.000000
69680,RM4KSGEOR7MU1,5,19,24,Y,N,0.791667
155361,RG7VRMYLEXD23,4,22,26,Y,N,0.846154
239327,R11O4YSCPSNL6L,3,20,26,Y,N,0.769231
...,...,...,...,...,...,...,...
1456862,RLPTVGLU0JQIP,3,42,45,Y,N,0.933333
1463333,R3ASJ9SENYYYI0,5,40,46,Y,N,0.869565
1481162,RNU8PK609WT6P,4,347,362,Y,N,0.958564
1506354,R8YT75NJW0CM9,4,37,40,Y,N,0.925000


In [5]:
#Filter to rows that are not vine program participants
df_not_vine=df[df['vine'] == "N"]
df_not_vine

Unnamed: 0,review_id,star_rating,helpful_votes,total_votes,vine,verified_purchase,helpful_vote_rate
55,R4PKAZRQJJX14,1,21,34,N,N,0.617647
74,R2CI0Y288CC7E2,1,21,35,N,Y,0.600000
209,R127WEQY2FM1T3,1,147,175,N,Y,0.840000
483,R2FJ94555FZH32,2,55,60,N,N,0.916667
537,R1U3AR67RE273L,1,51,65,N,Y,0.784615
...,...,...,...,...,...,...,...
1785614,R31UKJUAJNX7XX,5,35,50,N,N,0.700000
1785641,RCMDCDJR16IKW,4,27,30,N,N,0.900000
1785688,R3GYUWLD9FWCPS,5,30,30,N,N,1.000000
1785788,RU0J1ZMBCLD27,5,27,29,N,N,0.931034


In [6]:
#Determine the number of reviews
num_tot_rev = len(df. index)
print(f"There is a total of {num_tot_rev} reviews")

#Determine the number of 5-star reviews
num_5_star_rev = len(df.loc[df['star_rating'] == 5])
print(f"There is a total of {num_5_star_rev} 5-star reviews")

#the percentage of 5-star reviews for the two types of review (paid vs unpaid).
pct_5s_rev_paid = (len(df_in_vine.loc[df_in_vine['star_rating'] == 5])/len(df_in_vine))
print(f"Vine review participants give 5-star reviews at a rate of {pct_5s_rev_paid:.2f}%")

pct_5s_rev_unpaid = (len(df_not_vine.loc[df_not_vine['star_rating'] == 5])/len(df_not_vine))
print(f"Non vine review participants give 5-star reviews at a rate of {pct_5s_rev_unpaid:.2f}%")

There is a total of 37921 reviews
There is a total of 14748 5-star reviews
Vine review participants give 5-star reviews at a rate of 0.49%
Non vine review participants give 5-star reviews at a rate of 0.39%
