In [1]:
# Imports
import pandas as pd

In [2]:
# Load the Vine review table from the local resources folder and preview the first rows
vine_df = pd.read_csv(filepath_or_buffer="resources/vine_table.csv")
vine_df.head(n=5)

Unnamed: 0,review_id,star_rating,helpful_votes,total_votes,vine,verified_purchase
0,RTIS3L2M1F5SM,5,0,0,N,Y
1,R1ZV7R40OLHKD,5,0,0,N,Y
2,R3BH071QLH8QMC,1,0,1,N,Y
3,R127K9NTSXA2YH,3,0,0,N,Y
4,R32ZWUXDJPW27Q,4,0,0,N,Y


In [3]:
# 1. Keep only the reviews that received at least 20 total votes
total_votes_df = vine_df.loc[vine_df["total_votes"].ge(20)]
total_votes_df.head(n=5)

Unnamed: 0,review_id,star_rating,helpful_votes,total_votes,vine,verified_purchase
55,R4PKAZRQJJX14,1,21,34,N,N
74,R2CI0Y288CC7E2,1,21,35,N,Y
209,R127WEQY2FM1T3,1,147,175,N,Y
289,R3EZ0EPYLDA34S,1,14,31,N,Y
483,R2FJ94555FZH32,2,55,60,N,N


In [4]:
# 2. From total_votes_df, keep the rows whose helpful_votes / total_votes ratio is at least 50%
helpful_votes_df = total_votes_df.loc[
    (total_votes_df["helpful_votes"] / total_votes_df["total_votes"]).ge(0.50)
]
helpful_votes_df.head(n=5)

Unnamed: 0,review_id,star_rating,helpful_votes,total_votes,vine,verified_purchase
55,R4PKAZRQJJX14,1,21,34,N,N
74,R2CI0Y288CC7E2,1,21,35,N,Y
209,R127WEQY2FM1T3,1,147,175,N,Y
483,R2FJ94555FZH32,2,55,60,N,N
537,R1U3AR67RE273L,1,51,65,N,Y


In [5]:
# 3. Paid reviews: rows of helpful_votes_df flagged as part of the Vine program (vine == "Y")
paid_df = helpful_votes_df.loc[helpful_votes_df["vine"].eq("Y")]
paid_df.head(n=5)

Unnamed: 0,review_id,star_rating,helpful_votes,total_votes,vine,verified_purchase
32611,R3KKUSGFZWSUIY,5,56,63,Y,N
33112,R10FO5UKKVZBK2,3,23,23,Y,N
69680,RM4KSGEOR7MU1,5,19,24,Y,N
155361,RG7VRMYLEXD23,4,22,26,Y,N
239327,R11O4YSCPSNL6L,3,20,26,Y,N


In [6]:
# 4. Unpaid reviews: rows of helpful_votes_df not part of the Vine program (vine == "N")
unpaid_df = helpful_votes_df.loc[helpful_votes_df["vine"].eq("N")]
unpaid_df.head(n=5)

Unnamed: 0,review_id,star_rating,helpful_votes,total_votes,vine,verified_purchase
55,R4PKAZRQJJX14,1,21,34,N,N
74,R2CI0Y288CC7E2,1,21,35,N,Y
209,R127WEQY2FM1T3,1,147,175,N,Y
483,R2FJ94555FZH32,2,55,60,N,N
537,R1U3AR67RE273L,1,51,65,N,Y


In [7]:
# 5a. Count the 5-star reviews and the total reviews among the paid (Vine) reviews,
# then compute the share of 5-star reviews (stored as a fraction, e.g. 0.51 for 51%).
five_star_paid = paid_df[paid_df["star_rating"] == 5].shape[0]
reviews_paid = paid_df.shape[0]
# Guard the division: if no paid review survives the earlier filters, the share is
# undefined, so report NaN instead of raising ZeroDivisionError.
five_star_percentage_paid = five_star_paid/reviews_paid if reviews_paid else float("nan")

In [8]:
# Report the paid-review summary, one figure per line
print(
    f"Paid Five-Star Count: {five_star_paid}",
    f"Paid Review Count: {reviews_paid}",
    f"Paid Five-Star Percentage: {five_star_percentage_paid}",
    sep="\n",
)

Paid Five-Star Count: 48
Paid Review Count: 94
Paid Five-Star Percentage: 0.5106382978723404


In [9]:
# 5b. Count the 5-star reviews and the total reviews among the unpaid (non-Vine) reviews,
# then compute the share of 5-star reviews (stored as a fraction, e.g. 0.39 for 39%).
five_star_unpaid = unpaid_df[unpaid_df["star_rating"] == 5].shape[0]
reviews_unpaid = unpaid_df.shape[0]
# Guard the division: if no unpaid review survives the earlier filters, the share is
# undefined, so report NaN instead of raising ZeroDivisionError.
five_star_percentage_unpaid = five_star_unpaid/reviews_unpaid if reviews_unpaid else float("nan")

In [10]:
# Report the unpaid-review summary, one figure per line
print(
    f"Unpaid Five-Star Count: {five_star_unpaid}",
    f"Unpaid Review Count: {reviews_unpaid}",
    f"Unpaid Five-Star Percentage: {five_star_percentage_unpaid}",
    sep="\n",
)

Unpaid Five-Star Count: 15663
Unpaid Review Count: 40471
Unpaid Five-Star Percentage: 0.38701786464381904
