In [2]:
# import pandas
import pandas as pd

In [3]:
import numpy as np

In [4]:
# Load the review data from the CSV file into a DataFrame
vine_df = pd.read_csv(filepath_or_buffer="vine_table.csv")

In [5]:
# Preview the first five rows of the loaded data
vine_df.head(n=5)

Unnamed: 0,review_id,star_rating,helpful_votes,total_votes,vine,verified_purchase
0,RTIS3L2M1F5SM,5,0,0,N,Y
1,R1ZV7R40OLHKD,5,0,0,N,Y
2,R3BH071QLH8QMC,1,0,1,N,Y
3,R127K9NTSXA2YH,3,0,0,N,Y
4,R32ZWUXDJPW27Q,4,0,0,N,Y


In [6]:
# Total number of rows in the loaded data
vine_df.shape[0]

1785997

In [7]:
# Keep only the reviews with at least 20 total votes, as a new DataFrame
popular_reviews = vine_df[vine_df["total_votes"].ge(20)]
popular_reviews.head(n=5)

Unnamed: 0,review_id,star_rating,helpful_votes,total_votes,vine,verified_purchase
55,R4PKAZRQJJX14,1,21,34,N,N
74,R2CI0Y288CC7E2,1,21,35,N,Y
209,R127WEQY2FM1T3,1,147,175,N,Y
289,R3EZ0EPYLDA34S,1,14,31,N,Y
483,R2FJ94555FZH32,2,55,60,N,N


In [8]:
# Number of reviews that passed the total-votes filter
popular_reviews.shape[0]

65379

In [9]:
# Keep the popular reviews whose helpful votes make up at least half of their total votes
helpful_reviews = popular_reviews[
    popular_reviews["helpful_votes"].div(popular_reviews["total_votes"]).ge(0.5)
]
helpful_reviews.head(n=5)

Unnamed: 0,review_id,star_rating,helpful_votes,total_votes,vine,verified_purchase
55,R4PKAZRQJJX14,1,21,34,N,N
74,R2CI0Y288CC7E2,1,21,35,N,Y
209,R127WEQY2FM1T3,1,147,175,N,Y
483,R2FJ94555FZH32,2,55,60,N,N
537,R1U3AR67RE273L,1,51,65,N,Y


In [21]:
# Count the reviews in each vine group (Y = vine, N = non-vine)
review_count = helpful_reviews.groupby("vine")["review_id"].count()
review_count

vine
N    40471
Y       94
Name: review_id, dtype: int64

In [24]:
# Get the number of 5 star vine and non-vine reviews.
# Summing the boolean "is 5 stars" flag per vine group (instead of filtering
# to 5 star rows first and then counting) keeps every vine group in the
# result, with a count of 0 when a group has no 5 star reviews. Filtering
# first would drop such a group entirely, and anything divided by or combined
# with this Series would then get NaN for it.
five_star_count = (
    helpful_reviews["star_rating"].eq(5).groupby(helpful_reviews["vine"]).sum()
)
five_star_count

vine
N    15663
Y       48
Name: star_rating, dtype: int64

In [27]:
# Percentage of reviews in each vine group that are five star
percent_five_star = five_star_count.div(review_count).mul(100)
percent_five_star

vine
N    38.701786
Y    51.063830
dtype: float64

In [28]:
# Assemble the per-group counts and percentage into one summary table
summary_df = pd.DataFrame(
    data=dict(
        zip(
            ["Total Reviews", "5 Star Reviews", "% 5 Star Reviews"],
            [review_count, five_star_count, percent_five_star],
        )
    )
)

summary_df

Unnamed: 0_level_0,Total Reviews,5 Star Reviews,% 5 Star Reviews
vine,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
N,40471,15663,38.701786
Y,94,48,51.06383


In [30]:
# Format the percentage column of the summary df to one decimal place.
# The percentage is recomputed from the two count columns instead of being
# read back from "% 5 Star Reviews": after the first run that column holds
# strings, and applying the "{:.1f}" float format to a string raises
# ValueError, so re-running this cell would otherwise fail.
summary_df["% 5 Star Reviews"] = (
    summary_df["5 Star Reviews"] / summary_df["Total Reviews"] * 100
).map("{:.1f}".format)
summary_df

Unnamed: 0_level_0,Total Reviews,5 Star Reviews,% 5 Star Reviews
vine,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
N,40471,15663,38.7
Y,94,48,51.1


In [11]:
# Subset of the helpful reviews whose vine flag is "Y"
helpful_reviews_vine = helpful_reviews[helpful_reviews["vine"].eq("Y")]
helpful_reviews_vine.head(n=5)

Unnamed: 0,review_id,star_rating,helpful_votes,total_votes,vine,verified_purchase
32611,R3KKUSGFZWSUIY,5,56,63,Y,N
33112,R10FO5UKKVZBK2,3,23,23,Y,N
69680,RM4KSGEOR7MU1,5,19,24,Y,N
155361,RG7VRMYLEXD23,4,22,26,Y,N
239327,R11O4YSCPSNL6L,3,20,26,Y,N


In [12]:
# Number of vine reviews (rows with a non-null review_id)
review_count_vine = helpful_reviews_vine["review_id"].notna().sum()
review_count_vine

94

In [13]:
# Number of vine reviews rated exactly 5 stars
five_star_vine = helpful_reviews_vine["star_rating"].eq(5).sum()
five_star_vine

48

In [14]:
# Percentage of vine reviews that are 5 star; displayed rounded to one decimal
percent_5star_vine = (five_star_vine / review_count_vine) * 100
percent_5star_vine.round(decimals=1)

51.1

In [15]:
# Subset of the helpful reviews whose vine flag is "N"
helpful_not_vine = helpful_reviews[helpful_reviews["vine"].eq("N")]
helpful_not_vine.head(n=5)

Unnamed: 0,review_id,star_rating,helpful_votes,total_votes,vine,verified_purchase
55,R4PKAZRQJJX14,1,21,34,N,N
74,R2CI0Y288CC7E2,1,21,35,N,Y
209,R127WEQY2FM1T3,1,147,175,N,Y
483,R2FJ94555FZH32,2,55,60,N,N
537,R1U3AR67RE273L,1,51,65,N,Y


In [16]:
# Number of non-vine reviews (rows with a non-null review_id)
review_count_not_vine = helpful_not_vine["review_id"].notna().sum()
review_count_not_vine

40471

In [17]:
# Number of non-vine reviews rated exactly 5 stars
five_star_not_vine = helpful_not_vine["star_rating"].eq(5).sum()
five_star_not_vine

15663

In [18]:
# Percentage of non-vine reviews that are 5 star; displayed rounded to one decimal
percent_5star_nv = (five_star_not_vine / review_count_not_vine) * 100
percent_5star_nv.round(decimals=1)

38.7