In [1]:
import numpy as np
import pandas as pd
from collections import Counter
import seaborn as sns

In [2]:
# Load the raw reviews and discard the app_id column in one chained expression;
# the aggregation further down groups games by app_name.
reviews = pd.read_csv('Datasets/reviews.csv').drop(columns='app_id')
reviews.head()

Unnamed: 0,app_name,review_text,review_score,review_votes
0,Counter-Strike,Ruined my life.,1,0
1,Counter-Strike,This will be more of a ''my experience with th...,1,1
2,Counter-Strike,This game saved my virginity.,1,0
3,Counter-Strike,• Do you like original games? • Do you like ga...,1,0
4,Counter-Strike,"Easy to learn, hard to master.",1,1


In [3]:
# Random rows give a broader impression than head(), which only shows one game.
reviews.sample(n=10)

Unnamed: 0,app_name,review_text,review_score,review_votes
64365,Total War: EMPIRE - Definitive Edition,Best Total War in my opinion,1,0
3038852,Darkest Dungeon®,"i was relucant to get it, but i do like it not...",1,1
5122557,Undertale,This game is an amazing game for 5 reasons: T...,1,0
2477082,Space Engineers,Early Access Review,1,0
5060316,Oxenfree,This game is called many things - the talking ...,1,1
3114451,Fistful of Frags,11/10 would gabeN again. #WesternCsgoSimulator,1,0
4883414,Anno 2205,If you enjoy a nice peaceful resource managing...,1,0
5184044,Hurtworld,Early Access Review,1,0
2668895,The Binding of Isaac: Rebirth,This game is insanely awesome. Love the new gr...,1,0
3105793,Lords Of The Fallen,Runs like ♥♥♥♥ even on high end hardware. Also...,-1,0


In [4]:
# Summary statistics, used to check the value ranges of the score/vote columns.
numeric_summary = reviews.describe()
numeric_summary

Unnamed: 0,review_score,review_votes
count,6417106.0,6417106.0
mean,0.6394992,0.1472446
std,0.7687918,0.3543496
min,-1.0,0.0
25%,1.0,0.0
50%,1.0,0.0
75%,1.0,0.0
max,1.0,1.0


In [5]:
# Frequency of each distinct value in the two label columns, separated by a blank line.
print(
    reviews['review_score'].value_counts(),
    reviews['review_votes'].value_counts(),
    sep='\n\n',
)

 1    5260420
-1    1156686
Name: review_score, dtype: int64

0    5472222
1     944884
Name: review_votes, dtype: int64


### Weighted review score
To give each game a proper score, it is not appropriate to simply average its reviews: a game with an average of 1.0 across 1 review would then rank above a game with an average of 0.95 across 100 reviews, even though the latter should be considered better. An appropriate formula therefore needs to be chosen. This [blog post](https://steamdb.info/blog/steamdb-rating/) gives a solution.

In [6]:
# Per-game non-null counts of every column, restricted to positive (score == 1) reviews.
# NOTE(review): the following cell reassigns positive_reviews, so this frame
# does not appear to be used afterwards in the visible notebook.
positive_reviews = (
    reviews.loc[reviews['review_score'].eq(1)]
    .groupby('app_name')
    .count()
)

In [7]:
# Per-game number of reviews and mean review_score (scores are +1 / -1).
reviews_by_group = reviews.groupby('app_name')['review_score'].agg(['count', 'mean'])

# Per-game number of positive reviews. Games without a single positive review
# are absent from the filtered groupby, so reindex against the full game list
# and fill with 0; otherwise the division below yields NaN for them instead of
# a positive rate of 0.0.
positive_reviews = (
    reviews.loc[reviews['review_score'] == 1]
    .groupby('app_name')['review_score']
    .count()
    .reindex(reviews_by_group.index, fill_value=0)
)

# Share of positive reviews per game, in [0, 1].
reviews_by_group['positive_rate'] = positive_reviews / reviews_by_group['count']
reviews_by_group.sample(5)

Unnamed: 0_level_0,count,mean,positive_rate
app_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Them - The Summoning,21,-0.52381,0.238095
The Seven Years War (1756-1763),42,0.380952,0.690476
Eron,116,0.155172,0.577586
Room 404,31,-0.032258,0.483871
Left 4 Dead 2,50980,0.85104,0.92552


In [8]:
import math 

def calc_weighted_score(rate, total):
    """Shrink a positive-review rate toward 0.5 according to the review count.

    Computes ``rate - (rate - 0.5) * 2 ** (-log10(total + 1))``. With
    ``total == 0`` the result is exactly 0.5; as ``total`` grows the correction
    term shrinks and the result approaches ``rate``.

    Parameters
    ----------
    rate : float, numpy.ndarray or pandas.Series
        Fraction of positive reviews.
    total : int, float, numpy.ndarray or pandas.Series
        Number of reviews; every value must be greater than -1.

    Returns
    -------
    Same shape as the broadcast inputs (a float for scalar inputs).

    Raises
    ------
    ValueError
        If any ``total + 1`` is not positive (the logarithm is undefined).
    """
    shifted_total = np.add(total, 1.0)
    # NumPy would silently return -inf/nan here; keep the explicit failure that
    # the scalar math.log10 implementation produced.
    if np.any(np.less_equal(shifted_total, 0)):
        raise ValueError("total must be greater than -1")
    # Weight of the pull toward 0.5: 1 for no reviews, halves per 10x more reviews.
    uncertainty = np.power(2.0, -np.log10(shifted_total))
    return rate - (rate - 0.5) * uncertainty

# Score every game from its (positive_rate, count) pair, in row order, and
# attach the result as a new column.
weighted_score = [
    calc_weighted_score(rate, total)
    for rate, total in zip(reviews_by_group['positive_rate'], reviews_by_group['count'])
]
reviews_by_group['weighted_score'] = weighted_score
reviews_by_group.sample(n=5)

Unnamed: 0_level_0,count,mean,positive_rate,weighted_score
app_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Yosumin!,68,0.264706,0.632353,0.595354
Pocket Rumble,84,0.714286,0.857143,0.76338
SAMOLIOTIK,281,0.380783,0.690391,0.655554
Pet Store Panic,1,1.0,1.0,0.594164
Power of Love,9,0.111111,0.555556,0.527778


In [9]:
# Ten best games by weighted score, highest first.
reviews_by_group.sort_values(by='weighted_score', ascending=False).head(10)

Unnamed: 0_level_0,count,mean,positive_rate,weighted_score
app_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Portal 2,38924,0.975542,0.987771,0.967521
Sid Meier's Civilization V,8693,0.99931,0.999655,0.967083
Half-Life 2,6483,0.999075,0.999537,0.963967
Factorio,12553,0.977854,0.988927,0.960391
South Park™: The Stick of Truth™,3479,0.9977,0.99885,0.95601
Portal,16293,0.963174,0.981587,0.955602
The Binding of Isaac: Rebirth,23306,0.957264,0.978632,0.955444
Terraria,84828,0.941576,0.970788,0.955329
"Star Wars: Battlefront 2 (Classic, 2005)",2976,1.0,1.0,0.954995
Unturned,5190,0.983044,0.991522,0.954099


In [10]:
# Persist the per-game aggregates; the app_name index is written as the first
# column (to_csv writes the index by default).
reviews_by_group.to_csv('Datasets/reviews_processed.csv')