In [70]:
import numpy as np
import pandas as pd
from collections import Counter
import seaborn as sns

In [71]:
# Read the review dataset and build the working frame without the `app_id` column.
reviews = pd.read_csv('Datasets/dataset.csv').drop(columns='app_id')
reviews.head()

Unnamed: 0,app_name,review_text,review_score,review_votes
0,Counter-Strike,Ruined my life.,1,0
1,Counter-Strike,This will be more of a ''my experience with th...,1,1
2,Counter-Strike,This game saved my virginity.,1,0
3,Counter-Strike,• Do you like original games? • Do you like ga...,1,0
4,Counter-Strike,"Easy to learn, hard to master.",1,1


In [72]:
# Show 10 randomly chosen rows. No random_state is passed, so the rows
# displayed will generally differ from one run to the next.
reviews.sample(10)

Unnamed: 0,app_name,review_text,review_score,review_votes
1756501,Fallen Enchantress: Legendary Heroes,Fun game for those who like games such as Age ...,1,0
909259,Starbound,The game is good and the game play is fun,1,0
3358351,No Man's Sky,"It's really not that bad. Buy it, try it, and ...",1,0
3429765,This War of Mine,"Seemingly illogical or unfair to new players, ...",1,0
172120,Space Pirates and Zombies,Arcade style combat with RPG elements. Ship ar...,1,0
3509821,Metro 2033 Redux,after a very terrible release with game breaki...,1,0
925564,Starbound,Early Access Review,1,0
5134992,Undertale,I very highly recomentd this game to those who...,1,0
3987361,Call of Duty: Black Ops III,♥♥♥♥ing unrealistic flying people and ♥♥♥♥ ♥♥♥...,-1,0
774680,Batman™: Arkham Knight,"Yes, the FPS is capped. Yes there is the occas...",1,1


In [73]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns.
reviews.describe()

Unnamed: 0,review_score,review_votes
count,6417106.0,6417106.0
mean,0.6394992,0.1472446
std,0.7687918,0.3543496
min,-1.0,0.0
25%,1.0,0.0
50%,1.0,0.0
75%,1.0,0.0
max,1.0,1.0


In [74]:
# Class balance of the two label columns, printed one after the other
# with a blank line in between.
print(
    *(reviews[column].value_counts() for column in ('review_score', 'review_votes')),
    sep='\n\n',
)

 1    5260420
-1    1156686
Name: review_score, dtype: int64

0    5472222
1     944884
Name: review_votes, dtype: int64


### Weighted review score
To give each game a proper score, it is not appropriate to simply average its reviews: a game with an average of 1.0 from a single review would then rank above a game with an average of 0.95 across 100 reviews, even though the latter should be considered better. An appropriate formula therefore needs to be chosen that also accounts for the number of reviews. This [blog post](https://steamdb.info/blog/steamdb-rating/) gives a solution.

In [75]:
# Per-game non-null counts of every column, restricted to positive reviews
# (review_score == 1).
# NOTE(review): `positive_reviews` is reassigned in the following cell, so this
# cell looks redundant -- confirm before removing.
is_positive = reviews['review_score'].eq(1)
positive_reviews = reviews.loc[is_positive].groupby('app_name').count()

In [79]:
# Number of reviews and mean review_score for every game.
reviews_by_group = reviews.groupby("app_name")["review_score"].agg(["count", "mean"])

# Number of positive reviews (review_score == 1) for every game.
# Games without a single positive review are absent from the filtered groupby,
# so reindex onto the full list of games with 0 as the fill value; otherwise
# the division below would yield NaN for them instead of a rate of 0.0.
positive_reviews = (
    reviews.loc[reviews["review_score"] == 1]
    .groupby("app_name")["review_score"]
    .count()
    .reindex(reviews_by_group.index, fill_value=0)
)

# Share of positive reviews per game, in [0, 1].
reviews_by_group["positive_rate"] = positive_reviews / reviews_by_group["count"]
reviews_by_group.sample(5)

Unnamed: 0_level_0,count,mean,positive_rate
app_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Nancy Drew: Danger By Design,12,0.0,0.5
TrackMania United Forever,361,0.927978,0.963989
Chess 2: The Sequel,68,0.058824,0.529412
Zombie Driver HD,552,0.706522,0.853261
Ultimate General: Gettysburg,1526,0.705111,0.852556


In [82]:
import math 

def calc_weighted_score(rate, total):
    """Shrink a positive-review rate toward 0.5 according to the review count.

    Computes ``rate - (rate - 0.5) * 2 ** (-log10(total + 1))``. The correction
    factor equals 1 when ``total`` is 0 (the score is then exactly 0.5) and
    halves every time ``total + 1`` grows tenfold, so the score approaches
    ``rate`` as the number of reviews increases.

    Args:
        rate: Share of positive reviews; a scalar, NumPy array or pandas Series.
        total: Number of reviews; a scalar or an array-like that broadcasts
            against ``rate``.

    Returns:
        The weighted score: a scalar for scalar inputs, otherwise the
        element-wise scores as an array or Series.

    Raises:
        ValueError: If ``total + 1`` is not positive for any element, because
            the logarithm is undefined there.
    """
    reviews_plus_one = np.asarray(total, dtype=float) + 1.0
    # Fail loudly on an invalid count rather than letting NumPy return NaN.
    if np.any(reviews_plus_one <= 0):
        raise ValueError("total + 1 must be positive")
    shrinkage = np.power(2.0, -np.log10(reviews_plus_one))
    return rate - (rate - 0.5) * shrinkage

# Score every game from its positive rate and its number of reviews, then
# store the scores as a new column and display the frame.
weighted_score = [
    calc_weighted_score(positive_rate, review_count)
    for positive_rate, review_count in zip(
        reviews_by_group['positive_rate'], reviews_by_group['count']
    )
]
reviews_by_group['weighted_score'] = weighted_score
reviews_by_group

Unnamed: 0_level_0,count,mean,positive_rate,weighted_score
app_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
神明的一天世界-God's One Day World,14,0.571429,0.785714,0.659272
! That Bastard Is Trying To Steal Our Gold !,26,-0.384615,0.307692,0.378996
"!""We Are The Dwarves""!",103,0.533981,0.766990,0.701026
"""Glow Ball"" - The billiard puzzle game",27,-0.629630,0.185185,0.300641
#SelfieTennis,63,0.492063,0.746032,0.675680
...,...,...,...,...
ルナティックドーン レジェンドパック,2,1.000000,1.000000,0.640795
ルナティックドーン 前途への道標,1,1.000000,1.000000,0.594164
丛林守望者（Ranger of the jungle）,5,0.600000,0.800000,0.625066
侠客风云传(Tale of Wuxia),190,0.673684,0.836842,0.767537


In [85]:
# The ten games with the highest weighted score, best first.
reviews_by_group.sort_values(by='weighted_score', ascending=False).head(10)

Unnamed: 0_level_0,count,mean,positive_rate,weighted_score
app_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Portal 2,38924,0.975542,0.987771,0.967521
Sid Meier's Civilization V,8693,0.99931,0.999655,0.967083
Half-Life 2,6483,0.999075,0.999537,0.963967
Factorio,12553,0.977854,0.988927,0.960391
South Park™: The Stick of Truth™,3479,0.9977,0.99885,0.95601
Portal,16293,0.963174,0.981587,0.955602
The Binding of Isaac: Rebirth,23306,0.957264,0.978632,0.955444
Terraria,84828,0.941576,0.970788,0.955329
"Star Wars: Battlefront 2 (Classic, 2005)",2976,1.0,1.0,0.954995
Unturned,5190,0.983044,0.991522,0.954099
