In [11]:
from google_play_scraper import reviews
import pandas as pd

# List of popular DECA Games titles' package names and names
games = [
    {"name": "The Walking Dead: No Man's Land", "package": "com.nextgames.android.twd"},
    {"name": "Dead Trigger 2", "package": "com.madfingergames.deadtrigger2"},
    {"name": "Shadowgun Legends", "package": "com.madfingergames.legends"},
    {"name": "Dead Trigger", "package": "com.madfingergames.deadtrigger"},
    {"name": "Samurai II: Vengeance", "package": "com.madfingergames.SamuraiIIAll"},
    {"name": "Celtic Heroes", "package": "com.onethumbmobile.celticheroes"},
    {"name": "Unkilled", "package": "com.madfingergames.unkilled"},
    {"name": "Call me a Legend", "package": "com.sixwaves.cml"},
    {"name": "Call me a Gangsta", "package": "com.empirestudiosinc.cmg"},
    {"name": "Robbery Bob", "package": "com.chillingo.robberybobfree.android.row"},
    {"name": "Robbery Bob 2", "package": "com.chillingo.robberybob2.android.gplay"},
    {"name": "Gods and Glory", "package": "com.fridaysgames.godsandglory"},
    {"name": "DragonVale", "package": "com.backflipstudios.android.dragonvale"}
]

# List of countries to scrape reviews from
countries = {'us': 'United States', 'gb': 'United Kingdom', 'de': 'Germany', 'fr': 'France', 
             'it': 'Italy', 'es': 'Spain', 'ru': 'Russia', 'jp': 'Japan', 'kr': 'South Korea', 
             'in': 'India', 'cn': 'China', 'sg': 'Singapore', 'ae': 'United Arab Emirates', 
             'ca': 'Canada', 'au': 'Australia', 'br': 'Brazil', 'mx': 'Mexico', 
             'za': 'South Africa', 'nz': 'New Zealand', 'tr': 'Turkey', 'vn': 'Vietnam', 
             'pl': 'Poland', 'nl': 'Netherlands'}

# Initialize list to store data
all_data = []

# Loop through each game and scrape reviews for each country
for game in games:
    for country in countries:
        result, _ = reviews(
            game['package'],
            lang='en',
            country=country,
            count=100,  # Limit to the first 100 reviews
        )
        
        # Store reviews with game name and country
        for review in result:
            all_data.append({
                'Game': game['name'],
                'Country': country,
                'Review': review['content'],
                'Score': review['score'],
                'At': review['at'],
                'Thumbs_Up': review['thumbsUpCount'],
            })

# Convert to DataFrame
data = pd.DataFrame(all_data)

# Save to CSV
data.to_csv('google_play_reviews.csv', index=False)

# Preview the data
data.head()


Unnamed: 0,Game,Country,Review,Score,At,Thumbs_Up
0,The Walking Dead: No Man's Land,us,Epic fun,5,2024-08-17 18:18:09,0
1,The Walking Dead: No Man's Land,us,Love it,5,2024-08-17 06:52:23,0
2,The Walking Dead: No Man's Land,us,All I can say is the game conceipt is pretty g...,5,2024-08-16 18:45:59,0
3,The Walking Dead: No Man's Land,us,Love this game 👍👍👍,5,2024-08-16 05:33:41,0
4,The Walking Dead: No Man's Land,us,Great game endless fun.,5,2024-08-16 02:29:18,0


In [17]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 29900 entries, 0 to 29899
Data columns (total 6 columns):
 #   Column     Non-Null Count  Dtype         
---  ------     --------------  -----         
 0   Game       29900 non-null  object        
 1   Country    29900 non-null  object        
 2   Review     29900 non-null  object        
 3   Score      29900 non-null  int64         
 4   At         29900 non-null  datetime64[ns]
 5   Thumbs_Up  29900 non-null  int64         
dtypes: datetime64[ns](1), int64(2), object(3)
memory usage: 1.4+ MB


In [1]:
import pandas as pd

data = pd.read_csv("google_play_reviews.csv")
data.info()
data.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 29900 entries, 0 to 29899
Data columns (total 6 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   Game       29900 non-null  object
 1   Country    29900 non-null  object
 2   Review     29900 non-null  object
 3   Score      29900 non-null  int64 
 4   At         29900 non-null  object
 5   Thumbs_Up  29900 non-null  int64 
dtypes: int64(2), object(4)
memory usage: 1.4+ MB


Unnamed: 0,Game,Country,Review,Score,At,Thumbs_Up
0,The Walking Dead: No Man's Land,us,Epic fun,5,2024-08-17 18:18:09,0
1,The Walking Dead: No Man's Land,us,Love it,5,2024-08-17 06:52:23,0
2,The Walking Dead: No Man's Land,us,All I can say is the game conceipt is pretty g...,5,2024-08-16 18:45:59,0
3,The Walking Dead: No Man's Land,us,Love this game 👍👍👍,5,2024-08-16 05:33:41,0
4,The Walking Dead: No Man's Land,us,Great game endless fun.,5,2024-08-16 02:29:18,0


In [3]:
data = data.drop('Thumbs_Up' , axis = 1)
data.head()

Unnamed: 0,Game,Country,Review,Score,At
0,The Walking Dead: No Man's Land,us,Epic fun,5,2024-08-17 18:18:09
1,The Walking Dead: No Man's Land,us,Love it,5,2024-08-17 06:52:23
2,The Walking Dead: No Man's Land,us,All I can say is the game conceipt is pretty g...,5,2024-08-16 18:45:59
3,The Walking Dead: No Man's Land,us,Love this game 👍👍👍,5,2024-08-16 05:33:41
4,The Walking Dead: No Man's Land,us,Great game endless fun.,5,2024-08-16 02:29:18


In [7]:
# Renaming column names to match with apple store data to later combine them.

data = data.rename(columns={'Score': 'Rating', 'At': 'Date'})
data.head()

Unnamed: 0,Game,Country,Review,Rating,Date
0,The Walking Dead: No Man's Land,us,Epic fun,5,2024-08-17 18:18:09
1,The Walking Dead: No Man's Land,us,Love it,5,2024-08-17 06:52:23
2,The Walking Dead: No Man's Land,us,All I can say is the game conceipt is pretty g...,5,2024-08-16 18:45:59
3,The Walking Dead: No Man's Land,us,Love this game 👍👍👍,5,2024-08-16 05:33:41
4,The Walking Dead: No Man's Land,us,Great game endless fun.,5,2024-08-16 02:29:18


# Sentiment Analysis

In [11]:
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

sent_int = SentimentIntensityAnalyzer()

text = data['Review'][1]
sent = sent_int.polarity_scores(text)
sent

{'neg': 0.0, 'neu': 0.192, 'pos': 0.808, 'compound': 0.6369}

In [13]:
# Adding a for loop to extract sentiment per title

title_neg_sentiment = []
title_pos_sentiment = []
title_neu_sentiment = []

length = len(data)
for x in range(0,length):
    try:
        text = data['Review'][x]
        sent_int = SentimentIntensityAnalyzer()
        sent = sent_int.polarity_scores(text)
        neg = sent['neg']
        pos = sent['pos']
        neu = sent['neu']
    except:
        neg = 0
        pos = 0
        neu = 0
    title_neg_sentiment.append(neg)
    title_pos_sentiment.append(pos)
    title_neu_sentiment.append(neu)

title_neg_sentiment = pd.Series(title_neg_sentiment)
title_pos_sentiment = pd.Series(title_pos_sentiment)
title_neu_sentiment = pd.Series(title_neu_sentiment)

data['Negative_sentiment'] = title_neg_sentiment
data['Positive_sentiment'] = title_pos_sentiment
data['Neutral_sentiment'] = title_neu_sentiment

data.head(10)

Unnamed: 0,Game,Country,Review,Rating,Date,Negative_sentiment,Positive_sentiment,Neutral_sentiment
0,The Walking Dead: No Man's Land,us,Epic fun,5,2024-08-17 18:18:09,0.0,0.767,0.233
1,The Walking Dead: No Man's Land,us,Love it,5,2024-08-17 06:52:23,0.0,0.808,0.192
2,The Walking Dead: No Man's Land,us,All I can say is the game conceipt is pretty g...,5,2024-08-16 18:45:59,0.083,0.281,0.637
3,The Walking Dead: No Man's Land,us,Love this game 👍👍👍,5,2024-08-16 05:33:41,0.0,0.344,0.656
4,The Walking Dead: No Man's Land,us,Great game endless fun.,5,2024-08-16 02:29:18,0.0,0.787,0.213
5,The Walking Dead: No Man's Land,us,Good action,5,2024-08-15 05:21:27,0.0,0.744,0.256
6,The Walking Dead: No Man's Land,us,"I'm relatively new with this game, so I don't ...",5,2024-08-15 00:34:25,0.0,0.293,0.707
7,The Walking Dead: No Man's Land,us,Amazing game,5,2024-08-15 00:04:41,0.0,0.792,0.208
8,The Walking Dead: No Man's Land,us,I've been playing this game for like 7 yrs,5,2024-08-14 23:02:27,0.0,0.381,0.619
9,The Walking Dead: No Man's Land,us,started the game but wanted to start a new gam...,1,2024-08-14 22:43:35,0.0,0.217,0.783


In [15]:
data.to_csv('google_store_processed.csv' , index = False)