# 1. Read Data

In [1]:
import pandas as pd
import numpy as np
import warnings
from textblob import TextBlob

warnings.filterwarnings("ignore")

# Load the four source tables: the parent app catalogue and its user reviews,
# plus the child app list and its reviews.
dfParentApps = pd.read_csv("googleplaystore.csv")
dfParentReviews = pd.read_csv("googleplaystore_user_reviews.csv")
dfChildApps = pd.read_csv("Apps.csv")
dfChildReviews = pd.read_csv("Reviews.csv")

# Preview the top rows of every table, in load order (optional sanity check)
for preview_frame in (dfParentApps, dfParentReviews, dfChildApps, dfChildReviews):
    print(preview_frame.head())

                                                 App        Category  Rating  \
0     Photo Editor & Candy Camera & Grid & ScrapBook  ART_AND_DESIGN     4.1   
1                                Coloring book moana  ART_AND_DESIGN     3.9   
2  U Launcher Lite – FREE Live Cool Themes, Hide ...  ART_AND_DESIGN     4.7   
3                              Sketch - Draw & Paint  ART_AND_DESIGN     4.5   
4              Pixel Draw - Number Art Coloring Book  ART_AND_DESIGN     4.3   

  Reviews  Size     Installs  Type Price Content Rating  \
0     159   19M      10,000+  Free     0       Everyone   
1     967   14M     500,000+  Free     0       Everyone   
2   87510  8.7M   5,000,000+  Free     0       Everyone   
3  215644   25M  50,000,000+  Free     0           Teen   
4     967  2.8M     100,000+  Free     0       Everyone   

                      Genres      Last Updated         Current Ver  \
0               Art & Design   January 7, 2018               1.0.0   
1  Art & Design;Pretend 

# 2. Rename Columns

In [2]:
# Align column names across the four tables so they can be joined later.
# Each entry pairs a dataframe with the old -> new column mapping applied to it.
# NOTE(review): 'repliedAt' reads like a reply timestamp rather than the date
# the review itself was written - confirm it is the intended 'Review Date'.
column_renames = [
    (dfParentApps, {"App": "App Name"}),
    (dfParentReviews, {"App": "App Name"}),
    (dfChildApps, {"title": "App Name", "appId": "app_Id"}),
    (dfChildReviews, {"repliedAt": "Review Date"}),
]
for frame_to_rename, column_mapping in column_renames:
    # Renaming in place keeps the original variable names bound to the updated frames
    frame_to_rename.rename(columns=column_mapping, inplace=True)

# Preview every table again to confirm the new column names (optional)
for frame_to_rename, _ in column_renames:
    print(frame_to_rename.head())

                                            App Name        Category  Rating  \
0     Photo Editor & Candy Camera & Grid & ScrapBook  ART_AND_DESIGN     4.1   
1                                Coloring book moana  ART_AND_DESIGN     3.9   
2  U Launcher Lite – FREE Live Cool Themes, Hide ...  ART_AND_DESIGN     4.7   
3                              Sketch - Draw & Paint  ART_AND_DESIGN     4.5   
4              Pixel Draw - Number Art Coloring Book  ART_AND_DESIGN     4.3   

  Reviews  Size     Installs  Type Price Content Rating  \
0     159   19M      10,000+  Free     0       Everyone   
1     967   14M     500,000+  Free     0       Everyone   
2   87510  8.7M   5,000,000+  Free     0       Everyone   
3  215644   25M  50,000,000+  Free     0           Teen   
4     967  2.8M     100,000+  Free     0       Everyone   

                      Genres      Last Updated         Current Ver  \
0               Art & Design   January 7, 2018               1.0.0   
1  Art & Design;Pretend 

# 3. Merge Dataframes

In [3]:
# Left-join each app table to its own reviews: parent tables match on the app
# name, child tables match on the app id.
mergedMain = dfParentApps.merge(dfParentReviews, how='left', on='App Name')
mergedChild = dfChildApps.merge(dfChildReviews, how='left', on='app_Id')

# Bring only the child review date and review text onto the parent rows
# (matched by app name), then discard rows that are exact duplicates.
mergedAll = mergedMain.merge(
    mergedChild[['App Name', 'Review Date', 'content']],
    how='left',
    on='App Name',
).drop_duplicates()

# Preview the combined table (optional)
print(mergedAll.head())

                                         App Name        Category  Rating  \
0  Photo Editor & Candy Camera & Grid & ScrapBook  ART_AND_DESIGN     4.1   
1                             Coloring book moana  ART_AND_DESIGN     3.9   
2                             Coloring book moana  ART_AND_DESIGN     3.9   
3                             Coloring book moana  ART_AND_DESIGN     3.9   
4                             Coloring book moana  ART_AND_DESIGN     3.9   

  Reviews Size  Installs  Type Price Content Rating  \
0     159  19M   10,000+  Free     0       Everyone   
1     967  14M  500,000+  Free     0       Everyone   
2     967  14M  500,000+  Free     0       Everyone   
3     967  14M  500,000+  Free     0       Everyone   
4     967  14M  500,000+  Free     0       Everyone   

                      Genres      Last Updated Current Ver   Android Ver  \
0               Art & Design   January 7, 2018       1.0.0  4.0.3 and up   
1  Art & Design;Pretend Play  January 15, 2018       2

# 4. Convert Data Types

In [4]:
# Function to convert 'Size' values
def convert_size(size_str):
    """Convert a size label such as '19M' or '201k' to a float.

    Args:
        size_str: Size value to convert. Strings containing 'M' are scaled by
            1,000,000 and strings containing 'k' by 1,000; any other string is
            parsed as a plain number. Non-string values (numbers, None, NaN)
            are accepted as well.

    Returns:
        The size as a float, or NaN when the value cannot be interpreted
        (for example free text, None, or a missing value).
    """
    if not isinstance(size_str, str):
        # The substring checks below only work on strings; without this guard a
        # number, None or NaN would raise TypeError instead of being handled.
        try:
            return float(size_str)
        except (TypeError, ValueError):
            return np.nan

    try:
        if 'M' in size_str:
            return float(size_str.replace('M', '')) * 1_000_000
        elif 'k' in size_str:
            return float(size_str.replace('k', '')) * 1_000
        else:
            return float(size_str)
    except ValueError:
        # Non-numeric text that cannot be parsed as a size
        return np.nan

# Convert 'Size' labels to floats via convert_size ('M' -> x1,000,000,
# 'k' -> x1,000); values that cannot be parsed become NaN.
mergedAll['Size'] = mergedAll['Size'].astype(str).apply(convert_size).astype(float)

# Strip '+' and thousands separators from 'Installs'. regex=False keeps both
# replacements literal: '+' is a regex metacharacter and the default value of
# `regex` differs between pandas versions, so it is spelled out here just as it
# is for the '$' in 'Price' below.
mergedAll['Installs'] = (
    mergedAll['Installs']
    .astype(str)
    .str.replace('+', '', regex=False)
    .str.replace(',', '', regex=False)
)
# Anything that is still not a number counts as 0 installs
mergedAll['Installs'] = pd.to_numeric(mergedAll['Installs'], errors='coerce').fillna(0).astype(int)

# Drop the currency symbol from 'Price'; unparseable prices become 0.0
mergedAll['Price'] = mergedAll['Price'].astype(str).str.replace('$', '', regex=False)
mergedAll['Price'] = pd.to_numeric(mergedAll['Price'], errors='coerce').fillna(0.0).astype(float)

# Convert other columns to appropriate types
mergedAll['Rating'] = pd.to_numeric(mergedAll['Rating'], errors='coerce')
mergedAll['Review Date'] = pd.to_datetime(mergedAll['Review Date'], errors='coerce')
# After this line 'Review Date' holds 'dd-mm-YYYY' text (missing where parsing
# failed). Text in that layout does not sort or compare chronologically, so it
# must be parsed back to datetimes before any date comparison.
mergedAll['Review Date'] = mergedAll['Review Date'].dt.strftime('%d-%m-%Y')

# Display the first few rows of the dataframe to verify changes (optional)
print(mergedAll.head())


                                         App Name        Category  Rating  \
0  Photo Editor & Candy Camera & Grid & ScrapBook  ART_AND_DESIGN     4.1   
1                             Coloring book moana  ART_AND_DESIGN     3.9   
2                             Coloring book moana  ART_AND_DESIGN     3.9   
3                             Coloring book moana  ART_AND_DESIGN     3.9   
4                             Coloring book moana  ART_AND_DESIGN     3.9   

  Reviews        Size  Installs  Type  Price Content Rating  \
0     159  19000000.0     10000  Free    0.0       Everyone   
1     967  14000000.0    500000  Free    0.0       Everyone   
2     967  14000000.0    500000  Free    0.0       Everyone   
3     967  14000000.0    500000  Free    0.0       Everyone   
4     967  14000000.0    500000  Free    0.0       Everyone   

                      Genres      Last Updated Current Ver   Android Ver  \
0               Art & Design   January 7, 2018       1.0.0  4.0.3 and up   
1  Art

# 5. Sentiment Analysis

In [5]:
# Function to perform sentiment analysis
def analyze_sentiment(text):
    """Score a piece of text with TextBlob and label its sentiment.

    Args:
        text: The text to analyse.

    Returns:
        A (sentiment, polarity, subjectivity) tuple. The sentiment label is
        'positive' when polarity is above zero, 'negative' when it is below
        zero, and 'neutral' otherwise.
    """
    scores = TextBlob(text).sentiment
    polarity, subjectivity = scores.polarity, scores.subjectivity
    # Label by the sign of the polarity; anything that is neither above nor
    # below zero falls through to 'neutral'.
    sentiment = (
        'positive' if polarity > 0
        else 'negative' if polarity < 0
        else 'neutral'
    )
    return sentiment, polarity, subjectivity

# Score every review text; missing reviews are analysed as empty strings.
review_text = mergedAll['content'].fillna('')
sentiment_labels, polarity_scores, subjectivity_scores = zip(*review_text.map(analyze_sentiment))

# Store each component of the result in its own column
mergedAll['sentiment'] = sentiment_labels
mergedAll['polarity'] = polarity_scores
mergedAll['subjectivity'] = subjectivity_scores

# Preview the frame with the new sentiment columns (optional)
print(mergedAll.head())


                                         App Name        Category  Rating  \
0  Photo Editor & Candy Camera & Grid & ScrapBook  ART_AND_DESIGN     4.1   
1                             Coloring book moana  ART_AND_DESIGN     3.9   
2                             Coloring book moana  ART_AND_DESIGN     3.9   
3                             Coloring book moana  ART_AND_DESIGN     3.9   
4                             Coloring book moana  ART_AND_DESIGN     3.9   

  Reviews        Size  Installs  Type  Price Content Rating  \
0     159  19000000.0     10000  Free    0.0       Everyone   
1     967  14000000.0    500000  Free    0.0       Everyone   
2     967  14000000.0    500000  Free    0.0       Everyone   
3     967  14000000.0    500000  Free    0.0       Everyone   
4     967  14000000.0    500000  Free    0.0       Everyone   

                      Genres  ...   Android Ver  \
0               Art & Design  ...  4.0.3 and up   
1  Art & Design;Pretend Play  ...  4.0.3 and up   
2  Ar

# 6. Split Data and Save to CSV

In [7]:
# Define the pre-COVID date (first day counted as part of the COVID period)
pre_covid_date = '2020-03-01'

# 'Review Date' holds 'dd-mm-YYYY' text at this point, so comparing it directly
# with the ISO string above would order values character by character (for
# example '25-01-2018' >= '2020-03-01' is True). Parse the dates for the
# comparison only; the column that gets saved keeps its existing format.
review_dates = pd.to_datetime(mergedAll['Review Date'], format='%d-%m-%Y', errors='coerce')
cutoff = pd.Timestamp(pre_covid_date)

# Split the data into pre-COVID and COVID periods. Rows without a usable
# review date match neither condition and are left out of both files.
pre_covid_data = mergedAll[review_dates < cutoff]
covid_data = mergedAll[review_dates >= cutoff]

# Save the dataframes to CSV files
# NOTE(review): these are absolute, user-specific paths; consider a
# configurable output directory so the notebook runs on other machines.
pre_covid_data.to_csv('/Users/hilalborklu/Downloads/pre_covid_data(1).csv', index=False)
covid_data.to_csv('/Users/hilalborklu/Downloads/covid_data(1).csv', index=False)

print("Pre-COVID and COVID datasets have been saved to CSV files.")


Pre-COVID and COVID datasets have been saved to CSV files.
