In [15]:
# Dependencies and Setup
import pandas as pd
import numpy as np
import os
import re
import string

## Data Extract and Clean-up  - Google PlayStore.

In [16]:
# Load the raw Google Play Store export.
csv_path1 = "Resources/googleplaystore.csv"
GooglePlayStore_original = pd.read_csv(csv_path1, encoding='UTF-8')

# Per-column non-null counts of the raw file (kept for ad-hoc inspection).
G_Record_count = GooglePlayStore_original.count()

# Drop the columns that are not part of the extract, keep the ones we need and
# tag every row with a constant currency code.
GooglePlayStore_df = GooglePlayStore_original.drop(
    columns=['Installs', 'Last Updated', 'Current Ver', 'Android Ver']
)
GPS_df = GooglePlayStore_df[
    ['App', 'Size', 'Price', 'Type', 'Reviews', 'Rating', 'Content Rating', 'Genres', 'Category']
].assign(currency="USD")

# Rename to the output column names and fix the output column order.
# 'Genres' is intentionally not part of the final layout.
GooglePlayStore_cols_df = GPS_df.rename(
    columns={
        "App": "App_Name",
        "Size": "App_Size",
        "Price": "App_Price",
        "Reviews": "Review_count",
        "Rating": "User_Rating",
        "Content Rating": "Content_Rating",
    }
)[["App_Name", "App_Size", "currency", "App_Price", "Type",
   "Review_count", "User_Rating", "Content_Rating", "Category"]]

# Type conversion. The columns are rebuilt with .assign() instead of
# `df[col].fillna(0, inplace=True)`: that chained in-place call is deprecated
# and leaves the frame untouched under pandas Copy-on-Write, so missing values
# would survive and break the integer cast.
GooglePlayStore_cols_df = GooglePlayStore_cols_df.assign(
    # NOTE(review): prices may arrive as text with a "$" prefix (e.g. "$4.99");
    # confirm against the CSV. Stripping it is a no-op for numeric input.
    # Missing prices become 0.0; any other non-numeric text still raises.
    App_Price=lambda df: (
        df["App_Price"]
        .map(lambda value: value.replace("$", "") if isinstance(value, str) else value)
        .astype(float)
        .fillna(0)
    ),
    # Missing review counts become 0 before the integer cast.
    Review_count=lambda df: df["Review_count"].fillna(0).astype(int),
)

# App name clean-up: keep letters, digits, spaces and the characters
# \ / @ + - : , | #  -- every other character is removed (not replaced by a
# space). A name made up only of other characters therefore becomes ''.
# The .str accessor leaves missing names as missing instead of raising.
Googl_App_Name = GooglePlayStore_cols_df["App_Name"].str.replace(
    r'[^\\/@+\-:,|#a-zA-Z0-9 ]+', '', regex=True
)
GooglePlayStore_cols_df = GooglePlayStore_cols_df.assign(App_Name=Googl_App_Name)

# Write the cleaned Google extract to CSV for the data-transformation step.
G_filename = "Google_data_ext.csv"
GooglePlayStore_cols_df.to_csv(G_filename)


## Extract and Clean-up AppleStore data.

In [17]:
# Load the raw Apple App Store export.
csv_path2 = "Resources/AppleStore.csv"
AppleStore_original = pd.read_csv(csv_path2, encoding='UTF-8')

# Drop the columns that are not part of the extract, keep the ones we need,
# add an empty 'Type' column and set the currency code to "USD" on every row.
AppleStore_df = AppleStore_original.drop(
    columns=['id', 'rating_count_ver', 'user_rating_ver', 'ver',
             'sup_devices.num', 'ipadSc_urls.num', 'lang.num', 'vpp_lic']
)
AS_df = AppleStore_df[
    ['track_name', 'size_bytes', 'price', 'rating_count_tot',
     'user_rating', 'cont_rating', 'prime_genre', 'currency']
].assign(Type="", currency="USD")

# Rename to the output column names and fix the output column order.
AppleStore_cols_df = AS_df.rename(
    columns={
        "track_name": "App_Name",
        "size_bytes": "App_Size",
        "price": "App_Price",
        "rating_count_tot": "Review_count",
        "user_rating": "User_Rating",
        "cont_rating": "Content_Rating",
        "prime_genre": "Category",
    }
)[["App_Name", "App_Size", "currency", "App_Price", "Type",
   "Review_count", "User_Rating", "Content_Rating", "Category"]]

# Type conversion. The columns are rebuilt with .assign() instead of
# `df[col].fillna(0, inplace=True)`: that chained in-place call is deprecated
# and leaves the frame untouched under pandas Copy-on-Write, so missing values
# would survive and break the integer cast.
AppleStore_cols_df = AppleStore_cols_df.assign(
    # Missing prices become 0.0.
    App_Price=lambda df: df["App_Price"].fillna(0).astype(float),
    # Missing review counts become 0 before the integer cast.
    Review_count=lambda df: df["Review_count"].fillna(0).astype(int),
)

# App name clean-up. Despite its name, `valid_chars` is the pattern of
# characters to REMOVE: anything that is not a letter, digit, space or one of
# / @ + - : , | #  is deleted (not replaced by a space). A name made up only
# of other characters (e.g. non-Latin script) therefore becomes ''.
# The .str accessor leaves missing names as missing instead of raising.
valid_chars = r'[^ 0-9a-zA-Z\/@+\-:,|#]+'
AAPL_App_Name = AppleStore_cols_df["App_Name"].str.replace(valid_chars, '', regex=True)
AppleStore_cols_df = AppleStore_cols_df.assign(App_Name=AAPL_App_Name)

# Write the cleaned Apple extract to CSV for the data-transformation step.
APPL_filename = "Apple_data_ext.csv"
AppleStore_cols_df.to_csv(APPL_filename)

7167                        脱出ゲーム　わたしをみつけて　-おじいさんとわたしの物語-
7168                        Escape from the frigid Igloo.
7169          Talking Santa - Video santa claus calls you
7170                                            CTFxCmoji
7171                      Room Escape Game - Santa's Room
7172                                 Rescue the Enchanter
7173                                   My Diary - 你的名字非官方
7174    VR Thrills: Roller Coaster 360 (Google Cardboard)
7175     Santa Kids Hair Salon - Christmas Makeover Games
7176                                   Human Juggling Cup
7177                             Again - room escape game
7178                                    Saloons Unleashed
7179               Fam — Group video calling for iMessage
7180                     Laurie Hernandez the Human Emoji
7181                                                 剑倚手游
7182                                    camera for filter
7183                                      Survivalcraft 2
7184                                      剑客情缘-高爆率高掉落天天疯玩
7185                                       问仙奇遇-新玩法新套装嗨到爆
7186                   脱出ゲーム - 書道教室 -  "漢字"の謎に満ちた部屋からの 脱出
7187                            Escape Game: illumination
7188         Demolition Derby Virtual Reality (VR) Racing
7189                           飞刀传奇-动作武侠热血江湖即时PK传奇（登录爆金装）
