# Importing all dependencies

In [7]:
# Dependencies
%matplotlib notebook
import csv
import numpy as np
import pandas as pd
import matplotlib as plt

In [8]:
# Google and Apple mobile app data from Kaggle (in csv format)
google_app = "googleplaystore.csv"
apple_csv = "appleStore.csv"

# Read and display the CSV with Pandas
google_app_pd = pd.read_csv(google_app)
apple_store_pd = pd.read_csv(apple_csv)

In [24]:
print(google_app_pd.shape)
print(apple_store_pd.shape)

(10841, 13)
(7197, 17)


# Cleaning the data

In [9]:
# google_app_pd.head()

In [10]:
print(google_app_pd.columns)
print(apple_store_pd.columns)

Index(['App', 'Category', 'Rating', 'Reviews', 'Size', 'Installs', 'Type',
       'Price', 'Content Rating', 'Genres', 'Last Updated', 'Current Ver',
       'Android Ver'],
      dtype='object')
Index(['Unnamed: 0', 'id', 'track_name', 'size_bytes', 'currency', 'price',
       'rating_count_tot', 'rating_count_ver', 'user_rating',
       'user_rating_ver', 'ver', 'cont_rating', 'prime_genre',
       'sup_devices.num', 'ipadSc_urls.num', 'lang.num', 'vpp_lic'],
      dtype='object')


###  Removing irrelevant observations & Fixing Structural errors

In [11]:
# Rename apple data's columns to match with google's dataset
appledf=apple_store_pd.rename(columns = {'track_name':'App','size_bytes':'Size','price':'Price','rating_count_tot':'Reviews',\
                                'user_rating':'Rating','cont_rating':'Content Rating','prime_genre':'Genres'})
# appledf.columns

In [12]:
apple_store_data = appledf[['App','Size','Price',"Reviews","Rating","Content Rating","Genres"]]
google_app_data = google_app_pd[['App','Size','Price',"Reviews","Rating","Content Rating","Genres"]]

In [13]:
# print(apple_store_data.columns)
# print(google_app_data.columns)

In [14]:
# Adding a column "Source" in both the datasets to make it identifiable if the datasets are merged
apple_store_data["Source"] = "Apple"
google_app_data["Source"] = "Google"

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  This is separate from the ipykernel package so we can avoid doing imports until


In [15]:
apple_store_data.head()

Unnamed: 0,App,Size,Price,Reviews,Rating,Content Rating,Genres,Source
0,PAC-MAN Premium,100788224,3.99,21292,4.0,4+,Games,Apple
1,Evernote - stay organized,158578688,0.0,161065,4.0,4+,Productivity,Apple
2,"WeatherBug - Local Weather, Radar, Maps, Alerts",100524032,0.0,188583,3.5,4+,Weather,Apple
3,"eBay: Best App to Buy, Sell, Save! Online Shop...",128512000,0.0,262241,4.0,12+,Shopping,Apple
4,Bible,92774400,0.0,985920,4.5,4+,Reference,Apple


In [107]:
# Change format of 'Size' column in Google app's data from 19M to 19000000 to match Apple's data and perform calculations 
# Defining a function

def value_to_float(x):
    if type(x) == float or type(x) == int:
        return x
    if 'K' in x:
        if len(x) > 1:
            return float(x.replace('K', '')) * 1000
        return 1000
    if 'M' in x:
        if len(x) > 1:
            return float(x.replace('M', '')) * 1000000
        return 1000000
    if 'B' in x:
        return float(x.replace('B', '')) * 1000000000
    return 0

# Calling above function

google_app_data["Size"] =  google_app_data["Size"].apply(value_to_float)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


In [16]:
#google_app_data.head()

In [19]:
# Rename google data's 'Content Reading' to match with apple's dataset
google_app_data["Content Rating"] = google_app_data["Content Rating"].replace(['Everyone'], '4+')
google_app_data["Content Rating"] = google_app_data['Content Rating'].replace(["Everyone 10+"], '9+')
google_app_data["Content Rating"] = google_app_data['Content Rating'].replace(["Teen"], '12+')
google_app_data["Content Rating"] = google_app_data['Content Rating'].replace(["Mature 17+"], '17+')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  This is separate from the ipykernel package so we can avoid doing imports until
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  after removing the cwd from sys.path.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the cavea

In [20]:
google_app_data.head()

Unnamed: 0,App,Size,Price,Reviews,Rating,Content Rating,Genres,Source
0,Photo Editor & Candy Camera & Grid & ScrapBook,19M,0,159,4.1,4+,Art & Design,Google
1,Coloring book moana,14M,0,967,3.9,4+,Art & Design;Pretend Play,Google
2,"U Launcher Lite – FREE Live Cool Themes, Hide ...",8.7M,0,87510,4.7,4+,Art & Design,Google
3,Sketch - Draw & Paint,25M,0,215644,4.5,12+,Art & Design,Google
4,Pixel Draw - Number Art Coloring Book,2.8M,0,967,4.3,4+,Art & Design;Creativity,Google


In [21]:
apple_store_data.head()

Unnamed: 0,App,Size,Price,Reviews,Rating,Content Rating,Genres,Source
0,PAC-MAN Premium,100788224,3.99,21292,4.0,4+,Games,Apple
1,Evernote - stay organized,158578688,0.0,161065,4.0,4+,Productivity,Apple
2,"WeatherBug - Local Weather, Radar, Maps, Alerts",100524032,0.0,188583,3.5,4+,Weather,Apple
3,"eBay: Best App to Buy, Sell, Save! Online Shop...",128512000,0.0,262241,4.0,12+,Shopping,Apple
4,Bible,92774400,0.0,985920,4.5,4+,Reference,Apple


In [22]:
google_app_data.shape

(10841, 8)