# Apps for Apple Store and Google Play

In this project, we aim to form a profile for profitable apps. We hope to understand which apps are demanded. If this profile can be created, app designers push themselves making apps proper for result of this project.

In [65]:
def explore_data(dataset, start, end, rows_and_columns=False):
    dataset_slice = dataset[start:end]    
    for row in dataset_slice:
        print(row)
        print('\n') # adds a new (empty) line after each row

    if rows_and_columns:
        print('Number of rows:', len(dataset))
        print('Number of columns:', len(dataset[0]))

In [66]:
from csv import reader

opened_file_1 = open("AppleStore.csv")
read_file_1 = reader(opened_file_1)
data_1 = list(read_file_1)

opened_file_2 = open("googleplaystore.csv")
read_file_2 = reader(opened_file_2)
data_2 = list(read_file_2)

In [67]:
explore_data(data_1, 0, 6, True)

['id', 'track_name', 'size_bytes', 'currency', 'price', 'rating_count_tot', 'rating_count_ver', 'user_rating', 'user_rating_ver', 'ver', 'cont_rating', 'prime_genre', 'sup_devices.num', 'ipadSc_urls.num', 'lang.num', 'vpp_lic']


['284882215', 'Facebook', '389879808', 'USD', '0.0', '2974676', '212', '3.5', '3.5', '95.0', '4+', 'Social Networking', '37', '1', '29', '1']


['389801252', 'Instagram', '113954816', 'USD', '0.0', '2161558', '1289', '4.5', '4.0', '10.23', '12+', 'Photo & Video', '37', '0', '29', '1']


['529479190', 'Clash of Clans', '116476928', 'USD', '0.0', '2130805', '579', '4.5', '4.5', '9.24.12', '9+', 'Games', '38', '5', '18', '1']


['420009108', 'Temple Run', '65921024', 'USD', '0.0', '1724546', '3842', '4.5', '4.0', '1.6.2', '9+', 'Games', '40', '5', '1', '1']


['284035177', 'Pandora - Music & Radio', '130242560', 'USD', '0.0', '1126879', '3594', '4.0', '4.5', '8.4.1', '12+', 'Music', '37', '4', '1', '1']


Number of rows: 7198
Number of columns: 16


In [68]:
print(data_1[0])
print(data_2[0])

['id', 'track_name', 'size_bytes', 'currency', 'price', 'rating_count_tot', 'rating_count_ver', 'user_rating', 'user_rating_ver', 'ver', 'cont_rating', 'prime_genre', 'sup_devices.num', 'ipadSc_urls.num', 'lang.num', 'vpp_lic']
['App', 'Category', 'Rating', 'Reviews', 'Size', 'Installs', 'Type', 'Price', 'Content Rating', 'Genres', 'Last Updated', 'Current Ver', 'Android Ver']


In the "googleplaystore" file, there is a wrong data. Its values are shifted to right, so it has to removed. 

In [69]:
del data_2[10473]

In addition to wrong data, there are also repeated apps. They should also be removed. It can be seen below that there are 1181 repeated apps. 

In [70]:
duplicate_apps = []
unique_apps = []

for app in data_2[1:]:
    name = app[0]
    if name not in unique_apps:
        unique_apps.append(name)
    else:
        duplicate_apps.append(name)

In [71]:
len(duplicate_apps)

1181

*Box* is one of the repeated apps. Code shows that there are 3 *Box* app entry in the "googleplaystore" file.

In [72]:
for app in data_2:
    name = app[0]
    if name == "Box":
        print(app)

['Box', 'BUSINESS', '4.2', '159872', 'Varies with device', '10,000,000+', 'Free', '0', 'Everyone', 'Business', 'July 31, 2018', 'Varies with device', 'Varies with device']
['Box', 'BUSINESS', '4.2', '159872', 'Varies with device', '10,000,000+', 'Free', '0', 'Everyone', 'Business', 'July 31, 2018', 'Varies with device', 'Varies with device']
['Box', 'BUSINESS', '4.2', '159872', 'Varies with device', '10,000,000+', 'Free', '0', 'Everyone', 'Business', 'July 31, 2018', 'Varies with device', 'Varies with device']


Review numbers are different for repeated apps. Here, we have to decide. We chose keeping the row having highest review number. To do this, we created a dictionary holding app names and max review number for this app. 

In [73]:
reviews_max = {}

for app in data_2[1:]:
    name = app[0]
    n_reviews = float(app[3])
    if name in reviews_max and reviews_max[name]<n_reviews:
        n_reviews = reviews_max[name]
    if name not in reviews_max:
        reviews_max[name] = n_reviews

Then, we created a new list holding unique apps with max review number. 

In [74]:
android_clean = []
already_added = []

for app in data_2[1:]:
    name = app[0]
    n_reviews = float(app[3])
    if n_reviews == reviews_max[name] and name not in already_added:
        android_clean.append(app)
        already_added.append(name)

We are interested in only apps which are written in English so apps in another languages are removed. 

In [75]:
def eng_apps(string):
    n_non_eng_char = 0
    for char in string:
        if ord(char) > 127:
            n_non_eng_char += 1
        if n_non_eng_char == 3:
            return False
    return True

In [76]:
for app in data_1[1:]:
    name = app[1]
    if not eng_apps(name):
        data_1.remove(app) 

for app in android_clean:
    name = app[0]
    if not eng_apps(name):
        android_clean.remove(app) 

We are interested in only free apps so they are kept only. 

In [83]:
for app in data_1[1:]:
    price = app[4]
    if price != "0.0":
        data_1.remove(app)

for app in android_clean:
    price = app[7]
    if price != "0":
        android_clean.remove(app)

In [91]:
genres = {"Games": 0, "Social Networking":0, "Music":0}

for app in data_1[1:]:
    genre = app[11]
    if genre in genres:
        genres[genre] += 1

In [94]:
def freq_table(dataset, index):
    freq_col = {}
    for app in dataset[1:]:
        col = app[index]
        if col not in freq_col:
            freq_col[col] = 1
        else:
            freq_col[col] += 1
    
    return freq_col

In [96]:
def display_table(dataset, index):
    table = freq_table(dataset, index)
    table_display = []
    for key in table:
        key_val_as_tuple = (table[key], key)
        table_display.append(key_val_as_tuple)

    table_sorted = sorted(table_display, reverse = True)
    for entry in table_sorted:
        print(entry[1], ':', entry[0])

In [103]:
ios_genres = freq_table(data_1, 11)

In [99]:
display_table(android_clean, 9)

Tools : 758
Entertainment : 538
Education : 477
Business : 408
Productivity : 348
Medical : 348
Lifestyle : 347
Finance : 331
Sports : 309
Personalization : 309
Communication : 288
Health & Fitness : 274
Action : 274
Photography : 263
News & Magazines : 248
Social : 236
Travel & Local : 206
Shopping : 199
Books & Reference : 196
Simulation : 183
Dating : 168
Arcade : 165
Video Players & Editors : 158
Casual : 156
Maps & Navigation : 123
Food & Drink : 111
Puzzle : 103
Racing : 88
Role Playing : 86
Strategy : 83
Libraries & Demo : 83
Auto & Vehicles : 82
House & Home : 72
Weather : 71
Events : 63
Adventure : 62
Comics : 53
Beauty : 53
Art & Design : 52
Parenting : 44
Card : 40
Trivia : 37
Casino : 37
Educational;Education : 35
Board : 35
Educational : 33
Education;Education : 31
Casual;Pretend Play : 25
Word : 23
Music : 18
Racing;Action & Adventure : 16
Puzzle;Brain Games : 15
Entertainment;Music & Video : 15
Casual;Action & Adventure : 13
Casual;Brain Games : 12
Board;Brain Games : 11