# User Data for Free Apps
---
This project analyzes user data for Android and iOS mobile apps that are free to download and install.

The goal of the project is to analyze data to help developers understand which app types are the most likely to attract more users.
- - -

In [3]:
from csv import reader
ios_data = list(reader(open('AppleStore.csv')))
android_data = list(reader(open('googleplaystore.csv')))

In [6]:
def explore_data(dataset, start, end, rows_and_columns=False):
    dataset_slice = dataset[start:end]
    for row in dataset_slice:
        print(row)
        print('\n') # adds a new (empty) line aftr each row
    if rows_and_columns:
        print('Number of rows:', len(dataset))
        print('Number of columns:', len(dataset[0]))
print('iOS apps:')
print('\n')
explore_data(ios_data[1:],0,5,True)
print('\n')
print('\n')
print('android apps:')
print('\n')
explore_data(android_data[1:],0,5,True)
print('\n')
print('\n')

iOS apps:


['284882215', 'Facebook', '389879808', 'USD', '0.0', '2974676', '212', '3.5', '3.5', '95.0', '4+', 'Social Networking', '37', '1', '29', '1']


['389801252', 'Instagram', '113954816', 'USD', '0.0', '2161558', '1289', '4.5', '4.0', '10.23', '12+', 'Photo & Video', '37', '0', '29', '1']


['529479190', 'Clash of Clans', '116476928', 'USD', '0.0', '2130805', '579', '4.5', '4.5', '9.24.12', '9+', 'Games', '38', '5', '18', '1']


['420009108', 'Temple Run', '65921024', 'USD', '0.0', '1724546', '3842', '4.5', '4.0', '1.6.2', '9+', 'Games', '40', '5', '1', '1']


['284035177', 'Pandora - Music & Radio', '130242560', 'USD', '0.0', '1126879', '3594', '4.0', '4.5', '8.4.1', '12+', 'Music', '37', '4', '1', '1']


Number of rows: 7197
Number of columns: 16




android apps:


['Photo Editor & Candy Camera & Grid & ScrapBook', 'ART_AND_DESIGN', '4.1', '159', '19M', '10,000+', 'Free', '0', 'Everyone', 'Art & Design', 'January 7, 2018', '1.0.0', '4.0.3 and up']


['Coloring book moana', '

In [7]:
print('iOS info:')
print(ios_data[0])
print('android info:')
print(android_data[0])

iOS info:
['id', 'track_name', 'size_bytes', 'currency', 'price', 'rating_count_tot', 'rating_count_ver', 'user_rating', 'user_rating_ver', 'ver', 'cont_rating', 'prime_genre', 'sup_devices.num', 'ipadSc_urls.num', 'lang.num', 'vpp_lic']
android info:
['App', 'Category', 'Rating', 'Reviews', 'Size', 'Installs', 'Type', 'Price', 'Content Rating', 'Genres', 'Last Updated', 'Current Ver', 'Android Ver']


In [8]:
print(len(android_data[0]))

13


In [10]:
print(len(android_data[10473]))

12


In [11]:
del android_data[10473]

In [12]:
print(len(android_data[10473]))

13


In [14]:
ios_len = len(ios_data[0])
print(ios_len)

16


In [17]:
ios_row_ix = 0
for ios_row in ios_data[1:]:
    if len(ios_row) != ios_len:
        print(ios_row_ix, len(ios_row))
    ios_row_ix += 1

The Google Play store data set has duplicate entries for many apps. For example, the following Android apps show up multiple times in the data set:

In [22]:
unique_android_apps = []
duplicate_android_apps = []
for android_app in android_data[1:]:
    name = android_app[0]
    if name in unique_android_apps:
        duplicate_android_apps.append(name)
    else:
        unique_android_apps.append(name)
    
print(duplicate_android_apps[0:5])

['Quick PDF Scanner + OCR FREE', 'Box', 'Google My Business', 'ZOOM Cloud Meetings', 'join.me - Simple Meetings']


Overall, the data set contains the following number of duplicates:

In [24]:
n_android_duplicates = len(duplicate_android_apps)
print(n_android_duplicates)

1181


To remove duplicate entries, we will identify the entry with the largest number of review, which should represent the most recent data. This entry will be saved, and all other entries for the same app will be deleted.

In [25]:
reviews_max = {}
for android_app in android_data[1:]:
    name = android_app[0]
    n_reviews = float(android_app[3])
    if ( name in reviews_max and reviews_max[name] < n_reviews ) or name not in reviews_max:
        reviews_max[name] = n_reviews
print(len(reviews_max))

9659


In [27]:
android_clean = []
already_added = []
for android_app in android_data[1:]:
    name = android_app[0]
    n_reviews = float(android_app[3])
    if name in reviews_max:
        if n_reviews == reviews_max[name] and name not in already_added:
            android_clean.append(android_app)
            already_added.append(name)
print(len(android_clean))

9659


The above code removes duplicate entries for apps that appear more than once, saving only the app entry for which the most number of reviews are reported.

In [30]:
def isEnglish(a_str):
    n_nonEnglish = 0
    for ichar in a_str:
        if ord(ichar) > 127:
            n_nonEnglish += 1
    if n_nonEnglish > 3:
        return False
    else:
        return True
print(isEnglish('Instagram'))
print(isEnglish('爱奇艺PPS -《欢乐颂2》电视剧热播'))
print(isEnglish('Docs To Go™ Free Office Suite'))
print(isEnglish('Instachat 😜'))

True
False
True
True


In [32]:
english_ios_data = []
english_android_data = []
for ios_app in ios_data[1:]:
    name = ios_app[0]
    if isEnglish(name):
        english_ios_data.append(ios_app)
for android_app in android_clean[1:]:
    name = android_app[0]
    if isEnglish(name):
        english_android_data.append(android_app)
print(len(english_ios_data))
print(len(english_android_data))

7197
9613


In [33]:
print(len(ios_data))

7198


In [43]:
free_ios_data = []
free_android_data = []
for ios_app in english_ios_data:
    if float(ios_app[4]) == 0.0:
        free_ios_data.append(ios_app)
for android_app in english_android_data:
    if android_app[7] == '0':
        free_android_data.append(android_app)
print(len(free_ios_data))
print(len(free_android_data))

4056
8863


We want to identify apps that are successful in both the App Store and Google Play markets, since maximizing users will help increase ad revenue. To validate an app is successful across both markets, we first test a minimal Android version in Google Play; pending a good user response, we then develop the Android app further; after six months, if the app proves profitable, we develop an iOS version for the App Store.

In [48]:
def freq_table(dataset, index):
    table = {}
    for row in dataset:
        col = row[index]
        if col in table:
            table[col] += ( 1 / len(dataset) * 100 )
        else:
            table[col] = ( 1 / len(dataset) * 100 )
    return table
        
def display_table(dataset, index):
    table = freq_table(dataset, index)
    table_display = []
    for key in table:
        key_val_as_tuple = (table[key], key)
        table_display.append(key_val_as_tuple)
        
    table_sorted = sorted(table_display, reverse = True)
    for entry in table_sorted:
        print(entry[1], ':', entry[0])
        
print('iOS genres:')
print(display_table(free_ios_data,11))
print('\n')
print('Android genres')
print(display_table(free_android_data,9))
print('\n')
print('Android categories')
print(display_table(free_android_data,1))

iOS genres:
Games : 55.645956607493034
Entertainment : 8.23471400394471
Photo & Video : 4.117357001972387
Social Networking : 3.5256410256410273
Education : 3.2544378698224867
Shopping : 2.983234714003946
Utilities : 2.6873767258382655
Lifestyle : 2.3175542406311647
Finance : 2.071005917159764
Sports : 1.947731755424064
Health & Fitness : 1.873767258382644
Music : 1.6518737672583834
Book : 1.6272189349112434
Productivity : 1.5285996055226831
News : 1.429980276134123
Travel : 1.3806706114398428
Food & Drink : 1.0601577909270221
Weather : 0.7642998027613416
Reference : 0.493096646942801
Navigation : 0.493096646942801
Business : 0.493096646942801
Catalogs : 0.22189349112426038
Medical : 0.19723865877712032
None


Android genres
Tools : 8.45086313889199
Entertainment : 6.070179397495204
Education : 5.348076272142615
Business : 4.592124562789123
Productivity : 3.892587160103802
Lifestyle : 3.892587160103802
Finance : 3.7007785174320205
Medical : 3.5315355974275073
Sports : 3.463838429425702

At 55%, gaming apps are by far the most prevalent among free English iOS apps in the App Store, followed distantly by general entertaintment apps at 8%. Other entertaintment-based apps, like social networking and photo and video apps, are generally more prevalent than apps designed for practical purposes, like shopping and education. In general, for free English iOS apps, the App Store is dominated by apps geared towards entertainment.

The most common genres for free English Android apps on Google Play include family apps, games, and tools, I

In [50]:
for genre in freq_table(free_ios_data,11):
    total = 0
    len_genre = 0
    for app in free_ios_data:
        genre_app = app[11]
        if genre_app == genre:
            n_ratings = float(app[5])
            total += n_ratings
            len_genre += 1
    avg_n_ratings = total / len_genre
    print(genre, avg_n_ratings)

Social Networking 53078.195804195806
Photo & Video 27249.892215568863
Games 18924.68896765618
Music 56482.02985074627
Reference 67447.9
Health & Fitness 19952.315789473683
Weather 47220.93548387097
Utilities 14010.100917431193
Travel 20216.01785714286
Shopping 18746.677685950413
News 15892.724137931034
Navigation 25972.05
Lifestyle 8978.308510638299
Entertainment 10822.961077844311
Food & Drink 20179.093023255813
Sports 20128.974683544304
Book 8498.333333333334
Finance 13522.261904761905
Education 6266.333333333333
Productivity 19053.887096774193
Business 6367.8
Catalogs 1779.5555555555557
Medical 459.75


Reference, Music, and Social Networking apps are the most popular free English iOS apps, with about double the amount of user ratings over the next leading app types.

In [53]:
for category in freq_table(free_android_data,1):
    total = 0
    len_category = 0
    for app in free_android_data:
        category_app = app[1]
        if category_app == category:
            n_installs = app[5]
            n_installs = n_installs.replace('+','')
            n_installs = n_installs.replace(',','')
            n_installs = float(n_installs)
            total += n_installs
            len_category += 1
    avg_n_installs = total / len_category
    if avg_n_installs > 10000000:
        print(category, avg_n_installs)

COMMUNICATION 38456119.167247385
ENTERTAINMENT 11640705.88235294
GAME 15588015.603248259
SOCIAL 23253652.127118643
PHOTOGRAPHY 17840110.40229885
TRAVEL_AND_LOCAL 13984077.710144928
TOOLS 10801391.298666667
PRODUCTIVITY 16787331.344927534
VIDEO_PLAYERS 24727872.452830188


Communication, social, and video player apps are the most installed free English Android apps on Google Play. 