# Profitable app profiles

# Scenario

Our aim in this project is to find mobile app profiles that are profitable. We assume we're working as data analysts for a company that builds mobile apps, and our job is to enable our team of developers to make data-driven decisions with respect to the kind of apps they build.

At our company, we only build apps that are free to download and install, and our main source of revenue consists of in-app ads. This means that our revenue for any given app is mostly influenced by the number of users that use our app. Our goal for this project is to analyze data to help our developers understand what kinds of apps are likely to attract more users.



In [1]:
from csv import reader

In [2]:
# Read the whole Google Play CSV into memory as a list of rows (each row is a
# list of strings). The `with` block closes the file handle as soon as the rows
# have been read; newline="" is what the csv module asks for so that quoted
# fields containing line breaks are parsed correctly.
with open("googleplaystore.csv", encoding="utf-8", newline="") as csv_file:
    android = list(reader(csv_file))

In [3]:
type(android)

list

In [4]:
len(android)

10842

In [5]:
# The first row of the file holds the column names; keep it separately.
android_headers = android[0]
android_headers

['App',
 'Category',
 'Rating',
 'Reviews',
 'Size',
 'Installs',
 'Type',
 'Price',
 'Content Rating',
 'Genres',
 'Last Updated',
 'Current Ver',
 'Android Ver']

In [6]:
# Drop the header row so `android` holds data rows only.
# NOTE: re-running this cell removes one more (real) row each time.
android = android[1:]

In [7]:
# Show the suspicious row next to the header and a known-good row,
# separated by blank lines, to compare their columns.
print(android[10472], android_headers, android[0], sep="\n\n")

['Life Made WI-Fi Touchscreen Photo Frame', '1.9', '19', '3.0M', '1,000+', 'Free', '0', 'Everyone', '', 'February 11, 2018', '1.0.19', '4.0 and up']

['App', 'Category', 'Rating', 'Reviews', 'Size', 'Installs', 'Type', 'Price', 'Content Rating', 'Genres', 'Last Updated', 'Current Ver', 'Android Ver']

['Photo Editor & Candy Camera & Grid & ScrapBook', 'ART_AND_DESIGN', '4.1', '159', '19M', '10,000+', 'Free', '0', 'Everyone', 'Art & Design', 'January 7, 2018', '1.0.0', '4.0.3 and up']


In [8]:
# Row 10472 ('Life Made WI-Fi Touchscreen Photo Frame') has no 'Category'
# value, so it is one column shorter than the header. Delete it only while it
# is still the malformed row: the length check keeps a re-run of this cell
# from removing a valid app that has moved into this index.
if len(android[10472]) != len(android_headers):
    del android[10472]

In [9]:
len(android)

10840

# Removing duplicate apps

In [10]:
# Print every row whose app name is exactly "Instagram" to show the duplicates.
for row in android:
    if row[0] == "Instagram":
        print(row)

['Instagram', 'SOCIAL', '4.5', '66577313', 'Varies with device', '1,000,000,000+', 'Free', '0', 'Teen', 'Social', 'July 31, 2018', 'Varies with device', 'Varies with device']
['Instagram', 'SOCIAL', '4.5', '66577446', 'Varies with device', '1,000,000,000+', 'Free', '0', 'Teen', 'Social', 'July 31, 2018', 'Varies with device', 'Varies with device']
['Instagram', 'SOCIAL', '4.5', '66577313', 'Varies with device', '1,000,000,000+', 'Free', '0', 'Teen', 'Social', 'July 31, 2018', 'Varies with device', 'Varies with device']
['Instagram', 'SOCIAL', '4.5', '66509917', 'Varies with device', '1,000,000,000+', 'Free', '0', 'Teen', 'Social', 'July 31, 2018', 'Varies with device', 'Varies with device']


In [11]:
# Split app names into first occurrences (`unique_app`) and repeats
# (`duplicate_app`). Both stay lists, in file order.
duplicate_app = []
unique_app = []
# Set of names already seen: O(1) membership test instead of scanning
# `unique_app` for every row.
seen_names = set()

for i in android:
    app = i[0]
    if app in seen_names:
        duplicate_app.append(app)
    else:
        seen_names.add(app)
        unique_app.append(app)

In [12]:
len(duplicate_app)

1181

In [13]:
len(unique_app)

9659

In [14]:
# Highest review count seen for each app name,
# e.g. reviews = {"Instagram": 66577446.0}
reviews = {}

for row in android:
    title = row[0]
    count = float(row[3])
    best_so_far = reviews.get(title)
    # Store the count for a new name, or when it beats the stored maximum.
    if best_so_far is None or best_so_far < count:
        reviews[title] = count

In [15]:
reviews["Instagram"]

66577446.0

In [16]:
# Keep one row per app: the row whose review count equals the maximum recorded
# in `reviews`. If several rows tie on that maximum, only the first is kept.
android_clean = []
already_added = []
# Mirror of `already_added` as a set, so the membership test is O(1)
# rather than a scan of the list for every row.
added_names = set()
for i in android:
    n_reviews = float(i[3])
    app_name = i[0]
    if reviews[app_name] == n_reviews and app_name not in added_names:
        android_clean.append(i)
        already_added.append(app_name)
        added_names.add(app_name)
        
        

In [17]:
# ['Whatsapp', 'SOCIAL', '4.5', '66577446', 'Varies with device', '1,000,000,000+', 'Free', '0', 'Teen', 'Social', 'July 31, 2018', 'Varies with device', 'Varies with device']
# ['Whatsapp', 'SOCIAL', '4.5', '66577446', 'Varies with device', '1,000,000,000+', 'Free', '0', 'Teen', 'Social', 'July 31, 2018', 'Varies with device', 'Varies with device']
# ['Whatsapp', 'SOCIAL', '4.5', '66577446', 'Varies with device', '1,000,000,000+', 'Free', '0', 'Teen', 'Social', 'July 31, 2018', 'Varies with device', 'Varies with device']
# ['Whatsapp', 'SOCIAL', '4.5', '66577446', 'Varies with device', '1,000,000,000+', 'Free', '0', 'Teen', 'Social', 'July 31, 2018', 'Varies with device', 'Varies with device']

In [18]:
len(android_clean)

9659

In [19]:
android_clean[4412][0]

'中国語 AQリスニング'

In [20]:
android_clean[7940][0]

'لعبة تقدر تربح DZ'

# Removing non-English apps

In [21]:
ord("A")

65

In [22]:
# Print every ASCII character (code points 0-127). A `for` loop whose body is
# only a comment is a SyntaxError, so the print call is live again.
for i in range(0, 128):
    print(chr(i))

 








	























 
!
"
#
$
%
&
'
(
)
*
+
,
-
.
/
0
1
2
3
4
5
6
7
8
9
:
;
<
=
>
?
@
A
B
C
D
E
F
G
H
I
J
K
L
M
N
O
P
Q
R
S
T
U
V
W
X
Y
Z
[
\
]
^
_
`
a
b
c
d
e
f
g
h
i
j
k
l
m
n
o
p
q
r
s
t
u
v
w
x
y
z
{
|
}
~



In [23]:
def is_english(app_name):
    """Return True when every character of *app_name* is ASCII.

    A character counts as ASCII when its code point is at most 127. The
    whole string is inspected, so a name that starts with ASCII but contains
    a non-ASCII character later is rejected. An empty string returns True.
    """
    return all(ord(ch) <= 127 for ch in app_name)

In [24]:
is_english("Instagram")

True

In [28]:
is_english("中国語 AQリスニング")

False

In [29]:
is_english("😋")

False

In [30]:
ord("😋")

128523

In [31]:
def is_english(app_name):
    """Return True unless *app_name* has more than three non-ASCII characters.

    Characters with a code point above 127 are counted; up to three of them
    (for example a few emoji) are tolerated.
    """
    non_ascii_total = sum(1 for ch in app_name if ord(ch) > 127)
    return non_ascii_total <= 3

In [36]:
is_english("Instagram 😋😋😋😋")

False

In [37]:
len(android_clean)

9659

In [39]:
# Keep only the rows whose app name (column 0) passes is_english().
android_english = [row for row in android_clean if is_english(row[0])]

In [41]:
len(android_english)

9614

In [45]:
# Free apps only: keep rows whose 'Price' column (index 7) is the string "0".
android_final = [row for row in android_english if row[7] == "0"]

In [46]:
len(android_final)

8864

In [47]:
# 'Category' value (column 1) of every remaining app, duplicates included.
categories = [row[1] for row in android_final]

In [50]:
# Distinct category names. A set has no defined order, so the printed
# order can differ between runs.
uni_categories = set(categories)
print(uni_categories)

{'COMMUNICATION', 'ART_AND_DESIGN', 'SHOPPING', 'SPORTS', 'TRAVEL_AND_LOCAL', 'EVENTS', 'LIBRARIES_AND_DEMO', 'FAMILY', 'EDUCATION', 'DATING', 'COMICS', 'SOCIAL', 'PRODUCTIVITY', 'MAPS_AND_NAVIGATION', 'PARENTING', 'WEATHER', 'TOOLS', 'FOOD_AND_DRINK', 'ENTERTAINMENT', 'BUSINESS', 'HOUSE_AND_HOME', 'LIFESTYLE', 'GAME', 'MEDICAL', 'HEALTH_AND_FITNESS', 'PERSONALIZATION', 'NEWS_AND_MAGAZINES', 'BEAUTY', 'FINANCE', 'BOOKS_AND_REFERENCE', 'VIDEO_PLAYERS', 'PHOTOGRAPHY', 'AUTO_AND_VEHICLES'}


# Most popular categories by average number of installs

In [59]:
# Average number of installs per category, as [category, average] pairs.
# A single pass over the apps accumulates a running total and an app count per
# category, so each 'Installs' string is parsed once instead of once per
# category.
install_totals = {}
app_counts = {}

for j in android_final:
    category = j[1]
    # 'Installs' looks like "1,000,000+": strip "+" and "," before converting.
    install = float(j[5].replace("+", "").replace(",", ""))
    install_totals[category] = install_totals.get(category, 0) + install
    app_counts[category] = app_counts.get(category, 0) + 1

avg_installation = []
for i in uni_categories:
    # A category with no apps has no average; skip it rather than divide by 0.
    if i in app_counts:
        avg = install_totals[i] / app_counts[i]
        avg_installation.append([i, avg])

In [60]:
avg_installation

[['COMMUNICATION', 38456119.167247385],
 ['ART_AND_DESIGN', 1986335.0877192982],
 ['SHOPPING', 7036877.311557789],
 ['SPORTS', 3638640.1428571427],
 ['TRAVEL_AND_LOCAL', 13984077.710144928],
 ['EVENTS', 253542.22222222222],
 ['LIBRARIES_AND_DEMO', 638503.734939759],
 ['FAMILY', 3695641.8198090694],
 ['EDUCATION', 1833495.145631068],
 ['DATING', 854028.8303030303],
 ['COMICS', 817657.2727272727],
 ['SOCIAL', 23253652.127118643],
 ['PRODUCTIVITY', 16787331.344927534],
 ['MAPS_AND_NAVIGATION', 4056941.7741935486],
 ['PARENTING', 542603.6206896552],
 ['WEATHER', 5074486.197183099],
 ['TOOLS', 10801391.298666667],
 ['FOOD_AND_DRINK', 1924897.7363636363],
 ['ENTERTAINMENT', 11640705.88235294],
 ['BUSINESS', 1712290.1474201474],
 ['HOUSE_AND_HOME', 1331540.5616438356],
 ['LIFESTYLE', 1437816.2687861272],
 ['GAME', 15588015.603248259],
 ['MEDICAL', 120550.61980830671],
 ['HEALTH_AND_FITNESS', 4188821.9853479853],
 ['PERSONALIZATION', 5201482.6122448975],
 ['NEWS_AND_MAGAZINES', 9549178.4677419

In [61]:
# Flip each [category, average] pair to [average, category] so that sorting
# the list orders it by the average.
sorted_list = [[average, name] for name, average in avg_installation]
sorted_list

[[38456119.167247385, 'COMMUNICATION'],
 [1986335.0877192982, 'ART_AND_DESIGN'],
 [7036877.311557789, 'SHOPPING'],
 [3638640.1428571427, 'SPORTS'],
 [13984077.710144928, 'TRAVEL_AND_LOCAL'],
 [253542.22222222222, 'EVENTS'],
 [638503.734939759, 'LIBRARIES_AND_DEMO'],
 [3695641.8198090694, 'FAMILY'],
 [1833495.145631068, 'EDUCATION'],
 [854028.8303030303, 'DATING'],
 [817657.2727272727, 'COMICS'],
 [23253652.127118643, 'SOCIAL'],
 [16787331.344927534, 'PRODUCTIVITY'],
 [4056941.7741935486, 'MAPS_AND_NAVIGATION'],
 [542603.6206896552, 'PARENTING'],
 [5074486.197183099, 'WEATHER'],
 [10801391.298666667, 'TOOLS'],
 [1924897.7363636363, 'FOOD_AND_DRINK'],
 [11640705.88235294, 'ENTERTAINMENT'],
 [1712290.1474201474, 'BUSINESS'],
 [1331540.5616438356, 'HOUSE_AND_HOME'],
 [1437816.2687861272, 'LIFESTYLE'],
 [15588015.603248259, 'GAME'],
 [120550.61980830671, 'MEDICAL'],
 [4188821.9853479853, 'HEALTH_AND_FITNESS'],
 [5201482.6122448975, 'PERSONALIZATION'],
 [9549178.467741935, 'NEWS_AND_MAGAZINE

In [64]:
# Order the pairs from the highest average downwards.
sorted_list.sort(reverse=True)
sorted_list

[[38456119.167247385, 'COMMUNICATION'],
 [24727872.452830188, 'VIDEO_PLAYERS'],
 [23253652.127118643, 'SOCIAL'],
 [17840110.40229885, 'PHOTOGRAPHY'],
 [16787331.344927534, 'PRODUCTIVITY'],
 [15588015.603248259, 'GAME'],
 [13984077.710144928, 'TRAVEL_AND_LOCAL'],
 [11640705.88235294, 'ENTERTAINMENT'],
 [10801391.298666667, 'TOOLS'],
 [9549178.467741935, 'NEWS_AND_MAGAZINES'],
 [8767811.894736841, 'BOOKS_AND_REFERENCE'],
 [7036877.311557789, 'SHOPPING'],
 [5201482.6122448975, 'PERSONALIZATION'],
 [5074486.197183099, 'WEATHER'],
 [4188821.9853479853, 'HEALTH_AND_FITNESS'],
 [4056941.7741935486, 'MAPS_AND_NAVIGATION'],
 [3695641.8198090694, 'FAMILY'],
 [3638640.1428571427, 'SPORTS'],
 [1986335.0877192982, 'ART_AND_DESIGN'],
 [1924897.7363636363, 'FOOD_AND_DRINK'],
 [1833495.145631068, 'EDUCATION'],
 [1712290.1474201474, 'BUSINESS'],
 [1437816.2687861272, 'LIFESTYLE'],
 [1387692.475609756, 'FINANCE'],
 [1331540.5616438356, 'HOUSE_AND_HOME'],
 [854028.8303030303, 'DATING'],
 [817657.27272727

In [66]:
# List COMMUNICATION apps that fall in one of these three install buckets.
top_buckets = ("1,000,000,000+", "500,000,000+", "100,000,000+")
for row in android_final:
    if row[1] == "COMMUNICATION" and row[5] in top_buckets:
        print(row[0], row[5])

WhatsApp Messenger 1,000,000,000+
imo beta free calls and text 100,000,000+
Android Messages 100,000,000+
Google Duo - High Quality Video Calls 500,000,000+
Messenger – Text and Video Chat for Free 1,000,000,000+
imo free video calls and chat 500,000,000+
Skype - free IM & video calls 1,000,000,000+
Who 100,000,000+
GO SMS Pro - Messenger, Free Themes, Emoji 100,000,000+
LINE: Free Calls & Messages 500,000,000+
Google Chrome: Fast & Secure 1,000,000,000+
Firefox Browser fast & private 100,000,000+
UC Browser - Fast Download Private & Secure 500,000,000+
Gmail 1,000,000,000+
Hangouts 1,000,000,000+
Messenger Lite: Free Calls & Messages 100,000,000+
Kik 100,000,000+
KakaoTalk: Free Calls & Text 100,000,000+
Opera Mini - fast web browser 100,000,000+
Opera Browser: Fast and Secure 100,000,000+
Telegram 100,000,000+
Truecaller: Caller ID, SMS spam blocking & Dialer 100,000,000+
UC Browser Mini -Tiny Fast Private & Secure 100,000,000+
Viber Messenger 500,000,000+
WeChat 100,000,000+
Yahoo M

In [69]:
# List PRODUCTIVITY apps in the 50,000,000+ and 1,000,000+ install buckets.
# The mis-grouped literal "10,00,000+" is no longer compared against.
# NOTE(review): presumably no 'Installs' value is written that way, so the
# output is unchanged - confirm against the data. If the 10,000,000+ bucket
# was intended, add "10,000,000+" to the tuple below.
install_buckets = ("50,000,000+", "1,000,000+")
for i in android_final:
    if i[1] == "PRODUCTIVITY" and i[5] in install_buckets:
        print(i[0], i[5])

Power Booster - Junk Cleaner & CPU Cooler & Boost 1,000,000+
Calculator - unit converter 50,000,000+
MyMTN 1,000,000+
Advanced Task Killer 50,000,000+
My Airtel-Online Recharge, Pay Bill, Wallet, UPI 50,000,000+
Do It Later: Tasks & To-Dos 50,000,000+
Verizon Cloud 50,000,000+
myAT&T 50,000,000+
Hacker's Keyboard 1,000,000+
MEGA 50,000,000+
Security & Privacy 1,000,000+
Loop - Habit Tracker 1,000,000+
TickTick: To Do List with Reminder, Day Planner 1,000,000+
Pushbullet - SMS on PC 1,000,000+
Planner Pro-Personal Organizer 1,000,000+
Cozi Family Organizer 1,000,000+
IFTTT 1,000,000+
Dashlane Free Password Manager 1,000,000+
Solid Explorer Classic 1,000,000+
File Browser by Astro (File Manager) 50,000,000+
Solid Explorer File Manager 1,000,000+
Smart File Manager 1,000,000+
Simple Notepad 1,000,000+
Sticky Note + : Sync Notes 1,000,000+
Squid - Take Notes & Markup PDFs 1,000,000+
JotterPad - Writer, Screenplay, Novel 1,000,000+
Calendar+ Schedule Planner App 1,000,000+
Today Calendar 20

# Conclusion

We recommend the 'PRODUCTIVITY' category. A good starting point would be an app similar to MyMTN, to which we can add A.I. models and further features based on our research.