# Google Play Store Apps

##### Source: https://www.kaggle.com/lava18/google-play-store-apps#license.txt

## Loading our data

In [1]:
import pandas as pd
import numpy as np

In [2]:
store_df = pd.read_csv('googleplaystore.csv')
store_df.head()

Unnamed: 0,App,Category,Rating,Reviews,Size,Installs,Type,Price,Content Rating,Genres,Last Updated,Current Ver,Android Ver
0,Photo Editor & Candy Camera & Grid & ScrapBook,ART_AND_DESIGN,4.1,159,19M,"10,000+",Free,0,Everyone,Art & Design,"January 7, 2018",1.0.0,4.0.3 and up
1,Coloring book moana,ART_AND_DESIGN,3.9,967,14M,"500,000+",Free,0,Everyone,Art & Design;Pretend Play,"January 15, 2018",2.0.0,4.0.3 and up
2,"U Launcher Lite – FREE Live Cool Themes, Hide ...",ART_AND_DESIGN,4.7,87510,8.7M,"5,000,000+",Free,0,Everyone,Art & Design,"August 1, 2018",1.2.4,4.0.3 and up
3,Sketch - Draw & Paint,ART_AND_DESIGN,4.5,215644,25M,"50,000,000+",Free,0,Teen,Art & Design,"June 8, 2018",Varies with device,4.2 and up
4,Pixel Draw - Number Art Coloring Book,ART_AND_DESIGN,4.3,967,2.8M,"100,000+",Free,0,Everyone,Art & Design;Creativity,"June 20, 2018",1.1,4.4 and up


## Data Cleaning

In [3]:
store_df.isna().sum() # number of missing values in each column

App                  0
Category             0
Rating            1474
Reviews              0
Size                 0
Installs             0
Type                 1
Price                0
Content Rating       1
Genres               0
Last Updated         0
Current Ver          8
Android Ver          3
dtype: int64

In [4]:
store_df.shape

(10841, 13)

In [5]:
store_df.info() # show the dtype and non-null count of every column

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10841 entries, 0 to 10840
Data columns (total 13 columns):
App               10841 non-null object
Category          10841 non-null object
Rating            9367 non-null float64
Reviews           10841 non-null object
Size              10841 non-null object
Installs          10841 non-null object
Type              10840 non-null object
Price             10841 non-null object
Content Rating    10840 non-null object
Genres            10841 non-null object
Last Updated      10841 non-null object
Current Ver       10833 non-null object
Android Ver       10838 non-null object
dtypes: float64(1), object(12)
memory usage: 1.1+ MB


In [7]:
store_df['Rating'].describe()

count    9367.000000
mean        4.193338
std         0.537431
min         1.000000
25%         4.000000
50%         4.300000
75%         4.500000
max        19.000000
Name: Rating, dtype: float64

In [16]:
# Impute missing ratings with the column median.
# The result is assigned back to the column instead of calling
# fillna(inplace=True) on `store_df['Rating']`: that chained in-place call acts
# on a column selection and, under pandas copy-on-write, leaves `store_df`
# itself unchanged.
store_df['Rating'] = store_df['Rating'].fillna(store_df['Rating'].median())

In [17]:
store_df.isna().sum() 

App               0
Category          0
Rating            0
Reviews           0
Size              0
Installs          0
Type              1
Price             0
Content Rating    1
Genres            0
Last Updated      0
Current Ver       8
Android Ver       3
dtype: int64

In [59]:
# Drop the columns we do not need for the popularity ranking.
# `columns=` replaces the positional axis argument (`drop(labels, 1)`), which was
# deprecated in pandas 1.3 and is rejected by pandas 2.x.
df = store_df.drop(columns=['Android Ver', 'Current Ver', 'Last Updated', 'Content Rating'])
df.head(20)

Unnamed: 0,App,Category,Rating,Reviews,Size,Installs,Type,Price,Genres
0,Photo Editor & Candy Camera & Grid & ScrapBook,ART_AND_DESIGN,4.1,159,19M,"10,000+",Free,0,Art & Design
1,Coloring book moana,ART_AND_DESIGN,3.9,967,14M,"500,000+",Free,0,Art & Design;Pretend Play
2,"U Launcher Lite – FREE Live Cool Themes, Hide ...",ART_AND_DESIGN,4.7,87510,8.7M,"5,000,000+",Free,0,Art & Design
3,Sketch - Draw & Paint,ART_AND_DESIGN,4.5,215644,25M,"50,000,000+",Free,0,Art & Design
4,Pixel Draw - Number Art Coloring Book,ART_AND_DESIGN,4.3,967,2.8M,"100,000+",Free,0,Art & Design;Creativity
5,Paper flowers instructions,ART_AND_DESIGN,4.4,167,5.6M,"50,000+",Free,0,Art & Design
6,Smoke Effect Photo Maker - Smoke Editor,ART_AND_DESIGN,3.8,178,19M,"50,000+",Free,0,Art & Design
7,Infinite Painter,ART_AND_DESIGN,4.1,36815,29M,"1,000,000+",Free,0,Art & Design
8,Garden Coloring Book,ART_AND_DESIGN,4.4,13791,33M,"1,000,000+",Free,0,Art & Design
9,Kids Paint Free - Drawing Fun,ART_AND_DESIGN,4.7,121,3.1M,"10,000+",Free,0,Art & Design;Creativity


In [60]:
df.isna().sum() 

App         0
Category    0
Rating      0
Reviews     0
Size        0
Installs    0
Type        1
Price       0
Genres      0
dtype: int64

In [61]:
df.Type.mode()

0    Free
dtype: object

In [62]:
df.Type.value_counts()

Free    10039
Paid      800
0           1
Name: Type, dtype: int64

In [63]:
df[df['Type']=='0']

Unnamed: 0,App,Category,Rating,Reviews,Size,Installs,Type,Price,Genres
10472,Life Made WI-Fi Touchscreen Photo Frame,1.9,19.0,3.0M,"1,000+",Free,0,Everyone,"February 11, 2018"


In [64]:
# Remove the malformed record found above (index 10472): its values are shifted
# one column to the left (Category shows 1.9, Rating 19.0, Type '0'), so it is a
# broken row rather than a genuine outlier.
# Filtering on the condition instead of a hard-coded index label keeps the cell
# re-runnable (df.drop(10472) raises KeyError the second time); the row with a
# missing Type is kept because NaN != '0' evaluates to True.
# .copy() gives an independent frame so later column assignments do not warn.
df = df.loc[df['Type'] != '0'].copy()
df.Type.value_counts()

Free    10039
Paid      800
Name: Type, dtype: int64

In [65]:
df.isna().sum() 

App         0
Category    0
Rating      0
Reviews     0
Size        0
Installs    0
Type        1
Price       0
Genres      0
dtype: int64

In [66]:
# Fill the single missing Type with 'Free', the mode of the column.
# Assign the result back rather than using fillna(inplace=True) on the column
# selection, which may modify a temporary copy and leave `df` unchanged under
# pandas copy-on-write.
df['Type'] = df['Type'].fillna('Free')
df.isna().sum()

App         0
Category    0
Rating      0
Reviews     0
Size        0
Installs    0
Type        0
Price       0
Genres      0
dtype: int64

In [69]:
df.head()

Unnamed: 0,App,Category,Rating,Reviews,Size,Installs,Type,Price,Genres
0,Photo Editor & Candy Camera & Grid & ScrapBook,ART_AND_DESIGN,4.1,159,19M,"10,000+",Free,0,Art & Design
1,Coloring book moana,ART_AND_DESIGN,3.9,967,14M,"500,000+",Free,0,Art & Design;Pretend Play
2,"U Launcher Lite – FREE Live Cool Themes, Hide ...",ART_AND_DESIGN,4.7,87510,8.7M,"5,000,000+",Free,0,Art & Design
3,Sketch - Draw & Paint,ART_AND_DESIGN,4.5,215644,25M,"50,000,000+",Free,0,Art & Design
4,Pixel Draw - Number Art Coloring Book,ART_AND_DESIGN,4.3,967,2.8M,"100,000+",Free,0,Art & Design;Creativity


In [71]:
df["Reviews"] = pd.to_numeric(df["Reviews"]) # parse the review counts from strings to numbers (int64 in the info() output below)
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 10840 entries, 0 to 10840
Data columns (total 9 columns):
App         10840 non-null object
Category    10840 non-null object
Rating      10840 non-null float64
Reviews     10840 non-null int64
Size        10840 non-null object
Installs    10840 non-null object
Type        10840 non-null object
Price       10840 non-null object
Genres      10840 non-null object
dtypes: float64(1), int64(1), object(7)
memory usage: 846.9+ KB


In [73]:
df1 = df.sort_values('Reviews', ascending=False)
df1.head(20)

Unnamed: 0,App,Category,Rating,Reviews,Size,Installs,Type,Price,Genres
2544,Facebook,SOCIAL,4.1,78158306,Varies with device,"1,000,000,000+",Free,0,Social
3943,Facebook,SOCIAL,4.1,78128208,Varies with device,"1,000,000,000+",Free,0,Social
381,WhatsApp Messenger,COMMUNICATION,4.4,69119316,Varies with device,"1,000,000,000+",Free,0,Communication
336,WhatsApp Messenger,COMMUNICATION,4.4,69119316,Varies with device,"1,000,000,000+",Free,0,Communication
3904,WhatsApp Messenger,COMMUNICATION,4.4,69109672,Varies with device,"1,000,000,000+",Free,0,Communication
2604,Instagram,SOCIAL,4.5,66577446,Varies with device,"1,000,000,000+",Free,0,Social
2611,Instagram,SOCIAL,4.5,66577313,Varies with device,"1,000,000,000+",Free,0,Social
2545,Instagram,SOCIAL,4.5,66577313,Varies with device,"1,000,000,000+",Free,0,Social
3909,Instagram,SOCIAL,4.5,66509917,Varies with device,"1,000,000,000+",Free,0,Social
382,Messenger – Text and Video Chat for Free,COMMUNICATION,4.0,56646578,Varies with device,"1,000,000,000+",Free,0,Communication


In [81]:
# Size and Price are not used by the popularity score, so remove them.
# `columns=` replaces the positional axis argument, which pandas 2.x rejects.
df1 = df1.drop(columns=['Size', 'Price'])
df1.head()

Unnamed: 0,App,Category,Rating,Reviews,Installs,Type,Genres
2544,Facebook,SOCIAL,4.1,78158306,"1,000,000,000+",Free,Social
3943,Facebook,SOCIAL,4.1,78128208,"1,000,000,000+",Free,Social
381,WhatsApp Messenger,COMMUNICATION,4.4,69119316,"1,000,000,000+",Free,Communication
336,WhatsApp Messenger,COMMUNICATION,4.4,69119316,"1,000,000,000+",Free,Communication
3904,WhatsApp Messenger,COMMUNICATION,4.4,69109672,"1,000,000,000+",Free,Communication


In [84]:
# The by-type ranking does not need Genres or Category, so remove them here.
# `columns=` replaces the positional axis argument, which pandas 2.x rejects.
df1 = df1.drop(columns=['Genres', 'Category'])
df1.head()

Unnamed: 0,App,Rating,Reviews,Installs,Type
2544,Facebook,4.1,78158306,"1,000,000,000+",Free
3943,Facebook,4.1,78128208,"1,000,000,000+",Free
381,WhatsApp Messenger,4.4,69119316,"1,000,000,000+",Free
336,WhatsApp Messenger,4.4,69119316,"1,000,000,000+",Free
3904,WhatsApp Messenger,4.4,69109672,"1,000,000,000+",Free


In [87]:
print(4891723*4.6)
print(56642847*4.0)
print(78158306*4.1)

22501925.799999997
226571388.0
320449054.59999996


In [88]:
df1['score'] = df1['Rating'] * df1['Reviews']
df1.head()

Unnamed: 0,App,Rating,Reviews,Installs,Type,score
2544,Facebook,4.1,78158306,"1,000,000,000+",Free,320449054.6
3943,Facebook,4.1,78128208,"1,000,000,000+",Free,320325652.8
381,WhatsApp Messenger,4.4,69119316,"1,000,000,000+",Free,304124990.4
336,WhatsApp Messenger,4.4,69119316,"1,000,000,000+",Free,304124990.4
3904,WhatsApp Messenger,4.4,69109672,"1,000,000,000+",Free,304082556.8


In [90]:
from sklearn.preprocessing import MinMaxScaler

scaling=MinMaxScaler()
df1['score']=scaling.fit_transform(df1[['score']])
df1['score'].head()

2544    1.000000
3943    0.999615
381     0.949059
336     0.949059
3904    0.948926
Name: score, dtype: float64

In [103]:
df1.head(20)

Unnamed: 0,App,Rating,Reviews,Installs,Type,score
2544,Facebook,4.1,78158306,"1,000,000,000+",Free,1.0
3943,Facebook,4.1,78128208,"1,000,000,000+",Free,0.999615
381,WhatsApp Messenger,4.4,69119316,"1,000,000,000+",Free,0.949059
336,WhatsApp Messenger,4.4,69119316,"1,000,000,000+",Free,0.949059
3904,WhatsApp Messenger,4.4,69109672,"1,000,000,000+",Free,0.948926
2604,Instagram,4.5,66577446,"1,000,000,000+",Free,0.934933
2611,Instagram,4.5,66577313,"1,000,000,000+",Free,0.934931
2545,Instagram,4.5,66577313,"1,000,000,000+",Free,0.934931
3909,Instagram,4.5,66509917,"1,000,000,000+",Free,0.933985
382,Messenger – Text and Video Chat for Free,4.0,56646578,"1,000,000,000+",Free,0.70709


# Top 10 Popular Apps According To Type

In [122]:
import matplotlib.pyplot as plt
import seaborn as sns

In [128]:
# Function to define top-N popular recommendation by app type
def popular_apps(Type, n=10, data=None):
    """Return the names of the highest-scoring apps of one type.

    Parameters
    ----------
    Type : str
        Value of the ``Type`` column to keep (``'Free'`` or ``'Paid'`` in
        this dataset).
    n : int, default 10
        Maximum number of app names to return.
    data : pandas.DataFrame, optional
        Frame providing the ``App``, ``Type`` and ``score`` columns. When
        omitted, the notebook-level ``df1`` is used, as before.

    Returns
    -------
    pandas.Series
        Up to ``n`` distinct app names ordered from highest to lowest score,
        indexed by the row label of the best-scoring entry for each app.
        Empty when no row matches ``Type``.
    """
    apps = df1 if data is None else data
    ranked = apps.loc[apps['Type'] == Type].sort_values('score', ascending=False)
    # The dataset lists some apps several times; after sorting, keeping the
    # first occurrence keeps each app's best-scoring row.
    return ranked['App'].drop_duplicates().head(n)

### 1. Top 10 Free Popular Apps

In [129]:
popular_apps('Free')

2544                                             Facebook
381                                    WhatsApp Messenger
2604                                            Instagram
382              Messenger – Text and Video Chat for Free
1879                                       Clash of Clans
4005              Clean Master- Space Cleaner & Antivirus
1917                                       Subway Surfers
7536    Security Master - Antivirus, VPN, AppLock, Boo...
3665                                              YouTube
1878                                         Clash Royale
Name: App, dtype: object

### 2. Top 10 Paid Popular Apps

In [113]:
popular_apps('Paid')

2241                        Minecraft
4034                    Hitman Sniper
7417    Grand Theft Auto: San Andreas
9678                Where's My Water?
8860                      Bloons TD 5
5490                       True Skate
2242       Card Wars - Adventure Time
5631          Five Nights at Freddy's
6911            Beautiful Widgets Pro
8804              DraStic DS Emulator
Name: App, dtype: object

# Top 10 Apps According To Genres

In [130]:
df.head()

Unnamed: 0,App,Category,Rating,Reviews,Size,Installs,Type,Price,Genres
0,Photo Editor & Candy Camera & Grid & ScrapBook,ART_AND_DESIGN,4.1,159,19M,"10,000+",Free,0,Art & Design
1,Coloring book moana,ART_AND_DESIGN,3.9,967,14M,"500,000+",Free,0,Art & Design;Pretend Play
2,"U Launcher Lite – FREE Live Cool Themes, Hide ...",ART_AND_DESIGN,4.7,87510,8.7M,"5,000,000+",Free,0,Art & Design
3,Sketch - Draw & Paint,ART_AND_DESIGN,4.5,215644,25M,"50,000,000+",Free,0,Art & Design
4,Pixel Draw - Number Art Coloring Book,ART_AND_DESIGN,4.3,967,2.8M,"100,000+",Free,0,Art & Design;Creativity


In [135]:
df['score'] = df['Rating'] * df['Reviews']
df.head()

Unnamed: 0,App,Category,Rating,Reviews,Size,Installs,Type,Price,Genres,score
0,Photo Editor & Candy Camera & Grid & ScrapBook,ART_AND_DESIGN,4.1,159,19M,"10,000+",Free,0,Art & Design,651.9
1,Coloring book moana,ART_AND_DESIGN,3.9,967,14M,"500,000+",Free,0,Art & Design;Pretend Play,3771.3
2,"U Launcher Lite – FREE Live Cool Themes, Hide ...",ART_AND_DESIGN,4.7,87510,8.7M,"5,000,000+",Free,0,Art & Design,411297.0
3,Sketch - Draw & Paint,ART_AND_DESIGN,4.5,215644,25M,"50,000,000+",Free,0,Art & Design,970398.0
4,Pixel Draw - Number Art Coloring Book,ART_AND_DESIGN,4.3,967,2.8M,"100,000+",Free,0,Art & Design;Creativity,4158.1


In [136]:
from sklearn.preprocessing import MinMaxScaler

scaling=MinMaxScaler()
df['score']=scaling.fit_transform(df[['score']])
df['score'].head()

0    0.000002
1    0.000012
2    0.001284
3    0.003028
4    0.000013
Name: score, dtype: float64

In [157]:
df['Genres'].value_counts().head(20)

Tools                842
Entertainment        623
Education            549
Medical              463
Business             460
Productivity         424
Sports               398
Personalization      392
Communication        387
Lifestyle            381
Finance              366
Action               365
Health & Fitness     341
Photography          335
Social               295
News & Magazines     283
Shopping             260
Travel & Local       257
Dating               234
Books & Reference    231
Name: Genres, dtype: int64

In [138]:
# Function to define top-N popular recommendation within one genre
# NOTE: this reuses the name `popular_apps` of the by-type helper defined earlier
# in the notebook; once this cell has run, the one-argument call
# `popular_apps('Free')` is no longer valid.
def popular_apps(Genres, Type, n=10, data=None):
    """Return the names of the highest-scoring apps of one genre and type.

    Parameters
    ----------
    Genres : str
        Value of the ``Genres`` column to keep. The comparison is an exact
        match, so rows with a compound label such as
        ``'Art & Design;Pretend Play'`` are not returned for
        ``'Art & Design'``.
    Type : str
        Value of the ``Type`` column to keep (``'Free'`` or ``'Paid'`` in
        this dataset).
    n : int, default 10
        Maximum number of app names to return.
    data : pandas.DataFrame, optional
        Frame providing the ``App``, ``Genres``, ``Type`` and ``score``
        columns. When omitted, the notebook-level ``df`` is used, as before.

    Returns
    -------
    pandas.Series
        Up to ``n`` distinct app names ordered from highest to lowest score,
        indexed by the row label of the best-scoring entry for each app.
        May hold fewer than ``n`` names, or be empty, when few rows match.
    """
    apps = df if data is None else data
    selected = (apps['Genres'] == Genres) & (apps['Type'] == Type)
    ranked = apps.loc[selected].sort_values('score', ascending=False)
    # Keep only the best-scoring row of apps that are listed more than once.
    return ranked['App'].drop_duplicates().head(n)

### 1. Top 10 Sports Genre Popular Apps

In [143]:
popular_apps('Sports','Free')

1871                              8 Ball Pool
1902                 Dream League Soccer 2018
2004                              Score! Hero
8445                              FIFA Soccer
6067    Top Eleven 2018 - Be a Soccer Manager
9140                           EA SPORTS UFC®
5416            PES 2018 PRO EVOLUTION SOCCER
9141               NBA LIVE Mobile Basketball
3101                          Real Basketball
8864              Online Soccer Manager (OSM)
Name: App, dtype: object

### 2. Top 10 Education Genre Popular Apps

In [144]:
# Free apps
popular_apps('Education','Free')

832     Learn languages, grammar & vocabulary with Mem...
9620                         Hello English: Learn English
718                                           Math Tricks
807                            Learn English with Wlingua
848                     SoloLearn: Learn to Code for Free
736                                English with Lingualeo
8297                             Babbel – Learn Languages
744                        Elevate - Brain Training Games
740      Quizlet: Learn Languages & Vocab with Flashcards
6295               Learn English Vocabulary - 6,000 Words
Name: App, dtype: object

In [145]:
# Paid apps
popular_apps('Education','Paid')

3993      C4droid - C/C++ compiler & IDE
9848       Learn Italian with MosaLingua
8077                              go41cx
4534                        R Instructor
7347             boattheory.ch Full 2018
7233    Führerschein Klasse CE, LKW 2018
8014                        Morse Player
8051      TI-Nspire CX Calculator Manual
5475      500 AP World History Questions
6201               CSCS BG (в български)
Name: App, dtype: object

### 3. Top 10 Social Genre Popular Apps

In [146]:
# Free social apps
popular_apps('Social','Free')

2544                          Facebook
2604                         Instagram
2610                          Snapchat
2546                     Facebook Lite
3945    Tik Tok - including musical.ly
6373                                VK
2554                           Google+
2552                         Pinterest
2592      Tango - Live Video Broadcast
2588    Badoo - Free Chat & Dating App
Name: App, dtype: object

In [147]:
# Top paid social apps
popular_apps('Social','Paid')

4971    WhatsFake Pro (Ad free)
4721       W-History Standalone
5307    Ak Parti Yardım Toplama
Name: App, dtype: object

### 4. Top 10 Strategy Genre Popular Apps

In [155]:
# Top Free strategy games
popular_apps('Strategy','Free')

1879                                       Clash of Clans
1878                                         Clash Royale
6551                                           Boom Beach
7501                Castle Clash: Heroes of the Empire US
1881                              Plants vs. Zombies FREE
1866    Lords Mobile: Battle of the Empires - Strategy...
7515        Clash of Kings : The King Of Fighters version
4580                       Five Nights at Freddy's 2 Demo
6707                        Vlogger Go Viral - Tuber Game
1998                                    Stick War: Legacy
Name: App, dtype: object

In [156]:
# Top Paid strategy games
popular_apps('Strategy','Paid')

8860                     Bloons TD 5
5627       Five Nights at Freddy's 2
10006            XCOM®: Enemy Within
1834                   The Escapists
4413        Guns'n'Glory WW2 Premium
4813        Z Origins - (Z The Game)
4397            Guns'n'Glory Premium
8321     Myth Defense 2: DF Platinum
10005           European War 6: 1804
4405     Guns'n'Glory Heroes Premium
Name: App, dtype: object

### 5. Top 10 Entertainment Genre Popular Apps

In [158]:
# Top Free
popular_apps('Entertainment','Free')

5856                                 Google Play Games
958                                            Netflix
866                                            Hotstar
4111                                 Talking Tom Cat 2
6269                     Bitmoji – Your Personal Emoji
889     Twitch: Livestream Multiplayer Games & Esports
3877                                          Akinator
4217                                    Talking Ginger
893                                Talking Ben the Dog
874                                     Talking Angela
Name: App, dtype: object

In [159]:
# Top Paid
popular_apps('Entertainment','Paid')

995                   My Talking Pet
1001                  Meme Generator
5354                  I am Rich Plus
6687                TV Guide BR Gold
5362                   I Am Rich Pro
5366                       I Am Rich
4429       Detect-O-Gromit (D.O.G 2)
4983    My baby firework (Remove ad)
4316                    Anna.K Tarot
4293                          K.MOJI
Name: App, dtype: object