# Foundations of Computer Science Project

## Import packages

In [1]:
import pandas as pd
import numpy as np
import re

## Import datasets

In [2]:
# Load the two CSV files from the current working directory:
# data_ps holds the Play Store app table, data_ur the user reviews table.
data_ps = pd.read_csv('googleplaystore.csv')
data_ur = pd.read_csv('googleplaystore_user_reviews.csv')

In [3]:
data_ps.head()  # Preview the first rows of the googleplaystore dataset

Unnamed: 0,App,Category,Rating,Reviews,Size,Installs,Type,Price,Content Rating,Genres,Last Updated,Current Ver,Android Ver
0,Photo Editor & Candy Camera & Grid & ScrapBook,ART_AND_DESIGN,4.1,159,19M,"10,000+",Free,0,Everyone,Art & Design,"January 7, 2018",1.0.0,4.0.3 and up
1,Coloring book moana,ART_AND_DESIGN,3.9,967,14M,"500,000+",Free,0,Everyone,Art & Design;Pretend Play,"January 15, 2018",2.0.0,4.0.3 and up
2,"U Launcher Lite – FREE Live Cool Themes, Hide ...",ART_AND_DESIGN,4.7,87510,8.7M,"5,000,000+",Free,0,Everyone,Art & Design,"August 1, 2018",1.2.4,4.0.3 and up
3,Sketch - Draw & Paint,ART_AND_DESIGN,4.5,215644,25M,"50,000,000+",Free,0,Teen,Art & Design,"June 8, 2018",Varies with device,4.2 and up
4,Pixel Draw - Number Art Coloring Book,ART_AND_DESIGN,4.3,967,2.8M,"100,000+",Free,0,Everyone,Art & Design;Creativity,"June 20, 2018",1.1,4.4 and up


In [4]:
data_ur.head()  # Preview the first rows of the googleplaystore_user_reviews dataset

Unnamed: 0,App,Translated_Review,Sentiment,Sentiment_Polarity,Sentiment_Subjectivity
0,10 Best Foods for You,I like eat delicious food. That's I'm cooking ...,Positive,1.0,0.533333
1,10 Best Foods for You,This help eating healthy exercise regular basis,Positive,0.25,0.288462
2,10 Best Foods for You,,,,
3,10 Best Foods for You,Works great especially going grocery store,Positive,0.4,0.875
4,10 Best Foods for You,Best idea us,Positive,1.0,0.3


In [3]:
# Row 10472 holds inaccurate values (noticed while working on exercise 2), so it is
# removed before any conversion. errors='ignore' keeps this cell safe to re-run:
# once the row is gone, a second plain drop would raise KeyError.
data_ps = data_ps.drop(10472, errors='ignore')

## 1. Convert the app sizes to a number

In [4]:
# 'Varies with device' has no numeric part, so swap in the numeric placeholder
# '99999999' to let every entry go through the same size parser.
data_ps['Size'] = data_ps['Size'].map(lambda raw: re.sub('Varies with device', '99999999', raw))
# Replace commas with dots (presumably so a comma is read as a decimal separator).
data_ps['Size'] = data_ps['Size'].map(lambda raw: re.sub(',', '.', raw))

In [5]:
# Splits a size string such as '19M' or '8.7M' into a leading numeric part
# ('amount') and a trailing suffix ('unit'). Written as a raw string so that
# '\d', '\.', '\w' and '\+' are not flagged as invalid string escapes.
sizes_to_num_re = re.compile(r'(?P<amount>\d*\.*\d*)(?P<unit>\w*\+*)')

def unit_to_mult(unit):
    """Return the multiplier for a size suffix.

    'G', 'M' and 'k' map to 10**9, 10**6 and 10**3 respectively; any other
    value (including the empty string) maps to 1.
    """
    if unit == 'G':
        return 1000000000
    if unit == 'M':
        return 1000000
    if unit == 'k':
        return 1000
    return 1

def to_numeric(elem):
    """Convert a size string such as '19M', '8.7M' or '201k' to an integer.

    The leading number is multiplied by the multiplier of its suffix (see
    unit_to_mult). A string with no suffix is returned as the number itself.

    Raises:
        ValueError: if `elem` does not start with a parsable number.
    """
    m = sizes_to_num_re.search(elem)
    amount = m.group('amount')
    if not amount:
        # Same exception type float('') would raise, but with a readable message.
        raise ValueError('size %r does not start with a number' % (elem,))
    mult = unit_to_mult(m.group('unit'))
    # Round instead of truncating: the float product can land just below the
    # intended integer (4.1 * 1000000 == 4099999.9999999995), and int() alone
    # would then return a value that is one too small.
    return int(round(float(amount) * mult))

In [6]:
# Run the size parser over every 'Size' string and keep the result in a new column.
data_ps['ExtSize'] = data_ps['Size'].map(to_numeric)

In [7]:
# Put the 'Varies with device' label back wherever the placeholder '99999999' was used.
# Series.replace matches whole values only, so a size that merely contains those digits
# is left alone (a substring regex substitution would rewrite it).
data_ps['Size'] = data_ps['Size'].replace('99999999', 'Varies with device')
# ExtSize is turned into text first so the label can live in the same column.
data_ps['ExtSize'] = data_ps['ExtSize'].apply(str).replace('99999999', 'Varies with device')
data_ps.head()

Unnamed: 0,App,Category,Rating,Reviews,Size,Installs,Type,Price,Content Rating,Genres,Last Updated,Current Ver,Android Ver,ExtSize
0,Photo Editor & Candy Camera & Grid & ScrapBook,ART_AND_DESIGN,4.1,159,19M,"10,000+",Free,0,Everyone,Art & Design,"January 7, 2018",1.0.0,4.0.3 and up,19000000
1,Coloring book moana,ART_AND_DESIGN,3.9,967,14M,"500,000+",Free,0,Everyone,Art & Design;Pretend Play,"January 15, 2018",2.0.0,4.0.3 and up,14000000
2,"U Launcher Lite – FREE Live Cool Themes, Hide ...",ART_AND_DESIGN,4.7,87510,8.7M,"5,000,000+",Free,0,Everyone,Art & Design,"August 1, 2018",1.2.4,4.0.3 and up,8700000
3,Sketch - Draw & Paint,ART_AND_DESIGN,4.5,215644,25M,"50,000,000+",Free,0,Teen,Art & Design,"June 8, 2018",Varies with device,4.2 and up,25000000
4,Pixel Draw - Number Art Coloring Book,ART_AND_DESIGN,4.3,967,2.8M,"100,000+",Free,0,Everyone,Art & Design;Creativity,"June 20, 2018",1.1,4.4 and up,2800000


## 2. Convert the number of installs to a number

In [8]:
data_ps.dtypes  # Inspect the dtype of every column of data_ps

App                object
Category           object
Rating            float64
Reviews            object
Size               object
Installs           object
Type               object
Price              object
Content Rating     object
Genres             object
Last Updated       object
Current Ver        object
Android Ver        object
ExtSize            object
dtype: object

In [9]:
data_ps.groupby('Installs')['Installs'].size()  # Count rows per distinct 'Installs' string to spot unexpected values

Installs
0                    1
0+                  14
1+                  67
1,000+             907
1,000,000+        1579
1,000,000,000+      58
10+                386
10,000+           1054
10,000,000+       1252
100+               719
100,000+          1169
100,000,000+       409
5+                  82
5,000+             477
5,000,000+         752
50+                205
50,000+            479
50,000,000+        289
500+               330
500,000+           539
500,000,000+        72
Name: Installs, dtype: int64

In [10]:
# Copy 'Installs' into a new column with the commas removed.
data_ps['NumInstalls'] = data_ps['Installs'].map(lambda text: re.sub(',', '', text))

In [11]:
# Drop the '+' sign. A literal str.replace is used instead of the regex '\+*', which is
# an invalid escape in a normal string literal and also matches the empty string at
# every position.
data_ps['NumInstalls'] = [number.replace('+', '') for number in data_ps['NumInstalls']]
# The remaining text is digits only, so it converts to float directly.
data_ps['NumInstalls'] = data_ps['NumInstalls'].apply(float)

In [14]:
data_ps.head()  # Preview the frame with the new NumInstalls column

Unnamed: 0,App,Category,Rating,Reviews,Size,Installs,Type,Price,Content Rating,Genres,Last Updated,Current Ver,Android Ver,ExtSize,NumInstalls
0,Photo Editor & Candy Camera & Grid & ScrapBook,ART_AND_DESIGN,4.1,159,19M,"10,000+",Free,0,Everyone,Art & Design,"January 7, 2018",1.0.0,4.0.3 and up,19000000,10000.0
1,Coloring book moana,ART_AND_DESIGN,3.9,967,14M,"500,000+",Free,0,Everyone,Art & Design;Pretend Play,"January 15, 2018",2.0.0,4.0.3 and up,14000000,500000.0
2,"U Launcher Lite – FREE Live Cool Themes, Hide ...",ART_AND_DESIGN,4.7,87510,8.7M,"5,000,000+",Free,0,Everyone,Art & Design,"August 1, 2018",1.2.4,4.0.3 and up,8700000,5000000.0
3,Sketch - Draw & Paint,ART_AND_DESIGN,4.5,215644,25M,"50,000,000+",Free,0,Teen,Art & Design,"June 8, 2018",Varies with device,4.2 and up,25000000,50000000.0
4,Pixel Draw - Number Art Coloring Book,ART_AND_DESIGN,4.3,967,2.8M,"100,000+",Free,0,Everyone,Art & Design;Creativity,"June 20, 2018",1.1,4.4 and up,2800000,100000.0


## 3. Transform “Varies with device” into a missing value

In [12]:
# Build a new frame in which every cell equal to 'Varies with device', in any column,
# becomes a missing value.
data_psnan = data_ps.replace(to_replace='Varies with device', value=np.nan)

In [13]:
# With the text label replaced by NaN, the column can be stored as float.
data_psnan['ExtSize'] = data_psnan['ExtSize'].astype(float)

In [14]:
data_psnan.dtypes  # Confirm that ExtSize and NumInstalls are now float64

App                object
Category           object
Rating            float64
Reviews            object
Size               object
Installs           object
Type               object
Price              object
Content Rating     object
Genres             object
Last Updated       object
Current Ver        object
Android Ver        object
ExtSize           float64
NumInstalls       float64
dtype: object

## 4. Convert Current Ver and Android Ver into a dotted number (e.g. 4.0.3 or 4.2)

In [20]:
data_psnan.head()  # Preview the frame before converting the version columns

Unnamed: 0,App,Category,Rating,Reviews,Size,Installs,Type,Price,Content Rating,Genres,Last Updated,Current Ver,Android Ver,ExtSize,NumInstalls
0,Photo Editor & Candy Camera & Grid & ScrapBook,ART_AND_DESIGN,4.1,159,19M,"10,000+",Free,0,Everyone,Art & Design,"January 7, 2018",1.0.0,4.0.3 and up,19000000.0,10000.0
1,Coloring book moana,ART_AND_DESIGN,3.9,967,14M,"500,000+",Free,0,Everyone,Art & Design;Pretend Play,"January 15, 2018",2.0.0,4.0.3 and up,14000000.0,500000.0
2,"U Launcher Lite – FREE Live Cool Themes, Hide ...",ART_AND_DESIGN,4.7,87510,8.7M,"5,000,000+",Free,0,Everyone,Art & Design,"August 1, 2018",1.2.4,4.0.3 and up,8700000.0,5000000.0
3,Sketch - Draw & Paint,ART_AND_DESIGN,4.5,215644,25M,"50,000,000+",Free,0,Teen,Art & Design,"June 8, 2018",,4.2 and up,25000000.0,50000000.0
4,Pixel Draw - Number Art Coloring Book,ART_AND_DESIGN,4.3,967,2.8M,"100,000+",Free,0,Everyone,Art & Design;Creativity,"June 20, 2018",1.1,4.4 and up,2800000.0,100000.0


In [21]:
data_psnan.isnull().any()  # Per column: True if it contains at least one missing value

App               False
Category          False
Rating             True
Reviews           False
Size               True
Installs          False
Type               True
Price             False
Content Rating    False
Genres            False
Last Updated      False
Current Ver        True
Android Ver        True
ExtSize            True
NumInstalls       False
dtype: bool

In [15]:
# Keep only the rows with no missing value in any column. .copy() makes data_nomis an
# independent frame, so columns added to it later are not treated as writes to a
# derived copy of data_psnan.
# NOTE(review): dropna() without a subset also removes rows that only lack e.g. Rating
# or Size, which shrinks the data used by the later sections — confirm this is intended.
data_nomis = data_psnan.dropna().copy()
data_nomis.head()

Unnamed: 0,App,Category,Rating,Reviews,Size,Installs,Type,Price,Content Rating,Genres,Last Updated,Current Ver,Android Ver,ExtSize,NumInstalls
0,Photo Editor & Candy Camera & Grid & ScrapBook,ART_AND_DESIGN,4.1,159,19M,"10,000+",Free,0,Everyone,Art & Design,"January 7, 2018",1.0.0,4.0.3 and up,19000000.0,10000.0
1,Coloring book moana,ART_AND_DESIGN,3.9,967,14M,"500,000+",Free,0,Everyone,Art & Design;Pretend Play,"January 15, 2018",2.0.0,4.0.3 and up,14000000.0,500000.0
2,"U Launcher Lite – FREE Live Cool Themes, Hide ...",ART_AND_DESIGN,4.7,87510,8.7M,"5,000,000+",Free,0,Everyone,Art & Design,"August 1, 2018",1.2.4,4.0.3 and up,8700000.0,5000000.0
4,Pixel Draw - Number Art Coloring Book,ART_AND_DESIGN,4.3,967,2.8M,"100,000+",Free,0,Everyone,Art & Design;Creativity,"June 20, 2018",1.1,4.4 and up,2800000.0,100000.0
5,Paper flowers instructions,ART_AND_DESIGN,4.4,167,5.6M,"50,000+",Free,0,Everyone,Art & Design,"March 26, 2017",1.0,2.3 and up,5600000.0,50000.0


In [16]:
# Count rows per distinct 'Current Ver' string and show the first few groups.
data_nomis.groupby('Current Ver').size().head()

Current Ver
0.0.0.2    1
0.0.1      6
0.0.2      3
0.0.3      1
0.0.4      2
dtype: int64

In [17]:
# Turn off pandas' chained-assignment (SettingWithCopy) check for the rest of the session.
# NOTE(review): this is a global option; it silences the warning for every later cell too.
pd.options.mode.chained_assignment = None

In [18]:
# Strip every non-digit character from 'Current Ver'.
# NOTE: the dots are removed too, so where one version component ends and the next
# begins can no longer be told from the result.
data_nomis['CurrVer'] = data_nomis['Current Ver'].map(lambda ver: re.sub('\\D', '', ver))

In [19]:
# Build the dotted version straight from 'Current Ver': keep the first run of digit
# groups separated by dots (e.g. '1.2.4'). Deriving it from a digits-only string would
# put a dot between every digit and turn a version such as '1.10' into '1.1.0'.
# Entries without any digit become '', as before.
data_nomis['CurrVer'] = [
    found.group(0) if found else ''
    for found in (re.search(r'\d+(?:\.\d+)*', ver) for ver in data_nomis['Current Ver'])
]

In [20]:
data_nomis.head()  # Preview the frame with the new CurrVer column

Unnamed: 0,App,Category,Rating,Reviews,Size,Installs,Type,Price,Content Rating,Genres,Last Updated,Current Ver,Android Ver,ExtSize,NumInstalls,CurrVer
0,Photo Editor & Candy Camera & Grid & ScrapBook,ART_AND_DESIGN,4.1,159,19M,"10,000+",Free,0,Everyone,Art & Design,"January 7, 2018",1.0.0,4.0.3 and up,19000000.0,10000.0,1.0.0
1,Coloring book moana,ART_AND_DESIGN,3.9,967,14M,"500,000+",Free,0,Everyone,Art & Design;Pretend Play,"January 15, 2018",2.0.0,4.0.3 and up,14000000.0,500000.0,2.0.0
2,"U Launcher Lite – FREE Live Cool Themes, Hide ...",ART_AND_DESIGN,4.7,87510,8.7M,"5,000,000+",Free,0,Everyone,Art & Design,"August 1, 2018",1.2.4,4.0.3 and up,8700000.0,5000000.0,1.2.4
4,Pixel Draw - Number Art Coloring Book,ART_AND_DESIGN,4.3,967,2.8M,"100,000+",Free,0,Everyone,Art & Design;Creativity,"June 20, 2018",1.1,4.4 and up,2800000.0,100000.0,1.1
5,Paper flowers instructions,ART_AND_DESIGN,4.4,167,5.6M,"50,000+",Free,0,Everyone,Art & Design,"March 26, 2017",1.0,2.3 and up,5600000.0,50000.0,1.0


In [21]:
# Same treatment for 'Android Ver': first list the distinct values and their row counts.
data_nomis.groupby('Android Ver')['Android Ver'].size()

Android Ver
1.0 and up          2
1.5 and up         14
1.6 and up         87
2.0 and up         27
2.0.1 and up        7
2.1 and up        113
2.2 and up        206
2.3 and up        564
2.3.3 and up      235
3.0 and up        211
3.1 and up          8
3.2 and up         31
4.0 and up       1106
4.0.3 - 7.1.1       2
4.0.3 and up     1184
4.1 - 7.1.1         1
4.1 and up       1921
4.2 and up        316
4.3 and up        193
4.4 and up        805
4.4W and up         6
5.0 - 6.0           1
5.0 - 8.0           2
5.0 and up        487
5.1 and up         17
6.0 and up         44
7.0 - 7.1.1         1
7.0 and up         39
7.1 and up          2
8.0 and up          5
Name: Android Ver, dtype: int64

In [22]:
# 'Android Ver' holds values such as '4.0.3 and up', '4.4W and up' or '4.0.3 - 7.1.1'.
# Keep the first dotted number only (the lower bound when a range is given). Stripping
# all non-digits and re-inserting a dot between every digit would merge both ends of a
# range into one string ('4.0.3.7.1.1') and split multi-digit components.
# Entries without any digit become '', as before.
data_nomis['AndroVer'] = [
    found.group(0) if found else ''
    for found in (re.search(r'\d+(?:\.\d+)*', ver) for ver in data_nomis['Android Ver'])
]

In [23]:
data_nomis.head()  # Preview the frame with the new AndroVer column

Unnamed: 0,App,Category,Rating,Reviews,Size,Installs,Type,Price,Content Rating,Genres,Last Updated,Current Ver,Android Ver,ExtSize,NumInstalls,CurrVer,AndroVer
0,Photo Editor & Candy Camera & Grid & ScrapBook,ART_AND_DESIGN,4.1,159,19M,"10,000+",Free,0,Everyone,Art & Design,"January 7, 2018",1.0.0,4.0.3 and up,19000000.0,10000.0,1.0.0,4.0.3
1,Coloring book moana,ART_AND_DESIGN,3.9,967,14M,"500,000+",Free,0,Everyone,Art & Design;Pretend Play,"January 15, 2018",2.0.0,4.0.3 and up,14000000.0,500000.0,2.0.0,4.0.3
2,"U Launcher Lite – FREE Live Cool Themes, Hide ...",ART_AND_DESIGN,4.7,87510,8.7M,"5,000,000+",Free,0,Everyone,Art & Design,"August 1, 2018",1.2.4,4.0.3 and up,8700000.0,5000000.0,1.2.4,4.0.3
4,Pixel Draw - Number Art Coloring Book,ART_AND_DESIGN,4.3,967,2.8M,"100,000+",Free,0,Everyone,Art & Design;Creativity,"June 20, 2018",1.1,4.4 and up,2800000.0,100000.0,1.1,4.4
5,Paper flowers instructions,ART_AND_DESIGN,4.4,167,5.6M,"50,000+",Free,0,Everyone,Art & Design,"March 26, 2017",1.0,2.3 and up,5600000.0,50000.0,1.0,2.3


## 5. Remove the duplicates

In [24]:
# Count rows per app name; a count above 1 means the app is listed more than once.
data_nomis.groupby('App')['App'].size()

App
+Download 4 Instagram Twitter                                                                                                        1
- Free Comics - Comic Apps                                                                                                           1
.R                                                                                                                                   1
/u/app                                                                                                                               1
058.ba                                                                                                                               1
1. FC Köln App                                                                                                                       1
10 Best Foods for You                                                                                                                2
10 Minutes a Day Times Tables                      

In [25]:
len(data_nomis)  # Row count before removing duplicates

7637

In [26]:
# Keep only the first row seen for each app name; later rows with the same 'App' are dropped.
data_nomis = data_nomis.drop_duplicates(subset='App', keep='first')

In [27]:
len(data_nomis)  # Row count after removing duplicates

6953

## 6. For each category, compute the number of apps

In [28]:
data_nomis.head()  # Preview the deduplicated frame

Unnamed: 0,App,Category,Rating,Reviews,Size,Installs,Type,Price,Content Rating,Genres,Last Updated,Current Ver,Android Ver,ExtSize,NumInstalls,CurrVer,AndroVer
0,Photo Editor & Candy Camera & Grid & ScrapBook,ART_AND_DESIGN,4.1,159,19M,"10,000+",Free,0,Everyone,Art & Design,"January 7, 2018",1.0.0,4.0.3 and up,19000000.0,10000.0,1.0.0,4.0.3
1,Coloring book moana,ART_AND_DESIGN,3.9,967,14M,"500,000+",Free,0,Everyone,Art & Design;Pretend Play,"January 15, 2018",2.0.0,4.0.3 and up,14000000.0,500000.0,2.0.0,4.0.3
2,"U Launcher Lite – FREE Live Cool Themes, Hide ...",ART_AND_DESIGN,4.7,87510,8.7M,"5,000,000+",Free,0,Everyone,Art & Design,"August 1, 2018",1.2.4,4.0.3 and up,8700000.0,5000000.0,1.2.4,4.0.3
4,Pixel Draw - Number Art Coloring Book,ART_AND_DESIGN,4.3,967,2.8M,"100,000+",Free,0,Everyone,Art & Design;Creativity,"June 20, 2018",1.1,4.4 and up,2800000.0,100000.0,1.1,4.4
5,Paper flowers instructions,ART_AND_DESIGN,4.4,167,5.6M,"50,000+",Free,0,Everyone,Art & Design,"March 26, 2017",1.0,2.3 and up,5600000.0,50000.0,1.0,2.3


In [29]:
# Number of rows (apps) in each category of data_nomis.
data_nomis.groupby('Category').size()

Category
ART_AND_DESIGN           57
AUTO_AND_VEHICLES        62
BEAUTY                   37
BOOKS_AND_REFERENCE     141
BUSINESS                221
COMICS                   46
COMMUNICATION           186
DATING                  122
EDUCATION                87
ENTERTAINMENT            62
EVENTS                   35
FAMILY                 1501
FINANCE                 255
FOOD_AND_DRINK           72
GAME                    814
HEALTH_AND_FITNESS      189
HOUSE_AND_HOME           50
LIBRARIES_AND_DEMO       60
LIFESTYLE               267
MAPS_AND_NAVIGATION      93
MEDICAL                 265
NEWS_AND_MAGAZINES      153
PARENTING                44
PERSONALIZATION         271
PHOTOGRAPHY             203
PRODUCTIVITY            221
SHOPPING                145
SOCIAL                  153
SPORTS                  221
TOOLS                   623
TRAVEL_AND_LOCAL        139
VIDEO_PLAYERS           109
WEATHER                  49
dtype: int64

## 7. For each category, compute the average rating

In [30]:
data_nomis.head()  # Preview the frame used for the per-category rating

Unnamed: 0,App,Category,Rating,Reviews,Size,Installs,Type,Price,Content Rating,Genres,Last Updated,Current Ver,Android Ver,ExtSize,NumInstalls,CurrVer,AndroVer
0,Photo Editor & Candy Camera & Grid & ScrapBook,ART_AND_DESIGN,4.1,159,19M,"10,000+",Free,0,Everyone,Art & Design,"January 7, 2018",1.0.0,4.0.3 and up,19000000.0,10000.0,1.0.0,4.0.3
1,Coloring book moana,ART_AND_DESIGN,3.9,967,14M,"500,000+",Free,0,Everyone,Art & Design;Pretend Play,"January 15, 2018",2.0.0,4.0.3 and up,14000000.0,500000.0,2.0.0,4.0.3
2,"U Launcher Lite – FREE Live Cool Themes, Hide ...",ART_AND_DESIGN,4.7,87510,8.7M,"5,000,000+",Free,0,Everyone,Art & Design,"August 1, 2018",1.2.4,4.0.3 and up,8700000.0,5000000.0,1.2.4,4.0.3
4,Pixel Draw - Number Art Coloring Book,ART_AND_DESIGN,4.3,967,2.8M,"100,000+",Free,0,Everyone,Art & Design;Creativity,"June 20, 2018",1.1,4.4 and up,2800000.0,100000.0,1.1,4.4
5,Paper flowers instructions,ART_AND_DESIGN,4.4,167,5.6M,"50,000+",Free,0,Everyone,Art & Design,"March 26, 2017",1.0,2.3 and up,5600000.0,50000.0,1.0,2.3


In [31]:
# Mean of 'Rating' within each category; the double brackets keep the result a DataFrame.
data_nomis.groupby('Category')[['Rating']].mean()

Unnamed: 0_level_0,Rating
Category,Unnamed: 1_level_1
ART_AND_DESIGN,4.378947
AUTO_AND_VEHICLES,4.146774
BEAUTY,4.291892
BOOKS_AND_REFERENCE,4.322695
BUSINESS,4.095475
COMICS,4.193478
COMMUNICATION,4.074194
DATING,3.963934
EDUCATION,4.38046
ENTERTAINMENT,4.154839


## 8. Create two dataframes: one for the genres and one bridging apps and genres, so that, for instance, the app Pixel Draw - Number Art Coloring Book appears twice in the bridging table: once for Art & Design and once for Creativity

In [58]:
data_nomis.head()  # Preview the frame; 'Genres' may hold several genres separated by ';'

Unnamed: 0,App,Category,Rating,Reviews,Size,Installs,Type,Price,Content Rating,Genres,Last Updated,Current Ver,Android Ver,ExtSize,NumInstalls,CurrVer,AndroVer
0,Photo Editor & Candy Camera & Grid & ScrapBook,ART_AND_DESIGN,4.1,159,19M,"10,000+",Free,0,Everyone,Art & Design,"January 7, 2018",1.0.0,4.0.3 and up,19000000.0,10000.0,1.0.0,4.0.3
1,Coloring book moana,ART_AND_DESIGN,3.9,967,14M,"500,000+",Free,0,Everyone,Art & Design;Pretend Play,"January 15, 2018",2.0.0,4.0.3 and up,14000000.0,500000.0,2.0.0,4.0.3
2,"U Launcher Lite – FREE Live Cool Themes, Hide ...",ART_AND_DESIGN,4.7,87510,8.7M,"5,000,000+",Free,0,Everyone,Art & Design,"August 1, 2018",1.2.4,4.0.3 and up,8700000.0,5000000.0,1.2.4,4.0.3
4,Pixel Draw - Number Art Coloring Book,ART_AND_DESIGN,4.3,967,2.8M,"100,000+",Free,0,Everyone,Art & Design;Creativity,"June 20, 2018",1.1,4.4 and up,2800000.0,100000.0,1.1,4.4
5,Paper flowers instructions,ART_AND_DESIGN,4.4,167,5.6M,"50,000+",Free,0,Everyone,Art & Design,"March 26, 2017",1.0,2.3 and up,5600000.0,50000.0,1.0,2.3


In [32]:
# Split each 'Genres' string on ';' so every app carries a list of its genres.
data_nomis['GenresList'] = data_nomis['Genres'].map(lambda joined: joined.split(';'))

In [83]:
# Expand each genre list into its own columns (0, 1, ...), one row per app;
# apps with fewer genres get NaN in the unused columns.
genres = data_nomis['GenresList'].apply(pd.Series)
genres.head()

Unnamed: 0,0,1
0,Art & Design,
1,Art & Design,Pretend Play
2,Art & Design,
4,Art & Design,Creativity
5,Art & Design,


In [68]:
# App names as a Series; it shares the row index of data_nomis.
app = data_nomis['App']

In [84]:
# Bridging table between apps and genres: attach the app name to each row of the wide
# genres table (matching on the row index), unpivot the genre columns into one
# 'Genres_1' column, then discard the helper column and the empty genre slots.
genres_1 = (
    pd.merge(genres, app.to_frame(), left_index=True, right_index=True)
    .melt(id_vars=['App'], value_name="Genres_1")
    .drop(columns="variable")
    .dropna()
)
genres_1.head()

Unnamed: 0,App,Genres_1
0,Photo Editor & Candy Camera & Grid & ScrapBook,Art & Design
1,Coloring book moana,Art & Design
2,"U Launcher Lite – FREE Live Cool Themes, Hide ...",Art & Design
3,Pixel Draw - Number Art Coloring Book,Art & Design
4,Paper flowers instructions,Art & Design


In [75]:
# Check: this app has two genres, so it should appear twice in the bridging table.
print(genres_1[genres_1['App'] == 'Pixel Draw - Number Art Coloring Book'])

                                        App      Genres_1
3     Pixel Draw - Number Art Coloring Book  Art & Design
6956  Pixel Draw - Number Art Coloring Book    Creativity


## 11. For each app, compute the approximate income, obtained as the product of the number of installs and the price.

In [96]:
# Remove the dollar sign from each price. A literal str.replace is used because '\$' is
# an invalid escape sequence in a normal (non-raw) string literal.
data_nomis['NumPrice'] = [price.replace('$', '') for price in data_nomis['Price']]

In [97]:
# Store the cleaned price text as float.
data_nomis['NumPrice'] = data_nomis['NumPrice'].astype(float)

In [98]:
# Approximate income per app: price multiplied by the number of installs.
data_nomis['Income'] = data_nomis['NumPrice'].mul(data_nomis['NumInstalls'])