<h1>Name: Eshan Mehrotra</h1>

In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the iTunes app data from CSV and preview the first five rows.
itunesdf = pd.read_csv('data/itunes.csv')
itunesdf.head()

Unnamed: 0,app_name,category,appstore_link_url,img_src_url,star_rating,num_ratings
0,SoundCloud - Music & Audio,Music,https://apps.apple.com/gb/app/soundcloud-music...,/autopush/uk/itunes/charts/free-apps/images/20...,4.4,10.2K Ratings
1,Bolt,Travel,https://apps.apple.com/gb/app/bolt/id675033630...,/autopush/uk/itunes/charts/free-apps/images/20...,4.9,29.9K Ratings
2,Google Drive – online backup,Productivity,https://apps.apple.com/gb/app/google-drive-onl...,/autopush/uk/itunes/charts/free-apps/images/20...,4.7,236.4K Ratings
3,Amazon Prime Video,Entertainment,https://apps.apple.com/gb/app/amazon-prime-vid...,/autopush/uk/itunes/charts/free-apps/images/20...,4.7,117.6K Ratings
4,Depop - Buy and sell fashion,Shopping,https://apps.apple.com/gb/app/depop-buy-and-se...,/autopush/uk/itunes/charts/free-apps/images/20...,4.8,158.4K Ratings


In [3]:
# Summarise each column's dtype and non-null count to spot columns that need cleaning.
itunesdf.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 6 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   app_name           100 non-null    object 
 1   category           100 non-null    object 
 2   appstore_link_url  100 non-null    object 
 3   img_src_url        100 non-null    object 
 4   star_rating        100 non-null    float64
 5   num_ratings        100 non-null    object 
dtypes: float64(1), object(5)
memory usage: 4.8+ KB


## Clean Data - Extract Number of Ratings

In [4]:
def parse_rating_count(label):
    """Convert a label such as '10.2K Ratings' into an integer rating count.

    The first whitespace-separated token carries the number. A trailing 'K'
    multiplies it by 1,000 and a trailing 'M' by 1,000,000; any other token is
    read as a plain integer. Thousands separators (',') are ignored.

    Raises ValueError if the numeric part of the token cannot be parsed.
    """
    token = label.split()[0].replace(',', '')
    multiplier = {'K': 1_000, 'M': 1_000_000}.get(token[-1])
    if multiplier is None:
        return int(token)
    # round() rather than int(): a float product can land a hair below the
    # intended integer, and int() would silently truncate it.
    return round(float(token[:-1]) * multiplier)


# Series.map keeps itunesdf's own index, so every parsed value stays on its row
# even if the frame is no longer indexed 0..n-1.
itunesdf['num_ratings_clean'] = itunesdf['num_ratings'].map(parse_rating_count)

In [5]:
# Preview the frame again to check the new num_ratings_clean column.
itunesdf.head()

Unnamed: 0,app_name,category,appstore_link_url,img_src_url,star_rating,num_ratings,num_ratings_clean
0,SoundCloud - Music & Audio,Music,https://apps.apple.com/gb/app/soundcloud-music...,/autopush/uk/itunes/charts/free-apps/images/20...,4.4,10.2K Ratings,10200
1,Bolt,Travel,https://apps.apple.com/gb/app/bolt/id675033630...,/autopush/uk/itunes/charts/free-apps/images/20...,4.9,29.9K Ratings,29900
2,Google Drive – online backup,Productivity,https://apps.apple.com/gb/app/google-drive-onl...,/autopush/uk/itunes/charts/free-apps/images/20...,4.7,236.4K Ratings,236400
3,Amazon Prime Video,Entertainment,https://apps.apple.com/gb/app/amazon-prime-vid...,/autopush/uk/itunes/charts/free-apps/images/20...,4.7,117.6K Ratings,117600
4,Depop - Buy and sell fashion,Shopping,https://apps.apple.com/gb/app/depop-buy-and-se...,/autopush/uk/itunes/charts/free-apps/images/20...,4.8,158.4K Ratings,158400


In [6]:
# List the column labels now available for the analysis below.
itunesdf.columns

Index(['app_name', 'category', 'appstore_link_url', 'img_src_url',
       'star_rating', 'num_ratings', 'num_ratings_clean'],
      dtype='object')

## Top Apps based on Star Ratings

In [7]:
# Rank apps by star rating, then by number of ratings (both descending), with
# app name (ascending) as the final tie-breaker; keep only the ranking columns.
topappsdf = (
    itunesdf
    .sort_values(
        by=['star_rating', 'num_ratings_clean', 'app_name'],
        ascending=[False, False, True],
    )
    .loc[:, ['app_name', 'star_rating', 'num_ratings_clean']]
)
topappsdf

Unnamed: 0,app_name,star_rating,num_ratings_clean
25,Shazam,4.9,694400
1,Bolt,4.9,29900
30,Barclays,4.8,856200
36,HSBC UK Mobile Banking,4.8,543600
48,Booking.com Travel Deals,4.8,540400
...,...,...,...
71,B&M Stores,3.1,103
75,Amazon,3.0,11300
11,SaraMart,2.9,108
83,Facebook,2.6,37500


In [8]:
# Validate the sort order: show every app that ties with another on both
# star_rating and num_ratings_clean, so the app_name ordering can be checked.
topappsdf.loc[
    topappsdf.duplicated(subset=['star_rating', 'num_ratings_clean'], keep=False)
]

Unnamed: 0,app_name,star_rating,num_ratings_clean
81,Spotify New Music and Podcasts,4.7,1600000
95,WhatsApp Messenger,4.7,1600000


## Number of Apps per Category

In [9]:
# Distinct category labels, in order of first appearance.
itunesdf['category'].unique()

array(['Music', 'Travel', 'Productivity', 'Entertainment', 'Shopping',
       'Games', 'Food & Drink', 'Finance', 'Photo & Video', 'Education',
       'Social Networking', 'Reference', 'Utilities', 'Navigation',
       'Health & Fitness', 'Business'], dtype=object)

In [10]:
# Group the apps by category once; the cells below reuse this grouping.
cat_grp = itunesdf.groupby('category')

In [11]:
# Number of apps (rows) in each category.
cat_grp.size()

category
Business              2
Education             5
Entertainment         4
Finance               6
Food & Drink          4
Games                34
Health & Fitness      2
Music                 4
Navigation            2
Photo & Video         5
Productivity          6
Reference             1
Shopping             11
Social Networking     5
Travel                6
Utilities             3
dtype: int64

In [12]:
# Category labels that index the per-category counts.
cat_grp.size().index

Index(['Business', 'Education', 'Entertainment', 'Finance', 'Food & Drink',
       'Games', 'Health & Fitness', 'Music', 'Navigation', 'Photo & Video',
       'Productivity', 'Reference', 'Shopping', 'Social Networking', 'Travel',
       'Utilities'],
      dtype='object', name='category')

## Average App Rating per Category (weighted by number of ratings)

In [13]:
# Rating-count-weighted mean star rating per category:
#     sum(star_rating * num_ratings_clean) / sum(num_ratings_clean)
# Only the two numeric columns are handed to apply, so the lambda never operates
# on the grouping column (pandas 2.2+ warns when apply does so), and the sums use
# the vectorised Series.sum() instead of Python's element-by-element sum().
cat_series = cat_grp[['star_rating', 'num_ratings_clean']].apply(
    lambda grp: (grp['star_rating'] * grp['num_ratings_clean']).sum()
    / grp['num_ratings_clean'].sum()
)
cat_series.sort_values(ascending=False)

category
Finance              4.796536
Music                4.757942
Travel               4.715657
Photo & Video        4.704613
Health & Fitness     4.700000
Shopping             4.697098
Navigation           4.688176
Food & Drink         4.665683
Social Networking    4.644088
Entertainment        4.643281
Productivity         4.626733
Games                4.611581
Business             4.582095
Education            4.558274
Reference            4.400000
Utilities            4.299578
dtype: float64

## Top Rated App per Category

In [14]:
# Order apps by star rating, then number of ratings (both descending), then app
# name (ascending) - the same tie-breaking used for the "Top Apps" ranking - so
# the app chosen for a category does not depend on the input row order.
sorted_itunesdf = itunesdf.sort_values(
    by=['star_rating', 'num_ratings_clean', 'app_name'],
    ascending=[False, False, True],
)
# idxmax returns the label of the FIRST row holding each category's maximum
# star_rating; groupby keeps the sorted row order within a group, so that first
# row is the best-ranked app among those sharing the top rating.
sorted_itunesdf.loc[
    sorted_itunesdf.groupby('category')['star_rating'].idxmax(),
    ['category', 'app_name', 'star_rating'],
]

Unnamed: 0,category,app_name,star_rating
64,Business,Microsoft Teams,4.7
42,Education,Duolingo,4.7
91,Entertainment,TikTok - Make Your Day,4.7
30,Finance,Barclays,4.8
98,Food & Drink,Just Eat - Food Delivery,4.7
49,Games,8 Ball Pool™,4.7
50,Health & Fitness,MyFitnessPal,4.7
25,Music,Shazam,4.9
82,Navigation,Google Maps - Transit & Food,4.7
10,Photo & Video,FreePrints - Photos Delivered,4.8
