# Analyzing AppStore Data: Unveiling Insights into Ratings, Pricing, and Download Counts

By analyzing a CSV file containing AppStore data, we can gain valuable insights into **user ratings**, **pricing**, and **download counts**. Ratings serve as indicators of user satisfaction, while pricing data helps understand pricing strategies and their impact on user behavior. Download counts provide a measure of app popularity and success. Analyzing these metrics can offer valuable information on user preferences, app performance, and market trends, enabling developers to make informed decisions and improve their apps' quality and profitability.

In [3]:
# import and set data vars
import csv

with open('AppleStore.csv') as csv_file:
    reader = csv.reader(csv_file, delimiter=',')
    appstore = list(reader)
    appstore_cols = appstore[0]
    appstore_data = appstore[1:]
    
with open('googleplaystore.csv') as csv_file:
    reader = csv.reader(csv_file, delimiter=',')
    playstore = list(reader)
    playstore_cols = playstore[0]
    playstore_data = playstore[1:]  
    
#define explore data func    
def explore_data(dataset, count):
    return dataset[0:count]

In [6]:
#check data
print(explore_data(appstore_data, 1))
print(explore_data(playstore_data, 1))

[['284882215', 'Facebook', '389879808', 'USD', '0.0', '2974676', '212', '3.5', '3.5', '95.0', '4+', 'Social Networking', '37', '1', '29', '1']]
[['Photo Editor & Candy Camera & Grid & ScrapBook', 'ART_AND_DESIGN', '4.1', '159', '19M', '10,000+', 'Free', '0', 'Everyone', 'Art & Design', 'January 7, 2018', '1.0.0', '4.0.3 and up']]


In [9]:
# delete 10472 because it causes error (run it once)
del playstore_data[10472]

In [17]:
# sort duplicate apps.

duplicates = []
uniques = []

for app in playstore_data:
    name = app[0]
    if name in uniques:
        duplicates.append(name)
    else:
        uniques.append(name)
        
print('Duplicate count on playstore: ', len(duplicates))

Duplicate count on playstore:  1181


In [25]:
# select only the highest reviews

max_rev = {}

for app in playstore_data:
    name = app[0]
    revs = float(app[3])
    if name in max_rev and max_rev[name] < revs:
        max_rev[name] = revs
    elif name not in max_rev:
        max_rev[name] = revs
        
print('Expected length:', len(playstore_data) - len(duplicates))
print('Actual length:', len(max_rev))

Expected length: 9659
Actual length: 9659


In [43]:
android_clean = []
ios_clean = []
already_added = []

for app in playstore_data:
    name = app[0]
    n_reviews = float(app[3])
    
    if (n_reviews == max_rev[name]) and (name not in already_added):
        android_clean.append(app)
        already_added.append(name)
    
explore_data(already_added, 3)   

['Photo Editor & Candy Camera & Grid & ScrapBook',
 'U Launcher Lite – FREE Live Cool Themes, Hide Apps',
 'Sketch - Draw & Paint']

In [44]:
# check if ascii function

def is_english(string):
    count = 0
    
    for char in string:
        if ord(char) > 127:
            count += 1
    
    if count > 3:
        return False

    return True
        
print(is_english('爱奇艺PPS -《欢乐颂2》电视剧热播'))

False


In [49]:
# filter out english

android_english = []
ios_english = []

for app in android_clean:
    name = app[0]
    if is_english(name):
        android_english.append(app)
        
for app in appstore_data:
    name = app[1]
    if is_english(name):
        ios_english.append(app)
        
explore_data(ios_english, 5)

print(len(ios_english))
print(len(android_english))

6183
9614


In [51]:
# sort out free apps
android_final = []
ios_final = []

for app in android_english:
    price = app[7]
    if price == '0':
        android_final.append(app)
        
for app in ios_english:
    price = app[4]
    if price == '0.0':
        ios_final.append(app)
        
print(len(android_final))
print(len(ios_final))

8864
3222


In [63]:
# define freq and display. then output the table

def freq_table(dataset, index):
    table = {}
    total = 0
    
    for row in dataset:
        total += 1
        value = row[index]
        if value in table:
            table[value] += 1
        else:
            table[value] = 1
    
    table_percentages = {}
    for key in table:
        percentage = (table[key] / total) * 100
        table_percentages[key] = percentage 
    
    return table_percentages

def display_table(dataset, index):
    table = freq_table(dataset, index)
    table_display = []
    for key in table:
        key_val_as_tuple = (table[key], key)
        table_display.append(key_val_as_tuple)

    table_sorted = sorted(table_display, reverse = True)
    for entry in table_sorted:
        print(entry[1], ':', entry[0])

display_table(ios_final, -5)

Games : 58.16263190564867
Entertainment : 7.883302296710118
Photo & Video : 4.9658597144630665
Education : 3.662321539416512
Social Networking : 3.2898820608317814
Shopping : 2.60707635009311
Utilities : 2.5139664804469275
Sports : 2.1415270018621975
Music : 2.0484171322160147
Health & Fitness : 2.0173805090006205
Productivity : 1.7380509000620732
Lifestyle : 1.5828677839851024
News : 1.3345747982619491
Travel : 1.2414649286157666
Finance : 1.1173184357541899
Weather : 0.8690254500310366
Food & Drink : 0.8069522036002483
Reference : 0.5586592178770949
Business : 0.5276225946617008
Book : 0.4345127250155183
Navigation : 0.186219739292365
Medical : 0.186219739292365
Catalogs : 0.12414649286157665


In [64]:
prime_genre = freq_table(ios_final, -5)

for genre in prime_gen:
    total = 0
    len_genre = 0
    for app in ios_final:
        genre_app = app[-5]
        if genre_app == genre:            
            n_ratings = float(app[5])
            total += n_ratings
            len_genre += 1
    avg_n_ratings = total / len_genre
    print(genre, ':', avg_n_ratings)

NameError: name 'genres_ios' is not defined