In [64]:
import numpy as np
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
import requests
from bs4 import BeautifulSoup
from google_play_scraper import search
import time
import os
import json

# Build the seedlist

## Communication apps

In [147]:
communications_apps_urls = pd.read_excel('top_50_communications_apps_for_each_country_urls.xlsx')

In [148]:
communications_apps_1 = pd.read_excel('Communication Apps Similarweb.xlsx')
communications_apps_2 = pd.read_excel('Communication Apps Similarweb 2.xlsx')
communications_apps = pd.concat([communications_apps_1, communications_apps_2])

In [149]:
communications_apps = communications_apps_urls.merge(communications_apps, on='similarweb_url', how='left')

In [153]:
communications_apps.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 4800 entries, 0 to 4860
Data columns (total 4 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   similarweb_url  4800 non-null   object
 1   App             4800 non-null   object
 2   Publisher       4800 non-null   object
 3   Category        4800 non-null   object
dtypes: object(4)
memory usage: 187.5+ KB


In [152]:
communications_apps.dropna(subset=['App'], inplace=True)

In [154]:
communications_apps['App'] = communications_apps['App'].apply(lambda x: x.replace('/', '-'))

In [155]:
seed_list_communication = communications_apps.drop_duplicates(subset='App')

In [156]:
seed_list_communication

Unnamed: 0,similarweb_url,App,Publisher,Category
0,https://www.similarweb.com/top-apps/google/alg...,WhatsApp Messenger,WhatsApp LLC,Communication
1,https://www.similarweb.com/top-apps/google/alg...,Messenger,"Meta Platforms, Inc.",Communication
2,https://www.similarweb.com/top-apps/google/alg...,Telegram,Telegram FZ-LLC,Communication
3,https://www.similarweb.com/top-apps/google/alg...,Snapchat,Snap Inc,Communication
4,https://www.similarweb.com/top-apps/google/alg...,Truecaller: Identify Caller ID,Truecaller,Communication
...,...,...,...,...
4807,https://www.similarweb.com/top-apps/google/pue...,Boost Visual Voicemail,DISH Wireless L.L.C.,Communication
4815,https://www.similarweb.com/top-apps/google/ser...,Yettel SRB,Yettel Serbia,Communication
4818,https://www.similarweb.com/top-apps/google/ser...,Moj mts,mts,Communication
4828,https://www.similarweb.com/top-apps/google/ser...,mts Biznis,mts,Communication


## Social apps

In [198]:
social_apps_urls = pd.read_excel('top_50_social_apps_for_each_country_urls.xlsx')

social_apps_1 = pd.read_excel('Social Apps Similarweb.xlsx')
social_apps_2 = pd.read_excel('Social Apps Similarweb 2.xlsx')
social_apps = pd.concat([social_apps_1, social_apps_2])

social_apps = social_apps_urls.merge(social_apps, on='similarweb_url', how='left')

In [201]:
social_apps.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 4800 entries, 0 to 4861
Data columns (total 4 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   similarweb_url  4800 non-null   object
 1   App             4800 non-null   object
 2   Publisher       4800 non-null   object
 3   Category        4795 non-null   object
dtypes: object(4)
memory usage: 187.5+ KB


In [200]:
social_apps.dropna(subset=['App'], inplace=True)
social_apps['App'] = social_apps['App'].apply(lambda x: x.replace('/', '-'))
seed_list_social = social_apps.drop_duplicates(subset='App')

seed_list_social

Unnamed: 0,similarweb_url,App,Publisher,Category
0,https://www.similarweb.com/top-apps/google/alg...,Facebook Lite,"Meta Platforms, Inc.",Social
1,https://www.similarweb.com/top-apps/google/alg...,Facebook,"Meta Platforms, Inc.",Social
2,https://www.similarweb.com/top-apps/google/alg...,TikTok,TikTok Pte. Ltd.,Social
3,https://www.similarweb.com/top-apps/google/alg...,TikTok Lite - Save Data,TikTok Pte. Ltd.,Social
4,https://www.similarweb.com/top-apps/google/alg...,Instagram,Instagram,Social
...,...,...,...,...
4755,https://www.similarweb.com/top-apps/google/mor...,SoulChat-صديق حقيقي و قريبة,UNIVERSE ATTRACTION INTERNATIONAL LIMITED,Social
4783,https://www.similarweb.com/top-apps/google/pue...,Best Wishes: For all Occasions,Silver Apps10,Social
4797,https://www.similarweb.com/top-apps/google/pue...,SUSH Grow cute virtual animal,Emotion Studio Inc,Social
4806,https://www.similarweb.com/top-apps/google/pue...,Fun Live - Online Video Chat,GLM Technology,Social


In [49]:
# Create an Excel writer object

writer = pd.ExcelWriter('Seedlist The Most Loved Email, Messaging and Social Media Apps by Country.xlsx')

In [50]:
seed_list_communication.to_excel(writer, sheet_name='communication', index=False)
seed_list_social.to_excel(writer, sheet_name='social', index=False)

In [51]:
# Save the Excel file
writer.save()

# Develop the scraper

## Communication apps

In [157]:
folder_path = ''

In [186]:
# Iterate over the seed list
for _, app in scrape.iterrows():
    app_name = app['App']
    
    try: 
        # Search for the app
        result = search(
            app_name,
            lang="en",  
            country="us",  
            n_hits=10)

        # Save reviews to JSON file
        file_name = f'{app_name}.json'
        file_path = os.path.join(folder_path, file_name)
        with open(file_path, 'w') as json_file:
            json.dump(result, json_file, indent=4)
        time.sleep(3)
        print('Done ', app_name)
    except Exception as e:
        print(f'Exception {app_name}: {e}')

Done  Animated Stickers Maker & GIF
Done  All Email Login
Exception Login Mail For HotMail&Outlook: 'NoneType' object is not subscriptable


In [191]:
json_files = [f for f in os.listdir(folder_path) if f.endswith('.json')]

In [192]:
for file in json_files:
    with open(os.path.join(folder_path, file), 'r') as json_file:
        json_data = json.load(json_file)
        
        file_name = file.split('.json')[0]
        appId = json_data[0]['appId']
        
        mask = communications_apps['App'] == file_name
        communications_apps.loc[mask, 'appId'] = appId


In [194]:
scrape = pd.DataFrame(communications_apps[pd.isna(communications_apps.appId)]['App'].unique(), columns=['App']) #.to_excel('scrape.xlsx', index=False)

In [195]:
scrape

Unnamed: 0,App


In [187]:
communications_apps.loc[communications_apps['App'] == 'Login Mail For HotMail&Outlook', 'appId'] = 'com.emailonline.officemail.amoemail'

In [176]:
communications_apps.loc[communications_apps['App'] == 'Trulinco: Messaging & Calls', 'appId'] = 'com.inw.trulinco'

In [177]:
communications_apps.loc[communications_apps['App'] == 'mail.com: Mail app & Cloud', 'appId'] = 'com.mail.mobile.android.mail'

In [178]:
communications_apps.loc[communications_apps['App'] == 'Personal stickers StickerMaker', 'appId'] = 'com.memeandsticker.personal'

In [179]:
communications_apps.loc[communications_apps['App'] == 'imo beta -video calls and chat', 'appId'] = 'com.imo.android.imoim'

In [180]:
communications_apps.loc[communications_apps['App'] == 'Hola Browser-Private&Fast web', 'appId'] = 'com.talpa.hibrowser'

In [181]:
communications_apps.loc[communications_apps['App'] == 'videocall - LiveTalk Videocall', 'appId'] = 'com.videocall.videochat.video.chat.call'

In [182]:
communications_apps.loc[communications_apps['App'] == 'F SMS Libre Text Philippines', 'appId'] = 'free.text.sms'

In [183]:
communications_apps.loc[communications_apps['App'] == 'gub.uy', 'appId'] = 'uy.gub.app.perfil.release'

In [193]:
communications_apps.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 4800 entries, 0 to 4860
Data columns (total 5 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   similarweb_url  4800 non-null   object
 1   App             4800 non-null   object
 2   Publisher       4800 non-null   object
 3   Category        4800 non-null   object
 4   appId           4800 non-null   object
dtypes: object(5)
memory usage: 225.0+ KB


In [331]:
communications_apps

Unnamed: 0,similarweb_url,App,Publisher,Category,appId,Country,alpha2Code,google_play_url,reviews,five_star,four_star,three_star,two_star,one_star,weighted_sum,score_out_of_five
0,https://www.similarweb.com/top-apps/google/alg...,WhatsApp Messenger,WhatsApp LLC,Communication,com.whatsapp,Algeria,DZ,https://play.google.com/store/apps/details?id=...,190000000,124642625,15062961,10741084,7008012,29181989,758886234,4.066115
1,https://www.similarweb.com/top-apps/google/alg...,Messenger,"Meta Platforms, Inc.",Communication,com.facebook.orca,Algeria,DZ,https://play.google.com/store/apps/details?id=...,89100000,46053579,4949839,3550516,2744035,27834686,294041555,3.453922
2,https://www.similarweb.com/top-apps/google/alg...,Telegram,Telegram FZ-LLC,Communication,org.telegram.messenger,Algeria,DZ,https://play.google.com/store/apps/details?id=...,13900000,9333940,1028448,697534,351536,2075800,55654966,4.126485
3,https://www.similarweb.com/top-apps/google/alg...,Snapchat,Snap Inc,Communication,com.snapchat.android,Algeria,DZ,https://play.google.com/store/apps/details?id=...,35200000,15467638,1874281,1275746,792059,13867570,104114240,3.128687
4,https://www.similarweb.com/top-apps/google/alg...,Truecaller: Identify Caller ID,Truecaller,Communication,com.truecaller,Algeria,DZ,https://play.google.com/store/apps/details?id=...,21200000,15974108,1847402,1139970,556583,1471526,93264750,4.443381
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4795,https://www.similarweb.com/top-apps/google/ser...,Smart Watch app - BT notifier,Flying Phoenix,Communication,com.smartwatch.bluetooth.sync.notifications,Serbia,RS,https://play.google.com/store/apps/details?id=...,210000,161181,23739,9995,3748,4997,943339,4.631931
4796,https://www.similarweb.com/top-apps/google/ser...,Botim - Video and Voice Call,Algento Cloud Computing Limited,Communication,im.thebot.messenger,Serbia,RS,https://play.google.com/store/apps/details?id=...,1140000,798697,156109,87130,10891,65347,4966440,4.441563
4797,https://www.similarweb.com/top-apps/google/ser...,Opera Mini: Fast Web Browser,Opera,Communication,com.opera.mini.native,Serbia,RS,https://play.google.com/store/apps/details?id=...,9130000,7209486,765358,274017,160630,406301,40658474,4.612005
4798,https://www.similarweb.com/top-apps/google/ser...,Opera GX: Gaming Browser,Opera,Communication,com.opera.gx,Serbia,RS,https://play.google.com/store/apps/details?id=...,198000,153543,20823,3763,5519,5519,878853,4.645911


## Email apps

In [339]:
countries = communications_apps[['Country', 'alpha2Code']].drop_duplicates(subset=['Country'])

In [340]:
countries

Unnamed: 0,Country,alpha2Code
0,Algeria,DZ
50,Azerbaijan,AZ
100,Argentina,AR
150,Australia,AU
200,Bahrain,BH
...,...,...
4550,Bangladesh,BD
4600,Bosnia And Herzegovina,BA
4650,Morocco,MA
4700,Puerto Rico,PR


In [332]:
email_apps = communications_apps[communications_apps.Category == 'Email']

In [335]:
email_apps = email_apps.drop_duplicates(subset=['appId'])

In [337]:
email_apps[['App', 'Publisher', 'appId']]

Unnamed: 0,App,Publisher,appId
42,Email - Fast and Smart Mail,AMOBEAR TECHNOLOGY GROUP,com.fastsigninemail.securemail.bestemail
44,Temp Mail - Temporary Email,Privatix Limited,com.tempmail
64,Mail.ru - Email App,Mail.Ru Group,ru.mail.mailapp
117,All Email Connect,AI Email & Access,com.mail.inbox.allemailaccess
151,Gmail,Google LLC,com.google.android.gm
165,Yahoo Mail – Organized Email,Yahoo,com.yahoo.mobile.client.android.mail
170,Proton Mail: Encrypted Email,Proton AG,ch.protonmail.android
187,mail.com: Mail app & Cloud,1&1 Mail & Media Inc,com.mail.mobile.android.mail
195,GMX - Mail & Cloud,GMX,de.gmx.mobile.android.mail
198,K-9 Mail,Mozilla Thunderbird,com.fsck.k9


In [342]:
# Creating a list to hold the results
results = []

# Loop through each country and email app to generate the URLs and build the final DataFrame
for _, country_row in countries.iterrows():
    for _, app_row in email_apps.iterrows():
        google_play_url = f"https://play.google.com/store/apps/details?id={app_row['appId']}&hl=en_GB&gl={country_row['alpha2Code']}"
        results.append({
            'Country': country_row['Country'],
            'alpha2Code': country_row['alpha2Code'],
            'App': app_row['App'],
            'Publisher': app_row['Publisher'],
            'appId': app_row['appId'],
            'google_play_url': google_play_url
        })

# Convert the results into a DataFrame
email_df = pd.DataFrame(results)

In [343]:
email_df

Unnamed: 0,Country,alpha2Code,App,Publisher,appId,google_play_url
0,Algeria,DZ,Email - Fast and Smart Mail,AMOBEAR TECHNOLOGY GROUP,com.fastsigninemail.securemail.bestemail,https://play.google.com/store/apps/details?id=...
1,Algeria,DZ,Temp Mail - Temporary Email,Privatix Limited,com.tempmail,https://play.google.com/store/apps/details?id=...
2,Algeria,DZ,Mail.ru - Email App,Mail.Ru Group,ru.mail.mailapp,https://play.google.com/store/apps/details?id=...
3,Algeria,DZ,All Email Connect,AI Email & Access,com.mail.inbox.allemailaccess,https://play.google.com/store/apps/details?id=...
4,Algeria,DZ,Gmail,Google LLC,com.google.android.gm,https://play.google.com/store/apps/details?id=...
...,...,...,...,...,...,...
3739,Serbia,RS,Infomaniak Mail,Infomaniak,com.infomaniak.mail,https://play.google.com/store/apps/details?id=...
3740,Serbia,RS,BT Email,BT Group PLC,com.bt.mail.btprod,https://play.google.com/store/apps/details?id=...
3741,Serbia,RS,All Email Login,All Email Login,com.allemail.login,https://play.google.com/store/apps/details?id=...
3742,Serbia,RS,All Email Access: Mail Inbox,Appsbuyout Dev,info.myapp.allemailaccess,https://play.google.com/store/apps/details?id=...


In [344]:
#email_df.to_csv('email_df.csv', index=False)

In [345]:
email_apps = pd.read_csv('email_apps_scraped.csv')

In [348]:
email_apps = email_apps.dropna(subset=['reviews']).reset_index(drop=True)

In [349]:
email_apps.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2614 entries, 0 to 2613
Data columns (total 12 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   Country          2614 non-null   object
 1   alpha2Code       2614 non-null   object
 2   App              2614 non-null   object
 3   Publisher        2614 non-null   object
 4   appId            2614 non-null   object
 5   google_play_url  2614 non-null   object
 6   reviews          2614 non-null   object
 7   five_star        2614 non-null   object
 8   four_star        2614 non-null   object
 9   three_star       2614 non-null   object
 10  two_star         2614 non-null   object
 11  one_star         2614 non-null   object
dtypes: object(12)
memory usage: 245.2+ KB


In [350]:
email_apps

Unnamed: 0,Country,alpha2Code,App,Publisher,appId,google_play_url,reviews,five_star,four_star,three_star,two_star,one_star
0,Algeria,DZ,Email - Fast and Smart Mail,AMOBEAR TECHNOLOGY GROUP,com.fastsigninemail.securemail.bestemail,https://play.google.com/store/apps/details?id=...,83.5K reviews,55756,11046,3945,789,7890
1,Algeria,DZ,Temp Mail - Temporary Email,Privatix Limited,com.tempmail,https://play.google.com/store/apps/details?id=...,325K reviews,231420,27484,18983,10554,31734
2,Algeria,DZ,Mail.ru - Email App,Mail.Ru Group,ru.mail.mailapp,https://play.google.com/store/apps/details?id=...,3.14M reviews,2135250,386820,154728,139255,201146
3,Algeria,DZ,All Email Connect,AI Email & Access,com.mail.inbox.allemailaccess,https://play.google.com/store/apps/details?id=...,10.6K reviews,4517,4517,0,0,1505
4,Algeria,DZ,Gmail,Google LLC,com.google.android.gm,https://play.google.com/store/apps/details?id=...,13M reviews,8493138,1030231,636912,372862,1925536
...,...,...,...,...,...,...,...,...,...,...,...,...
2609,Serbia,RS,Libero Mail,Italiaonline S.p.A.,it.italiaonline.mail,https://play.google.com/store/apps/details?id=...,99.5K reviews,52183,34789,0,0,8697
2610,Serbia,RS,Gmail Go,Google LLC,com.google.android.gm.lite,https://play.google.com/store/apps/details?id=...,145K reviews,95879,11279,8459,4229,19739
2611,Serbia,RS,Email for Hotmail & Outlook,Amobear Application - Diavostar PTE. LTD,com.microsoft.office.outlook,https://play.google.com/store/apps/details?id=...,9.48M reviews,7789110,1056052,115354,71915,144314
2612,Serbia,RS,All Email Access: Mail Inbox,Appsbuyout Dev,info.myapp.allemailaccess,https://play.google.com/store/apps/details?id=...,62.3K reviews,46422,13754,0,0,1719


In [197]:
#communications_apps.to_csv('communications_apps.csv', index=False)

## Social apps

In [202]:
folder_path = ''

In [225]:
# Iterate over the seed list
for _, app in scrape.iterrows():
    app_name = app['App']
    
    try: 
        # Search for the app
        result = search(
            app_name,
            lang="en",  
            country="us",  
            n_hits=10)

        # Save reviews to JSON file
        file_name = f'{app_name}.json'
        file_path = os.path.join(folder_path, file_name)
        with open(file_path, 'w') as json_file:
            json.dump(result, json_file, indent=4)
        time.sleep(3)
        print('Done', _, app_name)
    except Exception as e:
        print(f'Exception {app_name}: {e}')

Exception WeParty-Live Chat&Voice Party: 'NoneType' object is not subscriptable
Done 1 Best Wishes: For all Occasions
Done 2 SUSH Grow cute virtual animal


In [229]:
json_files = [f for f in os.listdir(folder_path) if f.endswith('.json')]

for file in json_files:
    with open(os.path.join(folder_path, file), 'r') as json_file:
        json_data = json.load(json_file)
        
        file_name = file.split('.json')[0]
        appId = json_data[0]['appId']
        
        mask = social_apps['App'] == file_name
        social_apps.loc[mask, 'appId'] = appId


In [230]:
scrape = pd.DataFrame(social_apps[pd.isna(social_apps.appId)]['App'].unique(), columns=['App']) #.to_excel('scrape.xlsx', index=False)

In [231]:
scrape

Unnamed: 0,App


In [226]:
social_apps.loc[social_apps['App'] == 'WeParty-Live Chat&Voice Party', 'appId'] = 'com.partyjoy.weparty'

In [204]:
social_apps.loc[social_apps['App'] == 'Ayar-Chat&Play', 'appId'] = 'com.star.ayar.android'

In [205]:
social_apps.loc[social_apps['App'] == 'Xviews - Video Chat&Hook Up', 'appId'] = 'com.waooolive.android'

In [206]:
social_apps.loc[social_apps['App'] == 'Blind Stalk See Hidden Profile', 'appId'] = 'net.instalk.app'

In [207]:
social_apps.loc[social_apps['App'] == 'See log - view hidden profile', 'appId'] = 'com.seelog.app'

In [208]:
social_apps.loc[social_apps['App'] == 'Achat- Live Chat& Make Friends', 'appId'] = 'com.ahchat.app'

In [209]:
social_apps.loc[social_apps['App'] == 'Scret: anonymous Q&A', 'appId'] = 'com.scretmobile'

In [210]:
social_apps.loc[social_apps['App'] == 'Trabajando .COM', 'appId'] = 'com.trabajando'

In [211]:
social_apps.loc[social_apps['App'] == 'Timehop - Memories Then & Now', 'appId'] = 'com.timehop'

In [212]:
social_apps.loc[social_apps['App'] == 'RandomChat - Chat in Japanese', 'appId'] = 'com.randomchat'

In [213]:
social_apps.loc[social_apps['App'] == 'ZonePane for Mastodon&Bluesky', 'appId'] = 'com.zonepane'

In [214]:
social_apps.loc[social_apps['App'] == 'My city', 'appId'] = 'mycity.home'

In [215]:
social_apps.loc[social_apps['App'] == 'Whats Group Links Join Groups', 'appId'] = 'com.thwhatsn.agrop.whsap'

In [216]:
social_apps.loc[social_apps['App'] == 'Spottd', 'appId'] = 'app.spottd'

In [217]:
social_apps['Category'] = social_apps['Category'].fillna('Social')

In [232]:
social_apps.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 4800 entries, 0 to 4861
Data columns (total 5 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   similarweb_url  4800 non-null   object
 1   App             4800 non-null   object
 2   Publisher       4800 non-null   object
 3   Category        4800 non-null   object
 4   appId           4800 non-null   object
dtypes: object(5)
memory usage: 225.0+ KB


In [233]:
social_apps.to_csv('social_apps.csv', index=False)

In [234]:
social_apps

Unnamed: 0,similarweb_url,App,Publisher,Category,appId
0,https://www.similarweb.com/top-apps/google/alg...,Facebook Lite,"Meta Platforms, Inc.",Social,com.facebook.lite
1,https://www.similarweb.com/top-apps/google/alg...,Facebook,"Meta Platforms, Inc.",Social,com.facebook.katana
2,https://www.similarweb.com/top-apps/google/alg...,TikTok,TikTok Pte. Ltd.,Social,com.zhiliaoapp.musically
3,https://www.similarweb.com/top-apps/google/alg...,TikTok Lite - Save Data,TikTok Pte. Ltd.,Social,com.zhiliaoapp.musically
4,https://www.similarweb.com/top-apps/google/alg...,Instagram,Instagram,Social,com.instagram.android
...,...,...,...,...,...
4857,https://www.similarweb.com/top-apps/google/ser...,Xviews - Video Chat&Hook Up,Wavechat Inc.,Social,com.waooolive.android
4858,https://www.similarweb.com/top-apps/google/ser...,Chat Alternative — android app,Video Chat Alt,Social,com.chatroullete.alternative
4859,https://www.similarweb.com/top-apps/google/ser...,BeReal. Your friends for real.,BeReal,Social,com.bereal.ft
4860,https://www.similarweb.com/top-apps/google/ser...,FollowMeter for Instagram,Followmeter,Social,com.beakerapps.instameter2


# Scrape apps

## Communications apps

In [None]:
communications_apps['Country'] = communications_apps['similarweb_url'].apply(lambda x: x.split('/')[5])

In [None]:
len(communications_apps['Country'].unique())

In [None]:
for country in countries:
    mask = communications_apps['Country'] == country['urlCode']
    
    if mask.any():
        communications_apps.loc[mask, 'alpha2Code'] = country['alpha2Code']

In [None]:
communications_apps.loc[communications_apps['Country'] == 'austria', 'alpha2Code'] = 'AT'
communications_apps.loc[communications_apps['Country'] == 'bangladesh', 'alpha2Code'] = 'BD'
communications_apps.loc[communications_apps['Country'] == 'bosnia-and-herzegovina', 'alpha2Code'] = 'BA'
communications_apps.loc[communications_apps['Country'] == 'morocco', 'alpha2Code'] = 'MA'
communications_apps.loc[communications_apps['Country'] == 'puerto-rico', 'alpha2Code'] = 'PR'
communications_apps.loc[communications_apps['Country'] == 'serbia', 'alpha2Code'] = 'RS'

In [None]:
communications_apps['google_play_url'] = communications_apps.apply(lambda row: f"https://play.google.com/store/apps/details?id={row['appId']}&hl=en_GB&gl={row['alpha2Code']}", axis=1)


In [None]:
for index, row in communications_apps[134:].iterrows():
    url = row['google_play_url']

      try:
            response = requests.get(url)
            response.raise_for_status()

            soup = BeautifulSoup(response.text)
            reviews = soup.find_all('div', 'g1rdde')[0].text
            breakdowns = [x.get('title') for x in soup.find_all('div', 'RutFAf wcB8se')]

            five_star = breakdowns[0]
            four_star = breakdowns[1]
            three_star = breakdowns[2]
            two_star = breakdowns[3]
            one_star = breakdowns[4]

            communications_apps.loc[index, 'reviews'] = reviews
            communications_apps.loc[index, 'five_star'] = five_star
            communications_apps.loc[index, 'four_star'] = four_star
            communications_apps.loc[index, 'three_star'] = three_star
            communications_apps.loc[index, 'two_star'] = two_star
            communications_apps.loc[index, 'one_star'] = one_star

            communications_apps.to_csv(folder_path + 'communications_apps_scraped.csv', index=False)
            print('Done: ', index)
            time.sleep(3)

    except Exception as e:
            print(f'Error with {index}: {e}')


In [235]:
communications_apps = pd.read_csv('communications_apps_scraped.csv')

In [236]:
communications_apps.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4800 entries, 0 to 4799
Data columns (total 14 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   similarweb_url   4800 non-null   object
 1   App              4800 non-null   object
 2   Publisher        4800 non-null   object
 3   Category         4800 non-null   object
 4   appId            4800 non-null   object
 5   Country          4800 non-null   object
 6   alpha2Code       4800 non-null   object
 7   google_play_url  4800 non-null   object
 8   reviews          4647 non-null   object
 9   five_star        4647 non-null   object
 10  four_star        4647 non-null   object
 11  three_star       4647 non-null   object
 12  two_star         4647 non-null   object
 13  one_star         4647 non-null   object
dtypes: object(14)
memory usage: 525.1+ KB


In [241]:
for url in communications_apps[pd.isna(communications_apps.reviews)]['google_play_url'].unique():
    print(url)

https://play.google.com/store/apps/details?id=com.innocaption.vpdp&hl=en_GB&gl=DZ
https://play.google.com/store/apps/details?id=com.innocaption.vpdp&hl=en_GB&gl=AZ
https://play.google.com/store/apps/details?id=com.destinyitemmanager.app&hl=en_GB&gl=AZ
https://play.google.com/store/apps/details?id=com.romanticstickers.lovestickers.wastickers.ly&hl=en_GB&gl=AZ
https://play.google.com/store/apps/details?id=com.miclaro.app&hl=en_GB&gl=AR
https://play.google.com/store/apps/details?id=com.telego.phone.android&hl=en_GB&gl=AR
https://play.google.com/store/apps/details?id=com.deucetek.tokee&hl=en_GB&gl=AR
https://play.google.com/store/apps/details?id=com.innocaption.vpdp&hl=en_GB&gl=AU
https://play.google.com/store/apps/details?id=com.vikingcruises.mvjapp&hl=en_GB&gl=BE
https://play.google.com/store/apps/details?id=com.cscsw.cscservicerequest&hl=en_GB&gl=BE
https://play.google.com/store/apps/details?id=com.innocaption.vpdp&hl=en_GB&gl=BE
https://play.google.com/store/apps/details?id=com.orange.

In [237]:
communications_apps

Unnamed: 0,similarweb_url,App,Publisher,Category,appId,Country,alpha2Code,google_play_url,reviews,five_star,four_star,three_star,two_star,one_star
0,https://www.similarweb.com/top-apps/google/alg...,WhatsApp Messenger,WhatsApp LLC,Communication,com.whatsapp,algeria,DZ,https://play.google.com/store/apps/details?id=...,190M reviews,124642625,15062961,10741084,7008012,29181989
1,https://www.similarweb.com/top-apps/google/alg...,Messenger,"Meta Platforms, Inc.",Communication,com.facebook.orca,algeria,DZ,https://play.google.com/store/apps/details?id=...,89.1M reviews,46053579,4949839,3550516,2744035,27834686
2,https://www.similarweb.com/top-apps/google/alg...,Telegram,Telegram FZ-LLC,Communication,org.telegram.messenger,algeria,DZ,https://play.google.com/store/apps/details?id=...,13.9M reviews,9333940,1028448,697534,351536,2075800
3,https://www.similarweb.com/top-apps/google/alg...,Snapchat,Snap Inc,Communication,com.snapchat.android,algeria,DZ,https://play.google.com/store/apps/details?id=...,35.2M reviews,15467638,1874281,1275746,792059,13867570
4,https://www.similarweb.com/top-apps/google/alg...,Truecaller: Identify Caller ID,Truecaller,Communication,com.truecaller,algeria,DZ,https://play.google.com/store/apps/details?id=...,21.2M reviews,15974108,1847402,1139970,556583,1471526
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4795,https://www.similarweb.com/top-apps/google/ser...,Smart Watch app - BT notifier,Flying Phoenix,Communication,com.smartwatch.bluetooth.sync.notifications,serbia,RS,https://play.google.com/store/apps/details?id=...,210K reviews,161181,23739,9995,3748,4997
4796,https://www.similarweb.com/top-apps/google/ser...,Botim - Video and Voice Call,Algento Cloud Computing Limited,Communication,im.thebot.messenger,serbia,RS,https://play.google.com/store/apps/details?id=...,1.14M reviews,798697,156109,87130,10891,65347
4797,https://www.similarweb.com/top-apps/google/ser...,Opera Mini: Fast Web Browser,Opera,Communication,com.opera.mini.native,serbia,RS,https://play.google.com/store/apps/details?id=...,9.13M reviews,7209486,765358,274017,160630,406301
4798,https://www.similarweb.com/top-apps/google/ser...,Opera GX: Gaming Browser,Opera,Communication,com.opera.gx,serbia,RS,https://play.google.com/store/apps/details?id=...,198K reviews,153543,20823,3763,5519,5519


## Social apps

In [None]:
social_apps['Country'] = social_apps['similarweb_url'].apply(lambda x: x.split('/')[5])

In [None]:
len(social_apps['Country'].unique())

In [None]:
for country in countries:
    mask = social_apps['Country'] == country['urlCode']
    
    if mask.any():
        social_apps.loc[mask, 'alpha2Code'] = country['alpha2Code']

In [None]:
social_apps.loc[social_apps['Country'] == 'austria', 'alpha2Code'] = 'AT'
social_apps.loc[social_apps['Country'] == 'bangladesh', 'alpha2Code'] = 'BD'
social_apps.loc[social_apps['Country'] == 'bosnia-and-herzegovina', 'alpha2Code'] = 'BA'
social_apps.loc[social_apps['Country'] == 'morocco', 'alpha2Code'] = 'MA'
social_apps.loc[social_apps['Country'] == 'puerto-rico', 'alpha2Code'] = 'PR'
social_apps.loc[social_apps['Country'] == 'serbia', 'alpha2Code'] = 'RS'

In [None]:
social_apps['google_play_url'] = social_apps.apply(lambda row: f"https://play.google.com/store/apps/details?id={row['appId']}&hl=en_GB&gl={row['alpha2Code']}", axis=1)


In [None]:
for index, row in social_apps[58:].iterrows():
  url = row['google_play_url']

  try:
    response = requests.get(url)
    response.raise_for_status()

    soup = BeautifulSoup(response.text)
    reviews = soup.find_all('div', 'g1rdde')[0].text
    breakdowns = [x.get('title') for x in soup.find_all('div', 'RutFAf wcB8se')]

    five_star = breakdowns[0]
    four_star = breakdowns[1]
    three_star = breakdowns[2]
    two_star = breakdowns[3]
    one_star = breakdowns[4]

    social_apps.loc[index, 'reviews'] = reviews
    social_apps.loc[index, 'five_star'] = five_star
    social_apps.loc[index, 'four_star'] = four_star
    social_apps.loc[index, 'three_star'] = three_star
    social_apps.loc[index, 'two_star'] = two_star
    social_apps.loc[index, 'one_star'] = one_star

    social_apps.to_csv(folder_path + 'social_apps_scraped.csv', index=False)
    print('Done: ', index)
    time.sleep(3)

  except Exception as e:
        print(f'Error with {index}: {e}')

In [253]:
social_apps = pd.read_csv('social_apps_scraped.csv')

In [254]:
social_apps.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4800 entries, 0 to 4799
Data columns (total 14 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   similarweb_url   4800 non-null   object
 1   App              4800 non-null   object
 2   Publisher        4800 non-null   object
 3   Category         4800 non-null   object
 4   appId            4800 non-null   object
 5   Country          4800 non-null   object
 6   alpha2Code       4800 non-null   object
 7   google_play_url  4800 non-null   object
 8   reviews          4587 non-null   object
 9   five_star        4587 non-null   object
 10  four_star        4587 non-null   object
 11  three_star       4587 non-null   object
 12  two_star         4587 non-null   object
 13  one_star         4587 non-null   object
dtypes: object(14)
memory usage: 525.1+ KB


In [255]:
for url in social_apps[pd.isna(social_apps.reviews)]['google_play_url'].unique():
    print(url)

https://play.google.com/store/apps/details?id=app.nicegram&hl=en_GB&gl=DZ
https://play.google.com/store/apps/details?id=com.alleote.alleote&hl=en_GB&gl=DZ
https://play.google.com/store/apps/details?id=com.video.welive&hl=en_GB&gl=AZ
https://play.google.com/store/apps/details?id=app.nicegram&hl=en_GB&gl=AZ
https://play.google.com/store/apps/details?id=com.matchu.machat&hl=en_GB&gl=AZ
https://play.google.com/store/apps/details?id=com.tiktok.tv&hl=en_GB&gl=AR
https://play.google.com/store/apps/details?id=sw.es.galuresa&hl=en_GB&gl=AR
https://play.google.com/store/apps/details?id=inc.peer.app&hl=en_GB&gl=AR
https://play.google.com/store/apps/details?id=com.goodmorningwishes.dailywishesandblessing&hl=en_GB&gl=AR
https://play.google.com/store/apps/details?id=app.nicegram&hl=en_GB&gl=AM
https://play.google.com/store/apps/details?id=com.nextdoor&hl=en_GB&gl=BE
https://play.google.com/store/apps/details?id=com.videolite&hl=en_GB&gl=BE
https://play.google.com/store/apps/details?id=com.tiktok.tv&

In [256]:
social_apps

Unnamed: 0,similarweb_url,App,Publisher,Category,appId,Country,alpha2Code,google_play_url,reviews,five_star,four_star,three_star,two_star,one_star
0,https://www.similarweb.com/top-apps/google/alg...,Facebook Lite,"Meta Platforms, Inc.",Social,com.facebook.lite,algeria,DZ,https://play.google.com/store/apps/details?id=...,26.3M reviews,7437778,784400,631995,569459,16228483
1,https://www.similarweb.com/top-apps/google/alg...,Facebook,"Meta Platforms, Inc.",Social,com.facebook.katana,algeria,DZ,https://play.google.com/store/apps/details?id=...,145M reviews,39286109,3968185,2898191,1885699,91294973
2,https://www.similarweb.com/top-apps/google/alg...,TikTok,TikTok Pte. Ltd.,Social,com.zhiliaoapp.musically,algeria,DZ,https://play.google.com/store/apps/details?id=...,62.7M reviews,40186720,4186403,2742211,1753926,10945611
3,https://www.similarweb.com/top-apps/google/alg...,TikTok Lite - Save Data,TikTok Pte. Ltd.,Social,com.zhiliaoapp.musically,algeria,DZ,https://play.google.com/store/apps/details?id=...,62.7M reviews,40186486,4186378,2742195,1753916,10945548
4,https://www.similarweb.com/top-apps/google/alg...,Instagram,Instagram,Social,com.instagram.android,algeria,DZ,https://play.google.com/store/apps/details?id=...,153M reviews,40835101,4352180,2569074,1721891,98732195
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4795,https://www.similarweb.com/top-apps/google/ser...,Xviews - Video Chat&Hook Up,Wavechat Inc.,Social,com.waooolive.android,serbia,RS,https://play.google.com/store/apps/details?id=...,12.7K reviews,10892,907,0,0,907
4796,https://www.similarweb.com/top-apps/google/ser...,Chat Alternative — android app,Video Chat Alt,Social,com.chatroullete.alternative,serbia,RS,https://play.google.com/store/apps/details?id=...,135K reviews,53523,7646,6241,4993,54772
4797,https://www.similarweb.com/top-apps/google/ser...,BeReal. Your friends for real.,BeReal,Social,com.bereal.ft,serbia,RS,https://play.google.com/store/apps/details?id=...,291K reviews,202393,40333,8357,14171,22891
4798,https://www.similarweb.com/top-apps/google/ser...,FollowMeter for Instagram,Followmeter,Social,com.beakerapps.instameter2,serbia,RS,https://play.google.com/store/apps/details?id=...,144K reviews,73741,17470,3860,3860,38801


# Clean data

## Communications apps

In [243]:
communications_apps.dropna(subset=['reviews'], inplace=True)

In [245]:
# Assign "Email" category to apps containing "mail" or "email"
communications_apps.loc[communications_apps['App'].str.contains('mail|email', case=False), 'Category'] = 'Email'

In [249]:
communications_apps.loc[communications_apps['App'].str.contains('Voicemail', case=False), 'Category'] = 'Communication'

In [246]:
communications_apps.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 4647 entries, 0 to 4799
Data columns (total 14 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   similarweb_url   4647 non-null   object
 1   App              4647 non-null   object
 2   Publisher        4647 non-null   object
 3   Category         4647 non-null   object
 4   appId            4647 non-null   object
 5   Country          4647 non-null   object
 6   alpha2Code       4647 non-null   object
 7   google_play_url  4647 non-null   object
 8   reviews          4647 non-null   object
 9   five_star        4647 non-null   object
 10  four_star        4647 non-null   object
 11  three_star       4647 non-null   object
 12  two_star         4647 non-null   object
 13  one_star         4647 non-null   object
dtypes: object(14)
memory usage: 544.6+ KB


In [251]:
communications_apps[communications_apps.Category=='Email']['App'].unique()

array(['Email - Fast and Smart Mail', 'Temp Mail - Temporary Email',
       'Mail.ru - Email App', 'All Email Connect', 'Gmail',
       'Yahoo Mail – Organized Email', 'Proton Mail: Encrypted Email',
       'mail.com: Mail app & Cloud', 'GMX - Mail & Cloud', 'K-9 Mail',
       'Email App - Manage Your Inbox', 'Mail - All Email Accounts',
       'ABV Mail', 'Yandex Mail', 'TypeApp mail - email app',
       'FairEmail, privacy aware email', 'Email.cz', 'Centrum.cz mail',
       'E-Mail for Outlook & Hotmail', 'SFR Mail',
       'Laposte.net – Votre boîte mail', 'WEB.DE Mail & Cloud',
       'Telekom Mail - E-Mail-Programm', '1&1 Mail',
       'freenet Mail - E-Mail Postfach', 'freemail', 'Rediffmail',
       'Libero Mail', 'Virgilio Mail - Email App', 'Tiscali Mail',
       'Legalmail', 'Yahoo! Mail', 'App for Gmail SMS etc：CosmoSia',
       'NAVER Mail', 'Daum Mail - 다음 메일', 'Gmail Go',
       'Email for Hotmail & Outlook', 'Infomaniak Mail', 'BT Email',
       'All Email Login', 'All E

In [260]:
communications_apps['reviews'] = communications_apps['reviews'].apply(lambda x: x.replace(' reviews', '').replace('info', ''))

In [262]:
def convert_to_int(value):
    if 'M' in value:
        return int(float(value.replace('M', '')) * 1000000)
    elif 'K' in value:
        return int(float(value.replace('K', '')) * 1000)
    else:
        return int(value)

In [263]:
communications_apps['reviews'] = communications_apps['reviews'].apply(convert_to_int)

In [267]:
#communications_apps[communications_apps.reviews ==7]['google_play_url'][1244]

'https://play.google.com/store/apps/details?id=lu.vii.mobile&hl=en_GB&gl=FI'

In [271]:
communications_apps['five_star'] = communications_apps['five_star'].apply(lambda x: int(x.replace(',', '')))
communications_apps['four_star'] = communications_apps['four_star'].apply(lambda x: int(x.replace(',', '')))
communications_apps['three_star'] = communications_apps['three_star'].apply(lambda x: int(x.replace(',', '')))
communications_apps['two_star'] = communications_apps['two_star'].apply(lambda x: int(x.replace(',', '')))
communications_apps['one_star'] = communications_apps['one_star'].apply(lambda x: int(x.replace(',', '')))

In [304]:
communications_apps['Country'] = communications_apps['Country'].apply(lambda x: x.replace('-', ' '))

In [311]:
communications_apps['Country'] = communications_apps['Country'].apply(lambda x: x.title())

In [312]:
communications_apps['Country'].unique()

array(['Algeria', 'Azerbaijan', 'Argentina', 'Australia', 'Bahrain',
       'Armenia', 'Belgium', 'Bolivia Plurinational State Of', 'Brazil',
       'Bulgaria', 'Belarus', 'Cambodia', 'Canada', 'Sri Lanka', 'Chile',
       'Taiwan', 'Colombia', 'Costa Rica', 'Croatia', 'Czech Republic',
       'Denmark', 'Dominican Republic', 'Ecuador', 'El Salvador',
       'Finland', 'France', 'Germany', 'Ghana', 'Greece', 'Guatemala',
       'Honduras', 'Hong Kong', 'Hungary', 'India', 'Indonesia',
       'Ireland', 'Israel', 'Italy', 'Jamaica', 'Japan', 'Kazakhstan',
       'Jordan', 'Kenya', 'Korea Republic Of', 'Kuwait', 'Kyrgyzstan',
       'Latvia', 'Lithuania', 'Malaysia', 'Mexico', 'Moldova Republic Of',
       'Oman', 'Nepal', 'Netherlands', 'New Zealand', 'Nigeria', 'Norway',
       'Pakistan', 'Panama', 'Paraguay', 'Peru', 'Philippines', 'Poland',
       'Portugal', 'Qatar', 'Romania', 'Russian Federation',
       'Saudi Arabia', 'Singapore', 'Slovakia', 'Vietnam', 'Slovenia',
       'Sout

In [275]:
weights = {
    'five_star': 5,
    'four_star': 4,
    'three_star': 3,
    'two_star': 2,
    'one_star': 1
}

In [277]:
star_rating_columns = ['five_star', 'four_star', 'three_star', 'two_star', 'one_star']

# Calculate the weighted sum for each row based on star rating columns
communications_apps['weighted_sum'] = communications_apps.apply(lambda row: sum(row[star] * weights[star] for star in star_rating_columns), axis=1)


In [281]:
# Calculate the total number of reviews
total_reviews = communications_apps[star_rating_columns].sum(axis=1)

In [285]:
# Calculate the score out of five
communications_apps['score_out_of_five'] = communications_apps['weighted_sum'] / total_reviews


In [286]:
communications_apps

Unnamed: 0,similarweb_url,App,Publisher,Category,appId,Country,alpha2Code,google_play_url,reviews,five_star,four_star,three_star,two_star,one_star,weighted_sum,score_out_of_five
0,https://www.similarweb.com/top-apps/google/alg...,WhatsApp Messenger,WhatsApp LLC,Communication,com.whatsapp,Algeria,DZ,https://play.google.com/store/apps/details?id=...,190000000,124642625,15062961,10741084,7008012,29181989,758886234,4.066115
1,https://www.similarweb.com/top-apps/google/alg...,Messenger,"Meta Platforms, Inc.",Communication,com.facebook.orca,Algeria,DZ,https://play.google.com/store/apps/details?id=...,89100000,46053579,4949839,3550516,2744035,27834686,294041555,3.453922
2,https://www.similarweb.com/top-apps/google/alg...,Telegram,Telegram FZ-LLC,Communication,org.telegram.messenger,Algeria,DZ,https://play.google.com/store/apps/details?id=...,13900000,9333940,1028448,697534,351536,2075800,55654966,4.126485
3,https://www.similarweb.com/top-apps/google/alg...,Snapchat,Snap Inc,Communication,com.snapchat.android,Algeria,DZ,https://play.google.com/store/apps/details?id=...,35200000,15467638,1874281,1275746,792059,13867570,104114240,3.128687
4,https://www.similarweb.com/top-apps/google/alg...,Truecaller: Identify Caller ID,Truecaller,Communication,com.truecaller,Algeria,DZ,https://play.google.com/store/apps/details?id=...,21200000,15974108,1847402,1139970,556583,1471526,93264750,4.443381
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4795,https://www.similarweb.com/top-apps/google/ser...,Smart Watch app - BT notifier,Flying Phoenix,Communication,com.smartwatch.bluetooth.sync.notifications,Serbia,RS,https://play.google.com/store/apps/details?id=...,210000,161181,23739,9995,3748,4997,943339,4.631931
4796,https://www.similarweb.com/top-apps/google/ser...,Botim - Video and Voice Call,Algento Cloud Computing Limited,Communication,im.thebot.messenger,Serbia,RS,https://play.google.com/store/apps/details?id=...,1140000,798697,156109,87130,10891,65347,4966440,4.441563
4797,https://www.similarweb.com/top-apps/google/ser...,Opera Mini: Fast Web Browser,Opera,Communication,com.opera.mini.native,Serbia,RS,https://play.google.com/store/apps/details?id=...,9130000,7209486,765358,274017,160630,406301,40658474,4.612005
4798,https://www.similarweb.com/top-apps/google/ser...,Opera GX: Gaming Browser,Opera,Communication,com.opera.gx,Serbia,RS,https://play.google.com/store/apps/details?id=...,198000,153543,20823,3763,5519,5519,878853,4.645911


## Social apps

In [287]:
social_apps.dropna(subset=['reviews'], inplace=True)

In [288]:
social_apps['reviews'] = social_apps['reviews'].apply(lambda x: x.replace(' reviews', '').replace('info', ''))

In [289]:
social_apps['reviews'] = social_apps['reviews'].apply(convert_to_int)

In [298]:
social_apps['five_star'] = social_apps['five_star'].apply(lambda x: int(x.replace(',', '')))
social_apps['four_star'] = social_apps['four_star'].apply(lambda x: int(x.replace(',', '')))
social_apps['three_star'] = social_apps['three_star'].apply(lambda x: int(x.replace(',', '')))
social_apps['two_star'] = social_apps['two_star'].apply(lambda x: int(x.replace(',', '')))
social_apps['one_star'] = social_apps['one_star'].apply(lambda x: int(x.replace(',', '')))

In [313]:
social_apps['Country'] = social_apps['Country'].apply(lambda x: x.replace('-', ' '))

social_apps['Country'] = social_apps['Country'].apply(lambda x: x.title())

In [315]:
# Calculate the weighted sum for each row based on star rating columns
social_apps['weighted_sum'] = social_apps.apply(lambda row: sum(row[star] * weights[star] for star in star_rating_columns), axis=1)


# Calculate the total number of reviews
total_reviews = social_apps[star_rating_columns].sum(axis=1)

# Calculate the score out of five
social_apps['score_out_of_five'] = social_apps['weighted_sum'] / total_reviews


In [316]:
social_apps.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 4587 entries, 0 to 4799
Data columns (total 16 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   similarweb_url     4587 non-null   object 
 1   App                4587 non-null   object 
 2   Publisher          4587 non-null   object 
 3   Category           4587 non-null   object 
 4   appId              4587 non-null   object 
 5   Country            4587 non-null   object 
 6   alpha2Code         4587 non-null   object 
 7   google_play_url    4587 non-null   object 
 8   reviews            4587 non-null   int64  
 9   five_star          4587 non-null   int64  
 10  four_star          4587 non-null   int64  
 11  three_star         4587 non-null   int64  
 12  two_star           4587 non-null   int64  
 13  one_star           4587 non-null   int64  
 14  weighted_sum       4587 non-null   int64  
 15  score_out_of_five  4587 non-null   float64
dtypes: float64(1), int64(7),

In [317]:
social_apps

Unnamed: 0,similarweb_url,App,Publisher,Category,appId,Country,alpha2Code,google_play_url,reviews,five_star,four_star,three_star,two_star,one_star,weighted_sum,score_out_of_five
0,https://www.similarweb.com/top-apps/google/alg...,Facebook Lite,"Meta Platforms, Inc.",Social,com.facebook.lite,Algeria,DZ,https://play.google.com/store/apps/details?id=...,26300000,7437778,784400,631995,569459,16228483,59589876,2.323001
1,https://www.similarweb.com/top-apps/google/alg...,Facebook,"Meta Platforms, Inc.",Social,com.facebook.katana,Algeria,DZ,https://play.google.com/store/apps/details?id=...,145000000,39286109,3968185,2898191,1885699,91294973,316064229,2.268406
2,https://www.similarweb.com/top-apps/google/alg...,TikTok,TikTok Pte. Ltd.,Social,com.zhiliaoapp.musically,Algeria,DZ,https://play.google.com/store/apps/details?id=...,62700000,40186720,4186403,2742211,1753926,10945611,240359308,4.018387
3,https://www.similarweb.com/top-apps/google/alg...,TikTok Lite - Save Data,TikTok Pte. Ltd.,Social,com.zhiliaoapp.musically,Algeria,DZ,https://play.google.com/store/apps/details?id=...,62700000,40186486,4186378,2742195,1753916,10945548,240357907,4.018387
4,https://www.similarweb.com/top-apps/google/alg...,Instagram,Instagram,Social,com.instagram.android,Algeria,DZ,https://play.google.com/store/apps/details?id=...,153000000,40835101,4352180,2569074,1721891,98732195,331467424,2.236465
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4795,https://www.similarweb.com/top-apps/google/ser...,Xviews - Video Chat&Hook Up,Wavechat Inc.,Social,com.waooolive.android,Serbia,RS,https://play.google.com/store/apps/details?id=...,12700,10892,907,0,0,907,58995,4.643082
4796,https://www.similarweb.com/top-apps/google/ser...,Chat Alternative — android app,Video Chat Alt,Social,com.chatroullete.alternative,Serbia,RS,https://play.google.com/store/apps/details?id=...,135000,53523,7646,6241,4993,54772,381680,3.001219
4797,https://www.similarweb.com/top-apps/google/ser...,BeReal. Your friends for real.,BeReal,Social,com.bereal.ft,Serbia,RS,https://play.google.com/store/apps/details?id=...,291000,202393,40333,8357,14171,22891,1249601,4.336709
4798,https://www.similarweb.com/top-apps/google/ser...,FollowMeter for Instagram,Followmeter,Social,com.beakerapps.instameter2,Serbia,RS,https://play.google.com/store/apps/details?id=...,144000,73741,17470,3860,3860,38801,496686,3.606177


## Email apps

In [352]:
email_apps['reviews'] = email_apps['reviews'].apply(lambda x: x.replace(' reviews', '').replace('info', ''))

email_apps['reviews'] = email_apps['reviews'].apply(convert_to_int)

In [354]:
email_apps['five_star'] = email_apps['five_star'].apply(lambda x: int(x.replace(',', '')))
email_apps['four_star'] = email_apps['four_star'].apply(lambda x: int(x.replace(',', '')))
email_apps['three_star'] = email_apps['three_star'].apply(lambda x: int(x.replace(',', '')))
email_apps['two_star'] = email_apps['two_star'].apply(lambda x: int(x.replace(',', '')))
email_apps['one_star'] = email_apps['one_star'].apply(lambda x: int(x.replace(',', '')))

In [356]:
email_apps['Country'] = email_apps['Country'].apply(lambda x: x.replace('-', ' '))

email_apps['Country'] = email_apps['Country'].apply(lambda x: x.title())

In [358]:
# Calculate the weighted sum for each row based on star rating columns
email_apps['weighted_sum'] = email_apps.apply(lambda row: sum(row[star] * weights[star] for star in star_rating_columns), axis=1)


# Calculate the total number of reviews
total_reviews = email_apps[star_rating_columns].sum(axis=1)

# Calculate the score out of five
email_apps['score_out_of_five'] = email_apps['weighted_sum'] / total_reviews


In [359]:
email_apps

Unnamed: 0,Country,alpha2Code,App,Publisher,appId,google_play_url,reviews,five_star,four_star,three_star,two_star,one_star,weighted_sum,score_out_of_five
0,Algeria,DZ,Email - Fast and Smart Mail,AMOBEAR TECHNOLOGY GROUP,com.fastsigninemail.securemail.bestemail,https://play.google.com/store/apps/details?id=...,83500,55756,11046,3945,789,7890,344267,4.334437
1,Algeria,DZ,Temp Mail - Temporary Email,Privatix Limited,com.tempmail,https://play.google.com/store/apps/details?id=...,325000,231420,27484,18983,10554,31734,1376827,4.300233
2,Algeria,DZ,Mail.ru - Email App,Mail.Ru Group,ru.mail.mailapp,https://play.google.com/store/apps/details?id=...,3140000,2135250,386820,154728,139255,201146,13167370,4.364104
3,Algeria,DZ,All Email Connect,AI Email & Access,com.mail.inbox.allemailaccess,https://play.google.com/store/apps/details?id=...,10600,4517,4517,0,0,1505,42158,4.000190
4,Algeria,DZ,Gmail,Google LLC,com.google.android.gm,https://play.google.com/store/apps/details?id=...,13000000,8493138,1030231,636912,372862,1925536,51168610,4.107065
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2609,Serbia,RS,Libero Mail,Italiaonline S.p.A.,it.italiaonline.mail,https://play.google.com/store/apps/details?id=...,99500,52183,34789,0,0,8697,408768,4.272732
2610,Serbia,RS,Gmail Go,Google LLC,com.google.android.gm.lite,https://play.google.com/store/apps/details?id=...,145000,95879,11279,8459,4229,19739,578085,4.141455
2611,Serbia,RS,Email for Hotmail & Outlook,Amobear Application - Diavostar PTE. LTD,com.microsoft.office.outlook,https://play.google.com/store/apps/details?id=...,9480000,7789110,1056052,115354,71915,144314,43803964,4.773366
2612,Serbia,RS,All Email Access: Mail Inbox,Appsbuyout Dev,info.myapp.allemailaccess,https://play.google.com/store/apps/details?id=...,62300,46422,13754,0,0,1719,288845,4.666694


# Finalise the data into tabs

## World Map - Loved Email

World Map - Loved Email: isolate the highest rated email app by country

In [360]:
# Filter the dataframe to include only rows where Category is 'Email'
loved_email = email_apps.copy()

# Group by 'Country' and find the index of the row with the highest 'score_out_of_five' in each group
idx = loved_email.groupby('Country')['score_out_of_five'].idxmax()

# Use the index to get the corresponding rows
loved_email = loved_email.loc[idx]

In [361]:
loved_email

Unnamed: 0,Country,alpha2Code,App,Publisher,appId,google_play_url,reviews,five_star,four_star,three_star,two_star,one_star,weighted_sum,score_out_of_five
19,Algeria,DZ,NAVER Mail,NAVER Corp.,com.nhn.android.mail,https://play.google.com/store/apps/details?id=...,22000,21370,0,0,0,0,106850,5.000000
56,Argentina,AR,Yandex Mail,Direct Cursus Computer Systems Trading LLC,ru.yandex.mail,https://play.google.com/store/apps/details?id=...,552000,455929,61859,0,0,4582,2531663,4.846494
142,Armenia,AM,Mail.ru - Email App,Mail.Ru Group,ru.mail.mailapp,https://play.google.com/store/apps/details?id=...,3140000,2706779,187470,45758,15849,62889,14515636,4.808500
84,Australia,AU,"FairEmail, privacy aware email","Marcel Bokhorst, FairCode BV",eu.faircode.email,https://play.google.com/store/apps/details?id=...,26500,21238,1639,213,0,712,114097,4.793589
2453,Austria,AT,"FairEmail, privacy aware email","Marcel Bokhorst, FairCode BV",eu.faircode.email,https://play.google.com/store/apps/details?id=...,26500,23150,0,230,0,461,116901,4.903360
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2359,United States,US,"FairEmail, privacy aware email","Marcel Bokhorst, FairCode BV",eu.faircode.email,https://play.google.com/store/apps/details?id=...,26500,20365,1793,660,188,849,112202,4.703500
2395,Uruguay,UY,1&1 Mail,1&1 Telecom GmbH,com.oneandone.ciso.mobile.app.android,https://play.google.com/store/apps/details?id=...,10100,7872,1574,0,0,0,45656,4.833369
2411,Uzbekistan,UZ,Yandex Mail,Direct Cursus Computer Systems Trading LLC,ru.yandex.mail,https://play.google.com/store/apps/details?id=...,552000,492925,16720,6137,0,8254,2558170,4.881668
2429,Venezuela Bolivarian Republic Of,VE,Yandex Mail,Direct Cursus Computer Systems Trading LLC,ru.yandex.mail,https://play.google.com/store/apps/details?id=...,552000,460464,51366,3669,3669,0,2526129,4.865726


## World Map - Hated Email

World Map - Hated Email: isolate the lowest rated email app by country

In [362]:
hated_email = email_apps.copy()

idx = hated_email.groupby('Country')['score_out_of_five'].idxmin()

hated_email = hated_email.loc[idx]

In [363]:
hated_email

Unnamed: 0,Country,alpha2Code,App,Publisher,appId,google_play_url,reviews,five_star,four_star,three_star,two_star,one_star,weighted_sum,score_out_of_five
17,Algeria,DZ,1&1 Mail,1&1 Telecom GmbH,com.oneandone.ciso.mobile.app.android,https://play.google.com/store/apps/details?id=...,10100,4408,1889,629,0,2519,34002,3.600000
60,Argentina,AR,1&1 Mail,1&1 Telecom GmbH,com.oneandone.ciso.mobile.app.android,https://play.google.com/store/apps/details?id=...,10100,4723,787,0,787,3149,31486,3.333263
162,Armenia,AM,freenet Mail - E-Mail Postfach,freenet.de GmbH,de.freenet.mail,https://play.google.com/store/apps/details?id=...,22300,2097,900,1366,2150,13802,36285,1.786119
90,Australia,AU,freenet Mail - E-Mail Postfach,freenet.de GmbH,de.freenet.mail,https://play.google.com/store/apps/details?id=...,22300,2539,0,2539,0,12698,33010,1.856998
2450,Austria,AT,ABV Mail,„Нет Инфо” АД,bg.abv.andro.emailapp,https://play.google.com/store/apps/details?id=...,14100,5132,810,540,810,5942,38082,2.877588
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2381,United States,US,Login Mail For HotMail&Outlook,AMOBEAR TECHNOLOGY GROUP,com.emailonline.officemail.amoemail,https://play.google.com/store/apps/details?id=...,26500,7095,1493,2040,2451,12013,64482,2.569823
2385,Uruguay,UY,All Email Connect,AI Email & Access,com.mail.inbox.allemailaccess,https://play.google.com/store/apps/details?id=...,10600,6626,1204,1204,0,1506,43064,4.085769
2410,Uzbekistan,UZ,Mail - All Email Accounts,Amobear Application - Diavostar PTE. LTD,com.quickemail.quicklogin.emailonline,https://play.google.com/store/apps/details?id=...,7590,3311,1419,946,946,946,27907,3.687500
2432,Venezuela Bolivarian Republic Of,VE,Libero Mail,Italiaonline S.p.A.,it.italiaonline.mail,https://play.google.com/store/apps/details?id=...,99500,19134,19134,19134,19134,19134,287010,3.000000


## World Map - Loved Communications

In [322]:
# Filter the dataframe to include only rows where Category is 'Email'
loved_communication = communications_apps[communications_apps['Category'] == 'Communication']

# Group by 'Country' and find the index of the row with the highest 'score_out_of_five' in each group
idx = loved_communication.groupby('Country')['score_out_of_five'].idxmax()

# Use the index to get the corresponding rows
loved_communication = loved_communication.loc[idx]
loved_communication

Unnamed: 0,similarweb_url,App,Publisher,Category,appId,Country,alpha2Code,google_play_url,reviews,five_star,four_star,three_star,two_star,one_star,weighted_sum,score_out_of_five
21,https://www.similarweb.com/top-apps/google/alg...,"Brave Private Web Browser, VPN",Brave Software,Communication,com.brave.browser,Algeria,DZ,https://play.google.com/store/apps/details?id=...,1850000,1338242,290965,102926,15834,37607,8233123,4.610911
125,https://www.similarweb.com/top-apps/google/arg...,Sticker Maker for WhatsApp,Trusted Tools,Communication,customstickermaker.whatsappstickers.personalst...,Argentina,AR,https://play.google.com/store/apps/details?id=...,273000,249763,12358,3821,731,1544,1312716,4.894231
270,https://www.similarweb.com/top-apps/google/arm...,Speak and Translate Languages,Learning Easy,Communication,com.translate.talkingtranslator,Armenia,AM,https://play.google.com/store/apps/details?id=...,99600,79887,11085,3032,785,2664,457105,4.690517
161,https://www.similarweb.com/top-apps/google/aus...,"Brave Private Web Browser, VPN",Brave Software,Communication,com.brave.browser,Australia,AU,https://play.google.com/store/apps/details?id=...,1850000,1457490,248063,29922,18124,32066,8437782,4.725288
4510,https://www.similarweb.com/top-apps/google/aus...,spusu,Mass Response,Communication,com.massresponse.spusuuk,Austria,AT,https://play.google.com/store/apps/details?id=...,24,24,0,0,0,0,120,5.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4323,https://www.similarweb.com/top-apps/google/uni...,Opera browser with AI,Opera,Communication,com.opera.browser,United States,US,https://play.google.com/store/apps/details?id=...,4660000,3773521,240542,68895,51675,172262,20312070,4.716175
4371,https://www.similarweb.com/top-apps/google/uru...,Sticker Maker for WhatsApp,Trusted Tools,Communication,customstickermaker.whatsappstickers.personalst...,Uruguay,UY,https://play.google.com/store/apps/details?id=...,273000,243608,15743,2485,2485,2485,1295922,4.857170
4422,https://www.similarweb.com/top-apps/google/uzb...,imo HD - Video Calls and Chats,imo.im,Communication,com.imo.android.imous,Uzbekistan,UZ,https://play.google.com/store/apps/details?id=...,130000,127319,0,0,0,0,636595,5.000000
4471,https://www.similarweb.com/top-apps/google/ven...,Whats Dual - Whatscan App,360 Tool,Communication,whatsapp.web.whatscan.whatsweb.qrscan,Venezuela Bolivarian Republic Of,VE,https://play.google.com/store/apps/details?id=...,104000,93085,4621,1760,880,880,491829,4.858722


## World Map - Hated Communications

In [323]:
# Filter the dataframe to include only rows where Category is 'Email'
hated_communication = communications_apps[communications_apps['Category'] == 'Communication']

# Group by 'Country' and find the index of the row with the highest 'score_out_of_five' in each group
idx = hated_communication.groupby('Country')['score_out_of_five'].idxmin()

# Use the index to get the corresponding rows
hated_communication = hated_communication.loc[idx]
hated_communication

Unnamed: 0,similarweb_url,App,Publisher,Category,appId,Country,alpha2Code,google_play_url,reviews,five_star,four_star,three_star,two_star,one_star,weighted_sum,score_out_of_five
3,https://www.similarweb.com/top-apps/google/alg...,Snapchat,Snap Inc,Communication,com.snapchat.android,Algeria,DZ,https://play.google.com/store/apps/details?id=...,35200000,15467638,1874281,1275746,792059,13867570,104114240,3.128687
134,https://www.similarweb.com/top-apps/google/arg...,Voissy,Andromeda Smart Softwares,Communication,andromeda.smart.softwares.com.voissy,Argentina,AR,https://play.google.com/store/apps/details?id=...,1910,439,48,97,97,1171,4043,2.183045
264,https://www.similarweb.com/top-apps/google/arm...,WeChat,WeChat International,Communication,com.tencent.mm,Armenia,AM,https://play.google.com/store/apps/details?id=...,6210000,3199228,402682,290857,210738,1828608,20729523,3.494459
180,https://www.similarweb.com/top-apps/google/aus...,ALDImobile,MEDION_Australia,Communication,au.com.aldi.android,Australia,AU,https://play.google.com/store/apps/details?id=...,3410,884,273,168,299,1639,8253,2.529268
4534,https://www.similarweb.com/top-apps/google/aus...,Mein MTEL Austria,MTEL APP,Communication,at.mtel.selfcare,Austria,AT,https://play.google.com/store/apps/details?id=...,353,96,17,9,13,208,809,2.358601
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4320,https://www.similarweb.com/top-apps/google/uni...,T-Mobile Visual Voicemail,T-Mobile USA,Communication,com.tmobile.vvm.application,United States,US,https://play.google.com/store/apps/details?id=...,86500,30064,6138,4922,5763,39116,240280,2.793856
4361,https://www.similarweb.com/top-apps/google/uru...,Magic TVHD,alex nicole,Communication,com.m.magictvhd,Uruguay,UY,https://play.google.com/store/apps/details?id=...,1120,55,0,55,0,221,661,1.996979
4441,https://www.similarweb.com/top-apps/google/uzb...,4G LTE Mode only,AlphaBrain Apps Studio,Communication,com.four4glte.only.networkmode,Uzbekistan,UZ,https://play.google.com/store/apps/details?id=...,7370,4011,200,300,401,2406,24963,3.411178
4466,https://www.similarweb.com/top-apps/google/ven...,Voissy,Andromeda Smart Softwares,Communication,andromeda.smart.softwares.com.voissy,Venezuela Bolivarian Republic Of,VE,https://play.google.com/store/apps/details?id=...,1910,421,42,84,168,1138,3999,2.158122


## World Map - Loved Social Media

World Map - Loved Social Media: isolate the highest rated social media app by country

In [325]:
# Group by 'Country' and find the index of the row with the highest 'score_out_of_five' in each group
idx = social_apps.groupby('Country')['score_out_of_five'].idxmax()

# Use the index to get the corresponding rows
loved_social = social_apps.loc[idx]
loved_social

Unnamed: 0,similarweb_url,App,Publisher,Category,appId,Country,alpha2Code,google_play_url,reviews,five_star,four_star,three_star,two_star,one_star,weighted_sum,score_out_of_five
47,https://www.similarweb.com/top-apps/google/alg...,MeYo : be friends,MeYo Studio,Social,com.pita.oyem,Algeria,DZ,https://play.google.com/store/apps/details?id=...,388000,345446,12563,10452,4117,10452,1827524,4.771229
141,https://www.similarweb.com/top-apps/google/arg...,Zeetok - Meet and Chat,ZeetokSocial,Social,com.zeetok.videochat,Argentina,AR,https://play.google.com/store/apps/details?id=...,94800,91417,517,1409,695,160,464930,4.935667
277,https://www.similarweb.com/top-apps/google/arm...,Video downloader - Story Saver,ETM Video Downloader,Social,instagram.video.downloader.story.saver.ig,Armenia,AM,https://play.google.com/store/apps/details?id=...,174000,159919,7535,1736,908,2674,839433,4.858617
184,https://www.similarweb.com/top-apps/google/aus...,Harmonic for Hacker News,Simon Halvdansson,Social,com.simon.harmonichackernews,Australia,AU,https://play.google.com/store/apps/details?id=...,779,620,124,0,0,0,3596,4.833333
4549,https://www.similarweb.com/top-apps/google/aus...,My TEDi Mitarbeiter-App,TEDi GmbH & Co. KG,Social,com.tediinternal.employeeapp,Austria,AT,https://play.google.com/store/apps/details?id=...,111,107,0,0,0,0,535,5.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4319,https://www.similarweb.com/top-apps/google/uni...,sendit,"iconic hearts, inc.",Social,com.fullsenders.sendit,United States,US,https://play.google.com/store/apps/details?id=...,616000,507266,59207,19459,5217,7138,2849107,4.762107
4379,https://www.similarweb.com/top-apps/google/uru...,Status Saver: Video Downloader,BlueLine. Tech,Social,video.downloader.videodownloader,Uruguay,UY,https://play.google.com/store/apps/details?id=...,2029999,1830865,101670,14974,0,14974,9620901,4.902412
4429,https://www.similarweb.com/top-apps/google/uzb...,Video downloader - Story Saver,ETM Video Downloader,Social,instagram.video.downloader.story.saver.ig,Uzbekistan,UZ,https://play.google.com/store/apps/details?id=...,174000,169617,1511,1511,0,0,858662,4.973743
4483,https://www.similarweb.com/top-apps/google/ven...,Video downloader - Story Saver,ETM Video Downloader,Social,instagram.video.downloader.story.saver.ig,Venezuela Bolivarian Republic Of,VE,https://play.google.com/store/apps/details?id=...,174000,167492,3912,1548,0,0,857752,4.959480


## World Map - Hated Social Media

In [327]:
# Group by 'Country' and find the index of the row with the highest 'score_out_of_five' in each group
idx = social_apps.groupby('Country')['score_out_of_five'].idxmin()

# Use the index to get the corresponding rows
hated_social = social_apps.loc[idx]
hated_social

Unnamed: 0,similarweb_url,App,Publisher,Category,appId,Country,alpha2Code,google_play_url,reviews,five_star,four_star,three_star,two_star,one_star,weighted_sum,score_out_of_five
4,https://www.similarweb.com/top-apps/google/alg...,Instagram,Instagram,Social,com.instagram.android,Algeria,DZ,https://play.google.com/store/apps/details?id=...,153000000,40835101,4352180,2569074,1721891,98732195,331467424,2.236465
140,https://www.similarweb.com/top-apps/google/arg...,Adoraboo,Happeny Technology Pte. Ltd.,Social,com.adoraboo,Argentina,AR,https://play.google.com/store/apps/details?id=...,159,12,51,25,12,51,414,2.741722
283,https://www.similarweb.com/top-apps/google/arm...,WorldTalk-Date with foreigners,Zero Technology,Social,com.zerophil.worldtalk,Armenia,AM,https://play.google.com/store/apps/details?id=...,16000,5927,1414,1703,1414,4911,48139,3.132214
152,https://www.similarweb.com/top-apps/google/aus...,Reddit,reddit Inc.,Social,com.reddit.frontpage,Australia,AU,https://play.google.com/store/apps/details?id=...,3090000,1011082,306408,88524,93107,1519355,8252183,2.733891
4504,https://www.similarweb.com/top-apps/google/aus...,Reddit,reddit Inc.,Social,com.reddit.frontpage,Austria,AT,https://play.google.com/store/apps/details?id=...,3090000,906225,170667,93551,106193,1741445,7448277,2.467885
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4325,https://www.similarweb.com/top-apps/google/uni...,MobilePatrol Public Safety App,Vinelink,Social,com.appriss.mobilepatrol,United States,US,https://play.google.com/store/apps/details?id=...,37200,12477,3019,2799,2567,15183,103175,2.862394
4367,https://www.similarweb.com/top-apps/google/uru...,OmeTV – Video Chat Alternative,Video Chat Alternative,Social,omegle.tv,Uruguay,UY,https://play.google.com/store/apps/details?id=...,561000,243295,31024,37555,9797,215536,1688366,3.142859
4440,https://www.similarweb.com/top-apps/google/uzb...,"TenChat: больше, чем соцсеть","VBClab, LLC",Social,co.vero.app,Uzbekistan,UZ,https://play.google.com/store/apps/details?id=...,34700,13253,2650,5301,2650,10602,108670,3.153877
4486,https://www.similarweb.com/top-apps/google/ven...,Chikii-Play PC Games,Chikii Cloud Game,Social,com.dianyun.chikii,Venezuela Bolivarian Republic Of,VE,https://play.google.com/store/apps/details?id=...,183000,67936,19121,12147,13947,62762,543261,3.088237


# Create an Excel file

In [366]:
# Create an Excel writer object

writer = pd.ExcelWriter('The Most Loved Email, Messaging and Social Media Apps by Country.xlsx')

In [367]:
loved_email.to_excel(writer, sheet_name='Loved Email', index=False)
hated_email.to_excel(writer, sheet_name='Hated Email', index=False)
loved_communication.to_excel(writer, sheet_name='Loved Communications', index=False)
hated_communication.to_excel(writer, sheet_name='Hated Communications', index=False)
loved_social.to_excel(writer, sheet_name='Loved Social Media', index=False)
hated_social.to_excel(writer, sheet_name='Hated Social Media', index=False)
communications_apps.to_excel(writer, sheet_name='Raw Communications Apps', index=False)
social_apps.to_excel(writer, sheet_name='Raw Social Media Apps', index=False)
email_apps.to_excel(writer, sheet_name='Raw Email Apps', index=False)


In [368]:
# Save the Excel file
writer.save()

In [373]:
email_apps['Category'] = 'Email'

In [375]:
communications_apps[communications_apps.Category!='Email']

Unnamed: 0,similarweb_url,App,Publisher,Category,appId,Country,alpha2Code,google_play_url,reviews,five_star,four_star,three_star,two_star,one_star,weighted_sum,score_out_of_five
0,https://www.similarweb.com/top-apps/google/alg...,WhatsApp Messenger,WhatsApp LLC,Communication,com.whatsapp,Algeria,DZ,https://play.google.com/store/apps/details?id=...,190000000,124642625,15062961,10741084,7008012,29181989,758886234,4.066115
1,https://www.similarweb.com/top-apps/google/alg...,Messenger,"Meta Platforms, Inc.",Communication,com.facebook.orca,Algeria,DZ,https://play.google.com/store/apps/details?id=...,89100000,46053579,4949839,3550516,2744035,27834686,294041555,3.453922
2,https://www.similarweb.com/top-apps/google/alg...,Telegram,Telegram FZ-LLC,Communication,org.telegram.messenger,Algeria,DZ,https://play.google.com/store/apps/details?id=...,13900000,9333940,1028448,697534,351536,2075800,55654966,4.126485
3,https://www.similarweb.com/top-apps/google/alg...,Snapchat,Snap Inc,Communication,com.snapchat.android,Algeria,DZ,https://play.google.com/store/apps/details?id=...,35200000,15467638,1874281,1275746,792059,13867570,104114240,3.128687
4,https://www.similarweb.com/top-apps/google/alg...,Truecaller: Identify Caller ID,Truecaller,Communication,com.truecaller,Algeria,DZ,https://play.google.com/store/apps/details?id=...,21200000,15974108,1847402,1139970,556583,1471526,93264750,4.443381
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4794,https://www.similarweb.com/top-apps/google/ser...,Phone Dialer & Caller ID,Share File Technologies by 090 Bravo,Communication,call.color.flash.phone.callerscreen.flashlight...,Serbia,RS,https://play.google.com/store/apps/details?id=...,2540,1503,0,0,250,751,8766,3.500799
4795,https://www.similarweb.com/top-apps/google/ser...,Smart Watch app - BT notifier,Flying Phoenix,Communication,com.smartwatch.bluetooth.sync.notifications,Serbia,RS,https://play.google.com/store/apps/details?id=...,210000,161181,23739,9995,3748,4997,943339,4.631931
4796,https://www.similarweb.com/top-apps/google/ser...,Botim - Video and Voice Call,Algento Cloud Computing Limited,Communication,im.thebot.messenger,Serbia,RS,https://play.google.com/store/apps/details?id=...,1140000,798697,156109,87130,10891,65347,4966440,4.441563
4797,https://www.similarweb.com/top-apps/google/ser...,Opera Mini: Fast Web Browser,Opera,Communication,com.opera.mini.native,Serbia,RS,https://play.google.com/store/apps/details?id=...,9130000,7209486,765358,274017,160630,406301,40658474,4.612005


In [376]:
raw_data = pd.concat([communications_apps[communications_apps.Category!='Email'], social_apps, email_apps])

In [378]:
raw_data.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 11614 entries, 0 to 2613
Data columns (total 16 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   similarweb_url     9000 non-null   object 
 1   App                11614 non-null  object 
 2   Publisher          11614 non-null  object 
 3   Category           11614 non-null  object 
 4   appId              11614 non-null  object 
 5   Country            11614 non-null  object 
 6   alpha2Code         11614 non-null  object 
 7   google_play_url    11614 non-null  object 
 8   reviews            11614 non-null  int64  
 9   five_star          11614 non-null  int64  
 10  four_star          11614 non-null  int64  
 11  three_star         11614 non-null  int64  
 12  two_star           11614 non-null  int64  
 13  one_star           11614 non-null  int64  
 14  weighted_sum       11614 non-null  int64  
 15  score_out_of_five  11614 non-null  float64
dtypes: float64(1), int64(7)

In [379]:
raw_data.to_csv('raw_data.csv')