About the dataset

This dataset is the collection of facebook users from different countries . The data set contains five columns of data 

Name : Name of the country for which the data is collected
Users : This column provides the number of facebook users in millions for each respective countries
Facebook_Users : This column shows the percentage of the total population of each country that uses the facebook
Date_Of_Data : Data on which the data is compiled
Population : Total population of the country

In [24]:
#Module Imports
import numpy as np
import pandas as pd
import plotly.express as px
# from scipy import stats

In [25]:
data = pd.read_csv("twitter.csv")
data.head()

Unnamed: 0,Name,Users,Facebook_Users%,Date_of_Data,Population
0,India,416.6M,29.16%,2021-06,1428627663
1,United States,240M,70.59%,2020-12,339996563
2,Indonesia,176.5M,63.6%,2021-06,277534122
3,Brazil,139M,64.23%,2020-12,216422446
4,Philippines,91M,77.55%,2021-06,117337368


Exploratory Data Analysis

In [26]:
#Check the data types of values on each columns
data.dtypes

Name               object
Users              object
Facebook_Users%    object
Date_of_Data       object
Population         object
dtype: object

In [27]:
#Check the number of missing values
data.isnull().sum()

Name               0
Users              0
Facebook_Users%    0
Date_of_Data       0
Population         0
dtype: int64

In [28]:
#Del;eting Useless Columns
data.drop("Date_of_Data" , axis = 1 , inplace = True)

In [29]:
def change_data_type_to_float(data , column):
    data[column] = data[column].str.replace(',', '')
    data[column] = data[column].astype('float64')
change_data_type_to_float(data , "Population") 

In [30]:
data.head()

Unnamed: 0,Name,Users,Facebook_Users%,Population
0,India,416.6M,29.16%,1428628000.0
1,United States,240M,70.59%,339996600.0
2,Indonesia,176.5M,63.6%,277534100.0
3,Brazil,139M,64.23%,216422400.0
4,Philippines,91M,77.55%,117337400.0


In [31]:
def conver_user_number(value):
    new_value = pd.to_numeric(value.replace('K','').replace('M',''))
    multiplier = 1_000_00 if 'M' in value else  1_000 if 'K' in value else 1
    return float(multiplier * new_value)
data["Users"] = data["Users"].apply(conver_user_number)

In [32]:
data.head()

Unnamed: 0,Name,Users,Facebook_Users%,Population
0,India,41660000.0,29.16%,1428628000.0
1,United States,24000000.0,70.59%,339996600.0
2,Indonesia,17650000.0,63.6%,277534100.0
3,Brazil,13900000.0,64.23%,216422400.0
4,Philippines,9100000.0,77.55%,117337400.0


In [33]:
data["Facebook_Users%"] = data["Facebook_Users%"].str.replace('%','').astype('float64').round(2)
data.head()

Unnamed: 0,Name,Users,Facebook_Users%,Population
0,India,41660000.0,29.16,1428628000.0
1,United States,24000000.0,70.59,339996600.0
2,Indonesia,17650000.0,63.6,277534100.0
3,Brazil,13900000.0,64.23,216422400.0
4,Philippines,9100000.0,77.55,117337400.0


Data Visualization

In [34]:
#Top 10 Countries with most users
top_10_most_users_coutry = px.bar(
    data.head(10) ,
    x="Name",
    y="Users",
    color= "Name", #Used for the purpose of legends
    color_discrete_sequence=px.colors.sequential.Aggrnyl,
    title= "Top 10 Countries With Most Users" ,
    labels={
    'Name' : "Country Name" , 
    "Users" : "Number Of Users"
    }
)
top_10_most_users_coutry

In [35]:
# Top 10 Countries with least users
top_10_countries_with_least_users = px.bar(
    data.sort_values(by="Users").head(10) ,
    x="Name",
    y="Users",
    color= "Name", #Used for the purpose of legends
    color_discrete_sequence=px.colors.sequential.Emrld,
    title= "Top 10 Countries With Most Users" ,
    labels={
    'Name' : "Country Name" , 
    "Users" : "Number Of Users"
    }
)
top_10_countries_with_least_users

In [60]:
#Populatioon and Number of Users Relationship of Top 10 Most Users
population_and_number_of_user_relatn = px.bar(
    data.head(20) ,
    x="Name",
    y=["Population" , "Users"],
    color_discrete_sequence=px.colors.sequential.Aggrnyl,
    title= "Relationship Between Population and Number Of Users Of The Top 10 Countries" ,
    labels={
    'Name' : "Country Name" , 
    "Users" : "Number Of Users"
    }
)
population_and_number_of_user_relatn

Country Wise Comaprison


In [44]:
data.head(3)

Unnamed: 0,Name,Users,Facebook_Users%,Population
0,India,41660000.0,29.16,1428628000.0
1,United States,24000000.0,70.59,339996600.0
2,Indonesia,17650000.0,63.6,277534100.0


In [43]:
import plotly.graph_objs as go
import chart_studio.plotly as py

In [53]:
country_one = "India"
country_two = "United States"
data_one = data[(data.Name == country_one) | (data.Name == country_two)]

In [54]:
data_one.head()

Unnamed: 0,Name,Users,Facebook_Users%,Population
0,India,41660000.0,29.16,1428628000.0
1,United States,24000000.0,70.59,339996600.0


In [65]:
dum1 = px.bar(
    data_one ,
    x="Name",
    y=["Population" , "Users"],
    color_discrete_sequence=px.colors.sequential.Emrld,
    title= f"Users comparison between {country_one} & {country_two}" ,
    labels = {
    "Name" : f"{country_one} & {country_two}",
    "value" : "Total Population by Number of Users"
    }
)
dum1

In [69]:
dum1 = px.bar(
    data_one ,
    x="Name",
    y="Facebook_Users%",
    color_discrete_sequence=px.colors.sequential.Aggrnyl,
    title= f"Percentage of active users in the Country" ,
    labels = {
    "Name" : f"{country_one} & {country_two}",
    "Facebook_Users%" : "Users in percentage"
    }
)
dum1

In [73]:

np.copy(data.Name.array)

array(['India', 'United States', 'Indonesia', 'Brazil', 'Philippines',
       'Mexico', 'Vietnam', 'Thailand', 'Japan', 'Pakistan', 'Egypt',
       'Bangladesh', 'Turkey', 'United Kingdom', 'Iran', 'France',
       'Germany', 'Italy', 'Nigeria', 'Argentina', 'Colombia', 'Malaysia',
       'Spain', 'Saudi Arabia', 'South Korea', 'Iraq', 'Algeria',
       'South Africa', 'Canada', 'Morocco', 'Taiwan', 'Myanmar', 'Peru',
       'Poland', 'Australia', 'Russia', 'Nepal', 'Venezuela', 'Chile',
       'Cambodia', 'Kazakhstan', 'Netherlands', 'Romania', 'Kenya',
       'United Arab Emirates', 'Ecuador', 'Ukraine', 'Sweden', 'Syria',
       'Tunisia', 'Sri Lanka', 'Ghana', 'Portugal', 'Guatemala',
       'Ethiopia', 'Belgium', 'Jordan', 'Israel', 'Hong Kong', 'Bolivia',
       'Hungary', 'Greece', 'Ivory Coast', 'Libya', 'Tanzania',
       'Dominican Republic', 'Austria', 'Singapore', 'Czech Republic',
       'Uzbekistan', 'Azerbaijan', 'Cameroon', 'Kuwait', 'Lebanon',
       'Denmark', 'China'