## Using Plotly express as px, source the data using the command px.data.gapminder() and        save it into a dataframe. Based on the data, map the following information with the                  appropriate geographical plot. Map out:

### 1. The mean life expectancy of all the Asian countries according to the data.
### 2. Deviation in GDP of each country in Europe and South America.
### 3. The change in population of each African country in the last 3 decades.

# 1.0 Importing Python Libraries

In [1]:
import pandas as pd
import plotly.express as px

# 1.2 Sourcing the data

In [3]:
#creating a dataframe gapminder and storing the data there
gapminder=px.data.gapminder()
gapminder

Unnamed: 0,country,continent,year,lifeExp,pop,gdpPercap,iso_alpha,iso_num
0,Afghanistan,Asia,1952,28.801,8425333,779.445314,AFG,4
1,Afghanistan,Asia,1957,30.332,9240934,820.853030,AFG,4
2,Afghanistan,Asia,1962,31.997,10267083,853.100710,AFG,4
3,Afghanistan,Asia,1967,34.020,11537966,836.197138,AFG,4
4,Afghanistan,Asia,1972,36.088,13079460,739.981106,AFG,4
...,...,...,...,...,...,...,...,...
1699,Zimbabwe,Africa,1987,62.351,9216418,706.157306,ZWE,716
1700,Zimbabwe,Africa,1992,60.377,10704340,693.420786,ZWE,716
1701,Zimbabwe,Africa,1997,46.809,11404948,792.449960,ZWE,716
1702,Zimbabwe,Africa,2002,39.989,11926563,672.038623,ZWE,716


# 1.3 Exploring the dataset

In [4]:
gapminder.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1704 entries, 0 to 1703
Data columns (total 8 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   country    1704 non-null   object 
 1   continent  1704 non-null   object 
 2   year       1704 non-null   int64  
 3   lifeExp    1704 non-null   float64
 4   pop        1704 non-null   int64  
 5   gdpPercap  1704 non-null   float64
 6   iso_alpha  1704 non-null   object 
 7   iso_num    1704 non-null   int64  
dtypes: float64(2), int64(3), object(3)
memory usage: 106.6+ KB


In [5]:
#summary statistics for numeric columns
gapminder.describe()

Unnamed: 0,year,lifeExp,pop,gdpPercap,iso_num
count,1704.0,1704.0,1704.0,1704.0,1704.0
mean,1979.5,59.474439,29601210.0,7215.327081,425.880282
std,17.26533,12.917107,106157900.0,9857.454543,248.305709
min,1952.0,23.599,60011.0,241.165876,4.0
25%,1965.75,48.198,2793664.0,1202.060309,208.0
50%,1979.5,60.7125,7023596.0,3531.846989,410.0
75%,1993.25,70.8455,19585220.0,9325.462346,638.0
max,2007.0,82.603,1318683000.0,113523.1329,894.0


# 1.4 Data Cleaning

In [7]:
#checking for any null values
gapminder[gapminder.isna().any(axis=1)]

Unnamed: 0,country,continent,year,lifeExp,pop,gdpPercap,iso_alpha,iso_num


In [8]:
#checking for duplicate values
gapminder[gapminder.duplicated()]

Unnamed: 0,country,continent,year,lifeExp,pop,gdpPercap,iso_alpha,iso_num


# 1.5 Analysis(Answering Research Questions)

## 1.5.1 Map Out the mean life expectancy of all the Asian countries according to the data.


In [54]:
asia=gapminder[gapminder['continent']=='Asia'].groupby(['country','iso_alpha'])['lifeExp'].mean().reset_index()
asia

Unnamed: 0,country,iso_alpha,lifeExp
0,Afghanistan,AFG,37.478833
1,Bahrain,BHR,65.605667
2,Bangladesh,BGD,49.834083
3,Cambodia,KHM,47.90275
4,China,CHN,61.78514
5,"Hong Kong, China",HKG,73.492833
6,India,IND,53.166083
7,Indonesia,IDN,54.33575
8,Iran,IRN,58.636583
9,Iraq,IRQ,56.58175


In [92]:
fig=px.scatter_geo(asia,locations='iso_alpha',hover_data = ['lifeExp'],color="lifeExp",
                   hover_name="country",title='Life Expectancy in Asian Countries')
fig.show()

## 1.5.2 Map out deviation in GDP of each country in Europe and South America.

In [52]:
gapminder.continent.value_counts()

Africa      624
Asia        396
Europe      360
Americas    300
Oceania      24
Name: continent, dtype: int64

In [67]:
#Filtering the data
europe_SAmerica = px.data.gapminder().query("continent == 'Europe' | continent == 'Americas'").reset_index()
europe_SAmerica

Unnamed: 0,index,country,continent,year,lifeExp,pop,gdpPercap,iso_alpha,iso_num
0,12,Albania,Europe,1952,55.230,1282697,1601.056136,ALB,8
1,13,Albania,Europe,1957,59.280,1476505,1942.284244,ALB,8
2,14,Albania,Europe,1962,64.820,1728137,2312.888958,ALB,8
3,15,Albania,Europe,1967,66.220,1984060,2760.196931,ALB,8
4,16,Albania,Europe,1972,67.690,2263554,3313.422188,ALB,8
...,...,...,...,...,...,...,...,...,...
655,1639,Venezuela,Americas,1987,70.190,17910182,9883.584648,VEN,862
656,1640,Venezuela,Americas,1992,71.150,20265563,10733.926310,VEN,862
657,1641,Venezuela,Americas,1997,72.146,22374398,10165.495180,VEN,862
658,1642,Venezuela,Americas,2002,72.766,24287670,8605.047831,VEN,862


In [69]:
df=europe_SAmerica.groupby(['country','iso_alpha'])['gdpPercap'].std().reset_index()
df

Unnamed: 0,country,iso_alpha,gdpPercap
0,Albania,ALB,1192.351513
1,Argentina,ARG,1862.583151
2,Austria,AUT,9655.281488
3,Belgium,BEL,8391.186269
4,Bolivia,BOL,524.748706
5,Bosnia and Herzegovina,BIH,1950.522347
6,Brazil,BRA,2436.453754
7,Bulgaria,BGR,2351.963619
8,Canada,CAN,8210.112789
9,Chile,CHL,3026.194023


In [90]:
#plotting the data
fig=px.scatter_geo(europe_SAmerica,locations='iso_alpha',hover_data = ['gdpPercap'],color="gdpPercap",
                   hover_name="country",size='pop',title='Deviation in GDP of Each Country in Europe and America')
fig.show()

## 1.5.3 Map out the change in population of each African country in the last 3 decades.


In [73]:
africa=gapminder[gapminder['continent']=='Africa']
africa

Unnamed: 0,country,continent,year,lifeExp,pop,gdpPercap,iso_alpha,iso_num
24,Algeria,Africa,1952,43.077,9279525,2449.008185,DZA,12
25,Algeria,Africa,1957,45.685,10270856,3013.976023,DZA,12
26,Algeria,Africa,1962,48.303,11000948,2550.816880,DZA,12
27,Algeria,Africa,1967,51.407,12760499,3246.991771,DZA,12
28,Algeria,Africa,1972,54.518,14760787,4182.663766,DZA,12
...,...,...,...,...,...,...,...,...
1699,Zimbabwe,Africa,1987,62.351,9216418,706.157306,ZWE,716
1700,Zimbabwe,Africa,1992,60.377,10704340,693.420786,ZWE,716
1701,Zimbabwe,Africa,1997,46.809,11404948,792.449960,ZWE,716
1702,Zimbabwe,Africa,2002,39.989,11926563,672.038623,ZWE,716


In [75]:
africa.year.describe()

count     624.00000
mean     1979.50000
std        17.27411
min      1952.00000
25%      1965.75000
50%      1979.50000
75%      1993.25000
max      2007.00000
Name: year, dtype: float64

In [77]:
#data for the last 3 decades
africa_pop=africa[((africa.year == 1977) | (africa.year == 2007))]
africa_pop

Unnamed: 0,country,continent,year,lifeExp,pop,gdpPercap,iso_alpha,iso_num
29,Algeria,Africa,1977,58.014,17152804,4910.416756,DZA,12
35,Algeria,Africa,2007,72.301,33333216,6223.367465,DZA,12
41,Angola,Africa,1977,39.483,6162675,3008.647355,AGO,24
47,Angola,Africa,2007,42.731,12420476,4797.231267,AGO,24
125,Benin,Africa,1977,49.190,3168267,1029.161251,BEN,204
...,...,...,...,...,...,...,...,...
1595,Uganda,Africa,2007,51.542,29170398,1056.380121,UGA,800
1685,Zambia,Africa,1977,51.386,5216550,1588.688299,ZMB,894
1691,Zambia,Africa,2007,42.384,11746035,1271.211593,ZMB,894
1697,Zimbabwe,Africa,1977,57.674,6642107,685.587682,ZWE,716


In [84]:
#sorting values by country and year
df=africa_pop.sort_values(['country','year'])
df

Unnamed: 0,country,continent,year,lifeExp,pop,gdpPercap,iso_alpha,iso_num
29,Algeria,Africa,1977,58.014,17152804,4910.416756,DZA,12
35,Algeria,Africa,2007,72.301,33333216,6223.367465,DZA,12
41,Angola,Africa,1977,39.483,6162675,3008.647355,AGO,24
47,Angola,Africa,2007,42.731,12420476,4797.231267,AGO,24
125,Benin,Africa,1977,49.190,3168267,1029.161251,BEN,204
...,...,...,...,...,...,...,...,...
1595,Uganda,Africa,2007,51.542,29170398,1056.380121,UGA,800
1685,Zambia,Africa,1977,51.386,5216550,1588.688299,ZMB,894
1691,Zambia,Africa,2007,42.384,11746035,1271.211593,ZMB,894
1697,Zimbabwe,Africa,1977,57.674,6642107,685.587682,ZWE,716


In [85]:
#calculating percentage change over the past 30 years
df['percentageChange']=df.groupby(['country'])['pop'].pct_change()
df

Unnamed: 0,country,continent,year,lifeExp,pop,gdpPercap,iso_alpha,iso_num,percentageChange
29,Algeria,Africa,1977,58.014,17152804,4910.416756,DZA,12,
35,Algeria,Africa,2007,72.301,33333216,6223.367465,DZA,12,0.943310
41,Angola,Africa,1977,39.483,6162675,3008.647355,AGO,24,
47,Angola,Africa,2007,42.731,12420476,4797.231267,AGO,24,1.015436
125,Benin,Africa,1977,49.190,3168267,1029.161251,BEN,204,
...,...,...,...,...,...,...,...,...,...
1595,Uganda,Africa,2007,51.542,29170398,1056.380121,UGA,800,1.545908
1685,Zambia,Africa,1977,51.386,5216550,1588.688299,ZMB,894,
1691,Zambia,Africa,2007,42.384,11746035,1271.211593,ZMB,894,1.251686
1697,Zimbabwe,Africa,1977,57.674,6642107,685.587682,ZWE,716,


In [87]:
#plotting the data
fig=px.scatter_geo(df,locations="iso_alpha", hover_data = ['percentageChange'],
                   color="percentageChange", hover_name="country",size='pop',title='Change in population of each African country in the last 3 decades')
fig.show()