In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt


In [None]:
# Show every column when a DataFrame is displayed (None removes the column cap).
pd.options.display.max_columns = None

In [None]:
# Load the 'Canada by Citizenship' sheet, skipping the first 20 rows and the
# last 2 rows of the sheet.
canada = pd.read_excel(
    'Canada.xlsx',
    sheet_name='Canada by Citizenship',
    skiprows=20,
    skipfooter=2,
    engine='openpyxl',
)

In [None]:
# Preview the first five rows of the freshly loaded sheet.
canada.head(n=5)

# preprocessing 
- find columns that are not usable
- drop columns that are not usable
- rename columns
- fill missing values

In [None]:
# Print a concise summary of the frame (columns, non-null counts, dtypes).
canada.info()

In [None]:
# Descriptive statistics; with default arguments only numeric columns are summarised.
canada.describe()

In [None]:
# Summary (count / unique / top / freq) restricted to the object-dtype columns.
canada.describe(include=['object'])

In [None]:
# Look at the first five rows again before removing columns.
canada.head(n=5)

In [None]:
# Remove the unnecessary columns.
# Rebinding (instead of inplace=True) together with errors='ignore' keeps this
# cell re-runnable: a second run after the columns are already gone is a no-op
# rather than a KeyError.
unused_columns = ['AREA', 'REG', 'DEV', 'Type', 'Coverage']
canada = canada.drop(columns=unused_columns, errors='ignore')

In [None]:
# Give the remaining descriptive columns readable names.
readable_column_names = {
    'OdName': 'Country',
    'AreaName': 'Continent',
    'RegName': 'Region',
    'DevName': 'CountryType',
}
canada.rename(columns=readable_column_names, inplace=True)

In [None]:
# Confirm the dropped and renamed columns on the first five rows.
canada.head(n=5)

In [None]:
# Labels of the yearly columns: every year from 1980 through 2013 inclusive.
first_year, last_year = 1980, 2013
years = [year for year in range(first_year, last_year + 1)]
print(years)

In [None]:
# Use the country name as the row index so rows can be selected with .loc[<country>].
# The guard keeps the cell re-runnable: once 'Country' is the index it is no longer
# a column, so an unconditional second set_index would raise KeyError.
if canada.index.name != 'Country':
    canada = canada.set_index('Country')
canada.head()

In [None]:
# Per-row sum across all the yearly columns.
canada.loc[:, years].sum(axis='columns')

In [None]:
# Store the per-row sum over the yearly columns as a new 'total' column.
yearly_counts = canada.loc[:, years]
canada['total'] = yearly_counts.sum(axis='columns')
canada.head(n=5)

In [None]:
# 'Country' is the index at this point (set_index was applied earlier in the
# notebook), so the index must be written out; index=False would produce a CSV
# with no country names at all.
canada.to_csv('canada_cleaned.csv', index=True)

In [None]:
# Replace long official names in the index with short common names.
short_country_names = {
    'China, Hong Kong Special Administrative Region': 'Hong Kong',
    'China, Macao Special Administrative Region': 'Macao',
    "Democratic People's Republic of Korea": 'North Korea',
    'Republic of Korea': 'South Korea',
    'Iran (Islamic Republic of)': 'Iran',
    'United Kingdom of Great Britain and Northern Ireland': 'United Kingdom',
    'The former Yugoslav Republic of Macedonia': 'Macedonia',
}
canada.rename(index=short_country_names, inplace=True)

# Easy Visualization

In [None]:
# List the matplotlib style sheets that can be passed to plt.style.use.
available_styles = plt.style.available
print(available_styles)

In [None]:
# matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8*' and later
# releases removed the old 'seaborn' name. Prefer the new name when this
# matplotlib provides it and fall back to the legacy one otherwise.
seaborn_style = 'seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn'
plt.style.use(seaborn_style)

In [None]:
# Colour-shade the first 30 rows with the 'cool' colormap.
first_thirty = canada.head(30)
first_thirty.style.background_gradient(cmap='cool')

In [None]:
# Table of continent and total, with an in-cell bar drawn for the 'total' column.
continent_totals = canada.loc[:, ['Continent', 'total']]
continent_totals.style.bar(subset=['total'])

#### graphical visualization

In [None]:
# Print every index label (the country names) as a plain Python list.
country_names = list(canada.index)
print(country_names)

In [None]:
# Show the full row for Malawi.
canada.loc['Malawi', :]

In [None]:
# Quick default line plot of the yearly values for Malawi.
malawi_by_year = canada.loc['Malawi', years]
malawi_by_year.plot()
plt.show()

In [None]:
# Same Malawi series, now with a larger figure, title, axis labels and a
# black dashed line with circle markers ('ko--').
malawi_plot_options = dict(
    figsize=(15, 6),
    title='Number of immigrants from Malawi to Canada',
    xlabel='years 1980-2013',
    ylabel='No. of immigrants',
    style='ko--',
)
canada.loc['Malawi', years].plot(**malawi_plot_options)
plt.show()

Graph generation libraries
- matplotlib (most popular)
- seaborn
- bokeh
- vega
- plotly (most interactive)

In [None]:
import plotly.express as px

In [None]:
# Yearly values for India and China, transposed so that years become the rows
# and each country becomes a column.
india_china_rows = canada.loc[['India', 'China'], years]
ic = india_china_rows.transpose()
ic

In [None]:
# Line chart of the 'India' column against the index of `ic`.
fig = px.line(ic, x=ic.index, y='India')
fig.show()

In [None]:
# One line per country on the same axes.
compared_countries = ['India', 'China']
fig = px.line(ic, x=ic.index, y=compared_countries)
fig.show()

In [None]:
# Bar chart of the same two columns against the index of `ic`.
compared_countries = ['India', 'China']
fig = px.bar(ic, x=ic.index, y=compared_countries)
fig.show()

In [None]:
# 3-D scatter with the 1990, 1980 and 2000 columns on the x, y and z axes.
px.scatter_3d(
    canada,
    x=1990,
    y=1980,
    z=2000,
    width=1200,
    height=1200,
)

In [97]:
# Histogram of 'total', split into one facet column per 'CountryType' value.
px.histogram(
    canada,
    x='total',
    facet_col='CountryType',
)

In [108]:
# World map coloured by 'total'. The frame's index supplies the location names,
# matched using plotly's 'country names' location mode, and also the hover title.
choropleth_options = dict(
    locations=canada.index,
    locationmode='country names',
    color='total',
    hover_name=canada.index,
    hover_data=['Continent', 'Region'],
    color_continuous_scale='algae',
    height=1000,
)
px.choropleth(canada, **choropleth_options)

In [111]:
# Rebuild the India/China bar chart and export it as an HTML file;
# auto_open=True also opens the written file once it is saved.
india_china_columns = ['India', 'China']
fig = px.bar(ic, x=ic.index, y=india_china_columns)
fig.write_html('india_china_immigration.html', auto_open=True)