<a href="https://colab.research.google.com/github/jessiejxyu2/ist526/blob/main/Map_data_visualization.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Import Libraries

In [None]:
import matplotlib
# Select the 'Agg' backend before pyplot is imported.
matplotlib.use('Agg')
import matplotlib.pyplot as plt

# NOTE(review): StandardScaler, rand, pi and arctan are not used in the
# visible cells of this notebook; confirm before removing.
from sklearn.preprocessing import StandardScaler

from numpy.random import rand
from numpy import pi, arctan
# NOTE(review): this wildcard import adds many names to the notebook
# namespace and can shadow existing ones; nothing in the visible cells
# appears to rely on it -- confirm before removing.
from pylab import *

import pandas as pd
import numpy as np

# plotly.express builds the choropleth figures used below
import plotly.express as px
import plotly.graph_objects as go

## Mount Google Drive

In [None]:
import os

# Mount Google Drive inside the Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

# Edit this path if your notebooks live in a different Drive folder.
my_path = '/content/drive/My Drive/Colab Notebooks/'

# Work from that folder so relative file paths resolve inside it.
os.chdir(my_path)

# Verify the present working directory; it should match 'my_path'.
# Pure Python is used instead of the IPython-only `!pwd` shell escape.
print(os.getcwd())

Mounted at /content/drive
/content/drive/My Drive/Colab Notebooks


# Visualize Colored Geo Map (choropleth)
We will work with a dataset containing democracy scores for each country. We'll draw a choropleth (a.k.a., colored map) where colors indicate countries' democracy scores. 

**Note that we must use 3-letter country codes (e.g., `MEX`, `SWE`) for the `locations` parameter.**

In [None]:
# Read the democracy-score CSV straight from GitHub and preview its first rows.
VDem_url = "https://github.com/jkropko/DS-6001/raw/master/localdata/vdem.csv"
vdem = pd.read_csv(filepath_or_buffer=VDem_url)
display(vdem.head(n=5))

Unnamed: 0,X1,country_name,country_id,country_text_id,year,historical_date,codingstart,gapstart,gapend,codingend,...,v2xcs_ccsi_codehigh,v2xcs_ccsi_codelow,v2xps_party,v2xps_party_codehigh,v2xps_party_codelow,v2x_gender,v2x_gender_codehigh,v2x_gender_codelow,v2x_gencl,v2x_gencl_codehigh
0,1,Mexico,3,MEX,1960,1960-01-01,1900,,,2014,...,0.451123,0.170201,0.681416,0.811379,0.524055,0.347498,0.42127,0.273726,0.555367,0.714971
1,2,Mexico,3,MEX,1961,1961-01-01,1900,,,2014,...,0.461693,0.175715,0.681416,0.811379,0.524055,0.344214,0.417813,0.270614,0.555367,0.714971
2,3,Mexico,3,MEX,1962,1962-01-01,1900,,,2014,...,0.461693,0.175715,0.681416,0.811379,0.524055,0.344214,0.417813,0.270614,0.555367,0.714971
3,4,Mexico,3,MEX,1963,1963-01-01,1900,,,2014,...,0.461693,0.175715,0.681416,0.811379,0.524055,0.344214,0.417813,0.270614,0.555367,0.714971
4,5,Mexico,3,MEX,1964,1964-01-01,1900,,,2014,...,0.461693,0.175715,0.681416,0.811379,0.524055,0.356873,0.428861,0.284885,0.555367,0.714971


In [None]:
# Keep only the 2010 observations and the three columns the map needs,
# then give those columns friendlier names in a single rename.
vdem_filtered = (
    vdem
    .loc[vdem['year'] == 2010, ['country_name', 'country_text_id', 'v2x_polyarchy']]
    .rename(columns={'country_name': 'country', 'v2x_polyarchy': 'democracy'})
)
display(vdem_filtered.head())

# vdem_filtered.to_csv('democracy.csv', index=False)

Unnamed: 0,country,country_text_id,democracy
50,Mexico,MEX,0.670711
105,Suriname,SUR,0.825037
161,Sweden,SWE,0.929517
216,Switzerland,CHE,0.934928
271,Ghana,GHA,0.785304


In [None]:
# Draw the world map coloured by each country's 2010 democracy score.
# `locations` must point at the column holding the 3-letter country codes;
# that column is the key used to place each row on the map.
fig = px.choropleth(
    vdem_filtered,
    locations='country_text_id',
    color='democracy',
    hover_name='country',
    title='Democracy in the World, 2010',
)

# Centre the title horizontally (x is a fraction of the figure width).
fig.update_layout(title=dict(x=0.5))
fig.show()

## Fuse datasets
We'll fuse the **vdem_filtered** dataframe (the 2010 subset of **vdem**) with **df_country** based on country name. Look closely at how I fused these dataframes and created a new one. You will need to do a similar fusion in your final project.

In [None]:
# first, load a different dataset 
df_country = px.data.gapminder()

# take a peek
display(df_country.head())
display(df_country.tail())

Unnamed: 0,country,continent,year,lifeExp,pop,gdpPercap,iso_alpha,iso_num
0,Afghanistan,Asia,1952,28.801,8425333,779.445314,AFG,4
1,Afghanistan,Asia,1957,30.332,9240934,820.85303,AFG,4
2,Afghanistan,Asia,1962,31.997,10267083,853.10071,AFG,4
3,Afghanistan,Asia,1967,34.02,11537966,836.197138,AFG,4
4,Afghanistan,Asia,1972,36.088,13079460,739.981106,AFG,4


Unnamed: 0,country,continent,year,lifeExp,pop,gdpPercap,iso_alpha,iso_num
1699,Zimbabwe,Africa,1987,62.351,9216418,706.157306,ZWE,716
1700,Zimbabwe,Africa,1992,60.377,10704340,693.420786,ZWE,716
1701,Zimbabwe,Africa,1997,46.809,11404948,792.44996,ZWE,716
1702,Zimbabwe,Africa,2002,39.989,11926563,672.038623,ZWE,716
1703,Zimbabwe,Africa,2007,43.487,12311143,469.709298,ZWE,716


In [None]:
# Fuse the Gapminder rows with the 2010 democracy scores.
# A left join keeps every row of df_country; a country whose name has no
# exact match in vdem_filtered gets NaN in 'country_text_id' and 'democracy'.
df_fused = pd.merge(df_country, vdem_filtered, on='country', how='left')

# A left join should not add rows here. If it did, a country name would be
# duplicated in vdem_filtered and its Gapminder rows would be repeated.
assert len(df_fused) == len(df_country), (
    "merge added rows: 'country' is not unique in vdem_filtered"
)

# Country names are free text and may be spelled differently in the two
# sources, so surface the countries that ended up without a democracy score
# instead of letting the NaNs pass silently.
countries_missing_score = sorted(
    df_fused.loc[df_fused['democracy'].isna(), 'country'].unique()
)
if countries_missing_score:
    print(
        f"{len(countries_missing_score)} countries have no democracy score "
        f"after the merge: {countries_missing_score}"
    )

display(df_fused.head())
display(df_fused.tail())

Unnamed: 0,country,continent,year,lifeExp,pop,gdpPercap,iso_alpha,iso_num,country_text_id,democracy
0,Afghanistan,Asia,1952,28.801,8425333,779.445314,AFG,4,AFG,0.383379
1,Afghanistan,Asia,1957,30.332,9240934,820.85303,AFG,4,AFG,0.383379
2,Afghanistan,Asia,1962,31.997,10267083,853.10071,AFG,4,AFG,0.383379
3,Afghanistan,Asia,1967,34.02,11537966,836.197138,AFG,4,AFG,0.383379
4,Afghanistan,Asia,1972,36.088,13079460,739.981106,AFG,4,AFG,0.383379


Unnamed: 0,country,continent,year,lifeExp,pop,gdpPercap,iso_alpha,iso_num,country_text_id,democracy
1699,Zimbabwe,Africa,1987,62.351,9216418,706.157306,ZWE,716,ZWE,0.27352
1700,Zimbabwe,Africa,1992,60.377,10704340,693.420786,ZWE,716,ZWE,0.27352
1701,Zimbabwe,Africa,1997,46.809,11404948,792.44996,ZWE,716,ZWE,0.27352
1702,Zimbabwe,Africa,2002,39.989,11926563,672.038623,ZWE,716,ZWE,0.27352
1703,Zimbabwe,Africa,2007,43.487,12311143,469.709298,ZWE,716,ZWE,0.27352


Create a new colored map (choropleth) showing the life expectancy of different countries.
You must use the **df_fused** dataframe.



In [None]:
# [Your code goes here]

Create a new colored map (choropleth) showing the GDP per capita of different countries.
You must use the **df_fused** dataframe.



In [None]:
# [Your code goes here]
