# 6.3 Geographic Visualization

# This script contains the following: 1. Import data and libraries 2. Data wrangling 3. Consistency checks 4. Plotting a choropleth

# 1. Import data and libraries

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib
import os
import folium
import json
import matplotlib.pyplot as plt

In [None]:
# This command prompts matplotlib visuals to appear in the notebook

%matplotlib inline

In [None]:
# Import ".json" file for the world countries

country_geo = r'C:\Users\JocGa\Achievement 6\02 Data\countries.geojson'

In [None]:
import json

# Load the country boundaries into a dictionary. GeoJSON is UTF-8 encoded, so
# the encoding is stated explicitly instead of relying on the platform default,
# and the context manager closes the file handle once parsing is done.
with open(r'C:\Users\JocGa\Achievement 6\02 Data\countries.geojson', encoding='utf-8') as f:
    # returns JSON object as a dictionary
    data = json.load(f)

# Iterate through the list of features and print each one's properties
for feature in data['features']:
    properties = feature['properties']
    print(properties)

In [None]:
path = r'C:\Users\JocGa\Achievement 6\02 Data'

In [None]:
# Import the merged data

df_merged_data = pd.read_csv(os.path.join(path, 'Merged_Happiness_Report_2015_2019.csv'))

In [None]:
df_merged_data.head()

In [None]:
#drop unname columns for df
df_merged_data = df_merged_data.drop(columns = ['Unnamed: 0'])

In [None]:
df_merged_data.shape

In [None]:
df_merged_data.columns

# 2. Data Wrangling

In [None]:
# Names of the columns kept for the geographic analysis, collected in a list
# called columns
columns = [
    "Country",
    "Year",
    "Happiness_Rank",
    "Happiness_Score",
    "GDP_per_Capita",
    "Social_Support",
    "Life_Expectancy",
]

In [None]:
# create a subset
Country_rec = df_merged_data[columns]

In [None]:
Country_rec.head()

# 3. Conduct consistency checks

In [None]:
#check for missing values
Country_rec.isnull().sum()

#no missing values!

In [None]:
#check for duplicates
dups = Country_rec.duplicated()

In [None]:
# dups is a boolean mask with one entry per row, so its shape only repeats the
# row count. Summing the mask counts the rows flagged as duplicates.
dups.sum()

#no duplicates! (confirmed when the count above is 0)

In [None]:
#extreme values checks
sns.histplot(Country_rec['Happiness_Score'], bins=25, kde = True)

#normal distribution

In [None]:
#extreme values checks
sns.histplot(Country_rec['GDP_per_Capita'], bins=25, kde = True)

In [None]:
#extreme values checks
sns.histplot(Country_rec['Social_Support'], bins=25, kde = True)

#left-skewed curve 

In [None]:
sns.histplot(Country_rec['Life_Expectancy'], bins=25, kde = True)

#no extreme values in any of the variables

# Plotting a Choropleth

In [None]:
# Keep only the rows recorded for the year 2015
df_merged_data_2015 = Country_rec.loc[
    Country_rec["Year"] == 2015
]

In [None]:
df_merged_data_2015.head()

In [None]:
# Print the properties of every feature in the loaded GeoJSON
# (presumably to look up the property name to pass as key_on below)
for feature in data['features']:
    print(feature['properties'])

In [None]:
# Set up a folium map at a world-level zoom. Latitude must lie within
# [-90, 90]; the previous centre of [100, 0] was out of range, so the initial
# view is centred on latitude 0, longitude 0 instead.
map1 = folium.Map(location=[0, 0], zoom_start=1.5)

# Choropleth maps bind pandas DataFrames to JSON geometries: each feature's
# ADMIN property is matched against 'Country' and shaded by the 2015
# 'Happiness_Score'.
folium.Choropleth(
    geo_data=country_geo,
    data=df_merged_data_2015,
    columns=['Country', 'Happiness_Score'],  # [key column, value column]
    key_on='feature.properties.ADMIN',
    fill_color='YlOrBr',
    fill_opacity=0.6,
    line_opacity=0.1,
    legend_name="Happiness Score",
    highlight=True,  # highlight a country when hovering over it
    nan_fill_color='gray',  # fill colour for countries with no value
    reset=True  # NOTE(review): not among folium.Choropleth's documented parameters - confirm it has any effect
).add_to(map1)

# Add a control for toggling the map's layers
folium.LayerControl().add_to(map1)

# Display the map as the cell output
map1

#Observation: in 2015, North America has the countries with the highest scores, along with Brazil, Australia, New Zealand, Norway, Sweden, Finland, the United Kingdom, Ireland, and the Netherlands. The lowest-scoring countries are in parts of Africa and South Asia.

In [None]:
# Keep only the rows recorded for the year 2016
df_merged_data_2016 = Country_rec.loc[
    Country_rec["Year"] == 2016
]

In [None]:
df_merged_data_2016.head()

In [None]:
# Set up a folium map at a world-level zoom. Latitude must lie within
# [-90, 90]; the previous centre of [100, 0] was out of range, so the initial
# view is centred on latitude 0, longitude 0 instead.
map1 = folium.Map(location=[0, 0], zoom_start=1.5)

# Choropleth maps bind pandas DataFrames to JSON geometries: each feature's
# ADMIN property is matched against 'Country' and shaded by the 2016
# 'Happiness_Score'.
folium.Choropleth(
    geo_data=country_geo,
    data=df_merged_data_2016,
    columns=['Country', 'Happiness_Score'],  # [key column, value column]
    key_on='feature.properties.ADMIN',
    fill_color='YlOrBr',
    fill_opacity=0.6,
    line_opacity=0.1,
    legend_name="Happiness Score",
    highlight=True,  # highlight a country when hovering over it
    nan_fill_color='gray',  # fill colour for countries with no value
    reset=True  # NOTE(review): not among folium.Choropleth's documented parameters - confirm it has any effect
).add_to(map1)

# Add a control for toggling the map's layers
folium.LayerControl().add_to(map1)

# Display the map as the cell output
map1

#Observation: in 2016, the same countries scored high on the happiness score, while some Asian countries moved up on the score.

In [None]:
# Keep only the rows recorded for the year 2017
df_merged_data_2017 = Country_rec.loc[
    Country_rec["Year"] == 2017
]

In [None]:
# Set up a folium map at a world-level zoom. Latitude must lie within
# [-90, 90]; the previous centre of [100, 0] was out of range, so the initial
# view is centred on latitude 0, longitude 0 instead.
map1 = folium.Map(location=[0, 0], zoom_start=1.5)

# Choropleth maps bind pandas DataFrames to JSON geometries: each feature's
# ADMIN property is matched against 'Country' and shaded by the 2017
# 'Happiness_Score'.
folium.Choropleth(
    geo_data=country_geo,
    data=df_merged_data_2017,
    columns=['Country', 'Happiness_Score'],  # [key column, value column]
    key_on='feature.properties.ADMIN',
    fill_color='YlOrBr',
    fill_opacity=0.6,
    line_opacity=0.1,
    legend_name="Happiness Score",
    highlight=True,  # highlight a country when hovering over it
    nan_fill_color='gray',  # fill colour for countries with no value
    reset=True  # NOTE(review): not among folium.Choropleth's documented parameters - confirm it has any effect
).add_to(map1)

# Add a control for toggling the map's layers
folium.LayerControl().add_to(map1)

# Display the map as the cell output
map1

#Observation: in 2017, Brazil and Mexico moved down on the happiness score scale, while Canada, the USA, Australia, New Zealand, Germany, Norway, Sweden, and Ireland stayed high.

In [None]:
# Keep only the rows recorded for the year 2018
df_merged_data_2018 = Country_rec.loc[
    Country_rec["Year"] == 2018
]

In [None]:
# Set up a folium map at a world-level zoom. Latitude must lie within
# [-90, 90]; the previous centre of [100, 0] was out of range, so the initial
# view is centred on latitude 0, longitude 0 instead.
map1 = folium.Map(location=[0, 0], zoom_start=1.5)

# Choropleth maps bind pandas DataFrames to JSON geometries: each feature's
# ADMIN property is matched against 'Country' and shaded by the 2018
# 'Happiness_Score'.
folium.Choropleth(
    geo_data=country_geo,
    data=df_merged_data_2018,
    columns=['Country', 'Happiness_Score'],  # [key column, value column]
    key_on='feature.properties.ADMIN',
    fill_color='YlOrBr',
    fill_opacity=0.6,
    line_opacity=0.1,
    legend_name="Happiness Score",
    highlight=True,  # highlight a country when hovering over it
    nan_fill_color='gray',  # fill colour for countries with no value
    reset=True  # NOTE(review): not among folium.Choropleth's documented parameters - confirm it has any effect
).add_to(map1)

# Add a control for toggling the map's layers
folium.LayerControl().add_to(map1)

# Display the map as the cell output
map1

#Observation: in 2018, Asia's score level went down compared to the previous year. The high-scoring countries are the same as in past years.

In [None]:
# Keep only the rows recorded for the year 2019
df_merged_data_2019 = Country_rec.loc[
    Country_rec["Year"] == 2019
]

In [None]:
df_merged_data_2019.head()

In [None]:
# Set up a folium map at a world-level zoom. Latitude must lie within
# [-90, 90]; the previous centre of [100, 0] was out of range, so the initial
# view is centred on latitude 0, longitude 0 instead.
map1 = folium.Map(location=[0, 0], zoom_start=1.5)

# Choropleth maps bind pandas DataFrames to JSON geometries: each feature's
# ADMIN property is matched against 'Country' and shaded by the 2019
# 'Happiness_Score'.
folium.Choropleth(
    geo_data=country_geo,
    data=df_merged_data_2019,
    columns=['Country', 'Happiness_Score'],  # [key column, value column]
    key_on='feature.properties.ADMIN',
    fill_color='YlOrBr',
    fill_opacity=0.6,
    line_opacity=0.1,
    legend_name="Happiness Score",
    highlight=True,  # highlight a country when hovering over it
    nan_fill_color='gray',  # fill colour for countries with no value
    reset=True  # NOTE(review): not among folium.Choropleth's documented parameters - confirm it has any effect
).add_to(map1)

# Add a control for toggling the map's layers
folium.LayerControl().add_to(map1)

# Display the map as the cell output
map1

#Observation: in 2019, we see that the USA's happiness score went down.

### Country vs GDP_per_Capita

In [None]:
# Set up a folium map at a world-level zoom. Latitude must lie within
# [-90, 90]; the previous centre of [100, 0] was out of range, so the initial
# view is centred on latitude 0, longitude 0 instead.
# NOTE(review): the name `map` shadows the Python builtin; it is kept because
# a later cell calls map.save().
map = folium.Map(location=[0, 0], zoom_start=1.5)

# Choropleth maps bind pandas DataFrames to JSON geometries: each feature's
# ADMIN property is matched against 'Country' and shaded by the 2015
# 'GDP_per_Capita'.
folium.Choropleth(
    geo_data=country_geo,
    data=df_merged_data_2015,
    columns=['Country', 'GDP_per_Capita'],  # [key column, value column]
    key_on='feature.properties.ADMIN',  # GeoJSON property matched against the 'Country' column
    fill_color='YlOrBr',
    fill_opacity=0.6,
    line_opacity=0.1,
    legend_name="GDP_per_Capita",
    highlight=True,  # highlight a country when hovering over it
    nan_fill_color='gray',  # fill colour for countries with missing values
    reset=True  # NOTE(review): not among folium.Choropleth's documented parameters - confirm it has any effect
).add_to(map)

# Add a control for toggling the map's layers
folium.LayerControl().add_to(map)

# Display the map as the cell output
map

#Observation: in 2015, the country with the highest GDP per capita was Norway, followed by the USA, Canada, Australia, and parts of Europe and Asia.

In [None]:
# Set up a folium map at a world-level zoom. Latitude must lie within
# [-90, 90]; the previous centre of [100, 0] was out of range, so the initial
# view is centred on latitude 0, longitude 0 instead.
# NOTE(review): the name `map` shadows the Python builtin; it is kept because
# a later cell calls map.save().
map = folium.Map(location=[0, 0], zoom_start=1.5)

# Choropleth maps bind pandas DataFrames to JSON geometries: each feature's
# ADMIN property is matched against 'Country' and shaded by the 2016
# 'GDP_per_Capita'.
folium.Choropleth(
    geo_data=country_geo,
    data=df_merged_data_2016,
    columns=['Country', 'GDP_per_Capita'],  # [key column, value column]
    key_on='feature.properties.ADMIN',  # GeoJSON property matched against the 'Country' column
    fill_color='YlOrBr',
    fill_opacity=0.6,
    line_opacity=0.1,
    legend_name="GDP_per_Capita",
    highlight=True,  # highlight a country when hovering over it
    nan_fill_color='gray',  # fill colour for countries with missing values
    reset=True  # NOTE(review): not among folium.Choropleth's documented parameters - confirm it has any effect
).add_to(map)

# Add a control for toggling the map's layers
folium.LayerControl().add_to(map)

# Display the map as the cell output
map

#Observation: in 2016, the country with the highest GDP per capita was Norway, followed by the USA, Canada, Australia, and parts of Europe and Asia.

In [None]:
# Set up a folium map at a world-level zoom. Latitude must lie within
# [-90, 90]; the previous centre of [100, 0] was out of range, so the initial
# view is centred on latitude 0, longitude 0 instead.
# NOTE(review): the name `map` shadows the Python builtin; it is kept because
# a later cell calls map.save().
map = folium.Map(location=[0, 0], zoom_start=1.5)

# Choropleth maps bind pandas DataFrames to JSON geometries: each feature's
# ADMIN property is matched against 'Country' and shaded by the 2018
# 'GDP_per_Capita'.
folium.Choropleth(
    geo_data=country_geo,
    data=df_merged_data_2018,
    columns=['Country', 'GDP_per_Capita'],  # [key column, value column]
    key_on='feature.properties.ADMIN',  # GeoJSON property matched against the 'Country' column
    fill_color='YlOrBr',
    fill_opacity=0.6,
    line_opacity=0.1,
    legend_name="GDP_per_Capita",
    highlight=True,  # highlight a country when hovering over it
    nan_fill_color='gray',  # fill colour for countries with missing values
    reset=True  # NOTE(review): not among folium.Choropleth's documented parameters - confirm it has any effect
).add_to(map)

# Add a control for toggling the map's layers
folium.LayerControl().add_to(map)

# Display the map as the cell output
map

#Observation: in 2018, the USA and Saudi Arabia, along with Norway, went up on GDP per capita.

In [None]:
# Set up a folium map at a world-level zoom. Latitude must lie within
# [-90, 90]; the previous centre of [100, 0] was out of range, so the initial
# view is centred on latitude 0, longitude 0 instead.
# NOTE(review): the name `map` shadows the Python builtin; it is kept because
# a later cell calls map.save().
map = folium.Map(location=[0, 0], zoom_start=1.5)

# Choropleth maps bind pandas DataFrames to JSON geometries: each feature's
# ADMIN property is matched against 'Country' and shaded by the 2019
# 'GDP_per_Capita'.
folium.Choropleth(
    geo_data=country_geo,
    data=df_merged_data_2019,
    columns=['Country', 'GDP_per_Capita'],  # [key column, value column]
    key_on='feature.properties.ADMIN',  # GeoJSON property matched against the 'Country' column
    fill_color='YlOrBr',
    fill_opacity=0.6,
    line_opacity=0.1,
    legend_name="GDP_per_Capita",
    highlight=True,  # highlight a country when hovering over it
    nan_fill_color='gray',  # fill colour for countries with missing values
    reset=True  # NOTE(review): not among folium.Choropleth's documented parameters - confirm it has any effect
).add_to(map)

# Add a control for toggling the map's layers
folium.LayerControl().add_to(map)

# Display the map as the cell output
map

#observation, in 2019, we see the USA moving up along with Norway with high GDP_per_Capita

### Country vs Life Expectancy

In [None]:
# Set up a folium map at a world-level zoom. Latitude must lie within
# [-90, 90]; the previous centre of [100, 0] was out of range, so the initial
# view is centred on latitude 0, longitude 0 instead.
# NOTE(review): the name `map` shadows the Python builtin; it is kept because
# a later cell calls map.save().
map = folium.Map(location=[0, 0], zoom_start=1.5)

# Choropleth maps bind pandas DataFrames to JSON geometries: each feature's
# ADMIN property is matched against 'Country' and shaded by the 2015
# 'Life_Expectancy'.
folium.Choropleth(
    geo_data=country_geo,
    data=df_merged_data_2015,
    columns=['Country', 'Life_Expectancy'],  # [key column, value column]
    key_on='feature.properties.ADMIN',  # GeoJSON property matched against the 'Country' column
    fill_color='YlOrBr',
    fill_opacity=0.6,
    line_opacity=0.1,
    legend_name="Life Expectancy",
    highlight=True,  # highlight a country when hovering over it
    nan_fill_color='gray',  # fill colour for countries with missing values
    reset=True  # NOTE(review): not among folium.Choropleth's documented parameters - confirm it has any effect
).add_to(map)

# Add a control for toggling the map's layers
folium.LayerControl().add_to(map)

# Display the map as the cell output
map

#observation, in 2015, countries with high life expectancy are Canada, USA, Australia, and some of Europe.

In [None]:
# Set up a folium map at a world-level zoom. Latitude must lie within
# [-90, 90]; the previous centre of [100, 0] was out of range, so the initial
# view is centred on latitude 0, longitude 0 instead.
# NOTE(review): the name `map` shadows the Python builtin; it is kept because
# a later cell calls map.save().
map = folium.Map(location=[0, 0], zoom_start=1.5)

# Choropleth maps bind pandas DataFrames to JSON geometries: each feature's
# ADMIN property is matched against 'Country' and shaded by the 2016
# 'Life_Expectancy'.
folium.Choropleth(
    geo_data=country_geo,
    data=df_merged_data_2016,
    columns=['Country', 'Life_Expectancy'],  # [key column, value column]
    key_on='feature.properties.ADMIN',  # GeoJSON property matched against the 'Country' column
    fill_color='YlOrBr',
    fill_opacity=0.6,
    line_opacity=0.1,
    legend_name="Life Expectancy",
    highlight=True,  # highlight a country when hovering over it
    nan_fill_color='gray',  # fill colour for countries with missing values
    reset=True  # NOTE(review): not among folium.Choropleth's documented parameters - confirm it has any effect
).add_to(map)

# Add a control for toggling the map's layers
folium.LayerControl().add_to(map)

# Display the map as the cell output
map

#Observation: in 2016, the USA's life expectancy went down, as did that of some South American countries.

In [None]:
# Set up a folium map at a world-level zoom. Latitude must lie within
# [-90, 90]; the previous centre of [100, 0] was out of range, so the initial
# view is centred on latitude 0, longitude 0 instead.
# NOTE(review): the name `map` shadows the Python builtin; it is kept because
# a later cell calls map.save().
map = folium.Map(location=[0, 0], zoom_start=1.5)

# Choropleth maps bind pandas DataFrames to JSON geometries: each feature's
# ADMIN property is matched against 'Country' and shaded by the 2018
# 'Life_Expectancy'.
folium.Choropleth(
    geo_data=country_geo,
    data=df_merged_data_2018,
    columns=['Country', 'Life_Expectancy'],  # [key column, value column]
    key_on='feature.properties.ADMIN',  # GeoJSON property matched against the 'Country' column
    fill_color='YlOrBr',
    fill_opacity=0.6,
    line_opacity=0.1,
    legend_name="Life Expectancy",
    highlight=True,  # highlight a country when hovering over it
    nan_fill_color='gray',  # fill colour for countries with missing values
    reset=True  # NOTE(review): not among folium.Choropleth's documented parameters - confirm it has any effect
).add_to(map)

# Add a control for toggling the map's layers
folium.LayerControl().add_to(map)

# Display the map as the cell output
map

#Observation: in 2018, Canada, Australia, New Zealand, and parts of Europe had high life expectancy.

In [None]:
# Set up a folium map at a world-level zoom. Latitude must lie within
# [-90, 90]; the previous centre of [100, 0] was out of range, so the initial
# view is centred on latitude 0, longitude 0 instead.
# NOTE(review): the name `map` shadows the Python builtin; it is kept because
# a later cell calls map.save().
map = folium.Map(location=[0, 0], zoom_start=1.5)

# Choropleth maps bind pandas DataFrames to JSON geometries: each feature's
# ADMIN property is matched against 'Country' and shaded by the 2019
# 'Life_Expectancy'.
folium.Choropleth(
    geo_data=country_geo,
    data=df_merged_data_2019,
    columns=['Country', 'Life_Expectancy'],  # [key column, value column]
    key_on='feature.properties.ADMIN',  # GeoJSON property matched against the 'Country' column
    fill_color='YlOrBr',
    fill_opacity=0.6,
    line_opacity=0.1,
    legend_name="Life Expectancy",
    highlight=True,  # highlight a country when hovering over it
    nan_fill_color='gray',  # fill colour for countries with missing values
    reset=True  # NOTE(review): not among folium.Choropleth's documented parameters - confirm it has any effect
).add_to(map)

# Add a control for toggling the map's layers
folium.LayerControl().add_to(map)

# Display the map as the cell output
map

#Observation: in 2019, Canada, Australia, New Zealand, and parts of Europe had high life expectancy, while South America moved up the scale.

# Save Maps

In [None]:
# When the cells run in order, `map` holds the 2019 Life Expectancy choropleth.
# Save it under its own file name: the next cell writes map1 to
# 'plot_data.html', which would otherwise overwrite this map.
map.save('plot_data_life_expectancy.html')

In [None]:
map1.save('plot_data.html')

# Discussion of Results

#Does the analysis answer any of your existing research questions?
Q:How does the distribution of Happiness Scores vary across different continents or regions?
A: The countries with the highest happiness scores were usually Canada, the USA, Australia, New Zealand, Norway, Sweden, Finland, Germany, the United Kingdom, Ireland, and the Netherlands. The lowest-scoring countries are in parts of Africa and South Asia. Over the years up to 2019, the USA dropped on the happiness score.

Q:How does the distribution of GDP per capita vary across different continents or regions?
A: From 2015 to 2017, the country with the highest GDP per capita was Norway, followed by the USA, Canada, Australia, and parts of Europe and Asia. In 2018-2019, the USA and Norway have high GDP per capita.

Q:How does the distribution of social support vary across different continents or regions?
A: From 2015 to 2019, the countries and regions with high social support stayed the same: Canada, the USA, South America, Australia, Europe, and Asia.

Q:How does the distribution of high life expectancy vary across different continents or regions?
A: In 2015, the countries with high life expectancy are Canada, the USA, Australia, and parts of Europe. In the following years, we see the USA's life expectancy dropping.

#Does the analysis lead you to any new research questions?
How do GDP per capita, social support, and life expectancy vary over the years for these countries? We do see GDP per capita change over the years, not much change in social support, and a slight change in life expectancy for top countries like the USA, Canada, Australia, New Zealand, and some European countries.