## 1. Importing libraries

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib
import os
import folium
import json
import matplotlib.pyplot as plt

In [2]:
# Render matplotlib figures inline, directly below the cell that creates them.
%matplotlib inline

In [3]:
# Location of the world-countries GeoJSON file. Nothing is loaded here; this is
# only the path string, which later cells open and hand to folium as geo_data.
# NOTE(review): hard-coded absolute path on one user's machine, with mixed
# '\' and '/' separators - adjust before running anywhere else.
country_geo = r'C:\Users\sdaon\Documents\Achievement 6/world-countries.json'

In [10]:
# Optional: load the GeoJSON here as well, to inspect its structure.

# Open the file in a context manager so the handle is closed as soon as parsing
# finishes (the bare open() used before left it open for the kernel's lifetime).
# JSON is UTF-8, so state the encoding instead of relying on the platform default.
with open(country_geo, encoding='utf-8') as f:
    # json.load returns the parsed document as a dictionary
    data = json.load(f)

# Print the first record of the 'features' list to see where the country name is stored
print(data['features'][0])

{'type': 'Feature', 'properties': {'name': 'Afghanistan'}, 'geometry': {'type': 'Polygon', 'coordinates': [[[61.210817, 35.650072], [62.230651, 35.270664], [62.984662, 35.404041], [63.193538, 35.857166], [63.982896, 36.007957], [64.546479, 36.312073], [64.746105, 37.111818], [65.588948, 37.305217], [65.745631, 37.661164], [66.217385, 37.39379], [66.518607, 37.362784], [67.075782, 37.356144], [67.83, 37.144994], [68.135562, 37.023115], [68.859446, 37.344336], [69.196273, 37.151144], [69.518785, 37.608997], [70.116578, 37.588223], [70.270574, 37.735165], [70.376304, 38.138396], [70.806821, 38.486282], [71.348131, 38.258905], [71.239404, 37.953265], [71.541918, 37.905774], [71.448693, 37.065645], [71.844638, 36.738171], [72.193041, 36.948288], [72.63689, 37.047558], [73.260056, 37.495257], [73.948696, 37.421566], [74.980002, 37.41999], [75.158028, 37.133031], [74.575893, 37.020841], [74.067552, 36.836176], [72.920025, 36.720007], [71.846292, 36.509942], [71.262348, 36.074388], [71.498768,

In [None]:
# Root folder of the project; the CSV location is built relative to it below.
# NOTE(review): hard-coded absolute local path - adjust before running elsewhere.
path = r'C:\Users\sdaon\Documents\Global Air Pollution Dataset'

In [None]:
# Build the full location of the cleaned CSV first, then read it.
# index_col=False tells pandas not to use the first column as the row index.
csv_file = os.path.join(path, 'Data', 'Prepared Data', 'global air pollution dataset_cleaned.csv')
df = pd.read_csv(csv_file, index_col=False)

In [None]:
# Preview the first rows of the loaded data.
df.head()

In [None]:
# Size of the data set as (rows, columns).
df.shape

## Data Wrangling

In [None]:
# List the column names to see what was loaded.
df.columns

In [None]:
# Drop the 'Unnamed: 0' column (presumably a row index written out by an
# earlier export - TODO confirm); it carries no information for the analysis.
# errors='ignore' keeps this cell safe to re-run: once the column is gone, a
# second execution is a no-op instead of raising KeyError.
df = df.drop(columns=['Unnamed: 0'], errors='ignore')

In [None]:
# Re-check the column names after dropping 'Unnamed: 0'.
df.columns

In [None]:
# Count the missing values in each column.
df.isna().sum()

In [None]:
# Collect the rows that repeat an earlier row exactly
# (the first occurrence of each row is not flagged).
is_repeat = df.duplicated()
df_dups = df.loc[is_repeat]

In [None]:
# (rows, columns) of the duplicate subset; zero rows means no duplicated records.
df_dups.shape

### Check for extreme values

In [None]:
# Distribution of the overall AQI value: 25-bin histogram with a KDE curve overlaid.
sns.histplot(data=df, x='AQI Value', bins=25, kde=True)

#### There are no extreme values, as the AQI does not exceed 500.

In [None]:
# Distribution of the CO AQI value: 15-bin histogram with a KDE curve overlaid.
sns.histplot(data=df, x='CO AQI Value', bins=15, kde=True)

In [None]:
# Show the records whose CO AQI value is above 13 (the tail of the histogram).
df.loc[df['CO AQI Value'].gt(13)]

#### There are extreme values, but they do not affect my analysis. Because I want to investigate them further, I decided to keep them.

In [None]:
# Distribution of the Ozone AQI value: 25-bin histogram with a KDE curve overlaid.
sns.histplot(data=df, x='Ozone AQI Value', bins=25, kde=True)

In [None]:
# Distribution of the NO2 AQI value: 25-bin histogram with a KDE curve overlaid.
sns.histplot(data=df, x='NO2 AQI Value', bins=25, kde=True)

In [None]:
# Distribution of the PM2.5 AQI value: 25-bin histogram with a KDE curve overlaid.
sns.histplot(data=df, x='PM2.5 AQI Value', bins=25, kde=True)

#### There are no extreme values, as the PM2.5 AQI value does not exceed 500.

### Plotting a Choropleth

In [None]:
# Keep only the two columns the choropleth needs: the country name (the key
# matched against the GeoJSON) and the AQI value to be plotted.
df_new = df.loc[:, ['Country', 'AQI Value']]
df_new.head()

In [None]:
# A choropleth colours each country with a single value, so collapse df_new to
# one row per country first. Without this, a country that appears in several
# rows would be coloured by just one of them rather than by a summary.
# (Assumes df_new can hold more than one row per country - TODO confirm; if it
# is already unique per country the mean leaves the values unchanged.)
country_aqi = df_new.groupby('Country', as_index=False)['AQI Value'].mean()

# Set up a folium map zoomed out to world level. location is [latitude, longitude];
# latitude must lie within [-90, 90], so centre on [0, 0] (the previous value
# of 100 was out of range).
# NOTE: the name `map` shadows Python's built-in map(); it is kept because the
# following cell saves the figure through this name.
map = folium.Map(location=[0, 0], zoom_start=1.5)

# Bind the per-country values to the GeoJSON shapes. key_on must point at the
# GeoJSON property holding the country name, and the first entry of `columns`
# must contain matching names.
folium.Choropleth(
    geo_data=country_geo,
    data=country_aqi,
    columns=['Country', 'AQI Value'],
    key_on='feature.properties.name',  # where the country name lives in each GeoJSON feature
    fill_color='YlOrBr',
    fill_opacity=0.6,
    line_opacity=0.1,
    legend_name="Mean AQI Value").add_to(map)
folium.LayerControl().add_to(map)

# Display the map as the cell output
map

In [None]:
# Save the map to an HTML file; the relative path resolves against the
# current working directory.
map.save('plot_data.html')