In [1]:
# Dependencies and Setup
from pathlib import Path
import pandas as pd
import requests
import json
import matplotlib.pyplot as plt
import numpy as np
import time

# Impor the OpenWeatherMap API key
from scipy.stats import linregress
from config import airquality_key

# Import citipy to determine the cities based on latitude and longitude
from citipy import citipy
from geopy.geocoders import Nominatim
from datetime import datetime

In [2]:
# File to Load 
cause_of_death_data = Path("Resources/cause_of_deaths.csv")

#Read CSV file into daraframe
cause_of_death = pd.read_csv(cause_of_death_data)
cause_of_death

# Import the API key
#url = f"http://api.openweathermap.org/data/2.5/air_pollution?lat={lat}&lon={lon}&appid={API key}"

Unnamed: 0,Country/Territory,Code,Year,Meningitis,Alzheimer's Disease and Other Dementias,Parkinson's Disease,Nutritional Deficiencies,Malaria,Drowning,Interpersonal Violence,...,Diabetes Mellitus,Chronic Kidney Disease,Poisonings,Protein-Energy Malnutrition,Road Injuries,Chronic Respiratory Diseases,Cirrhosis and Other Chronic Liver Diseases,Digestive Diseases,"Fire, Heat, and Hot Substances",Acute Hepatitis
0,Afghanistan,AFG,1990,2159,1116,371,2087,93,1370,1538,...,2108,3709,338,2054,4154,5945,2673,5005,323,2985
1,Afghanistan,AFG,1991,2218,1136,374,2153,189,1391,2001,...,2120,3724,351,2119,4472,6050,2728,5120,332,3092
2,Afghanistan,AFG,1992,2475,1162,378,2441,239,1514,2299,...,2153,3776,386,2404,5106,6223,2830,5335,360,3325
3,Afghanistan,AFG,1993,2812,1187,384,2837,108,1687,2589,...,2195,3862,425,2797,5681,6445,2943,5568,396,3601
4,Afghanistan,AFG,1994,3027,1211,391,3081,211,1809,2849,...,2231,3932,451,3038,6001,6664,3027,5739,420,3816
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6115,Zimbabwe,ZWE,2015,1439,754,215,3019,2518,770,1302,...,3176,2108,381,2990,2373,2751,1956,4202,632,146
6116,Zimbabwe,ZWE,2016,1457,767,219,3056,2050,801,1342,...,3259,2160,393,3027,2436,2788,1962,4264,648,146
6117,Zimbabwe,ZWE,2017,1460,781,223,2990,2116,818,1363,...,3313,2196,398,2962,2473,2818,2007,4342,654,144
6118,Zimbabwe,ZWE,2018,1450,795,227,2918,2088,825,1396,...,3381,2240,400,2890,2509,2849,2030,4377,657,139


In [3]:
#delete columns we dont need
delete_columns = ["Meningitis", "Alzheimer's Disease and Other Dementias", "Parkinson's Disease", "Nutritional Deficiencies", 
                  "Malaria","Drowning", "Interpersonal Violence", "Maternal Disorders", "HIV/AIDS", "Drug Use Disorders", 
                  "Tuberculosis", "Cardiovascular Diseases", "Neonatal Disorders", "Alcohol Use Disorders", "Self-harm",
                  "Exposure to Forces of Nature", "Diarrheal Diseases", "Environmental Heat and Cold Exposure", "Neoplasms", 
                  "Conflict and Terrorism", "Environmental Heat and Cold Exposure", "Neoplasms", "Conflict and Terrorism", 
                  "Diabetes Mellitus", "Chronic Kidney Disease", "Poisonings", "Protein-Energy Malnutrition", "Road Injuries", 
                  "Cirrhosis and Other Chronic Liver Diseases", "Digestive Diseases", "Fire, Heat, and Hot Substances", 
                  "Acute Hepatitis"]
cause_of_death.drop(columns = delete_columns, inplace = True)
cause_of_death.rename(columns = {'Country/Territory': 'Country'}, inplace = True)
cause_of_death

Unnamed: 0,Country,Code,Year,Lower Respiratory Infections,Chronic Respiratory Diseases
0,Afghanistan,AFG,1990,23741,5945
1,Afghanistan,AFG,1991,24504,6050
2,Afghanistan,AFG,1992,27404,6223
3,Afghanistan,AFG,1993,31116,6445
4,Afghanistan,AFG,1994,33390,6664
...,...,...,...,...,...
6115,Zimbabwe,ZWE,2015,12974,2751
6116,Zimbabwe,ZWE,2016,13024,2788
6117,Zimbabwe,ZWE,2017,12961,2818
6118,Zimbabwe,ZWE,2018,12860,2849


In [4]:
#Filter the data to include only the rows between 2014 and 2019
cause_of_death_filtered = cause_of_death[(cause_of_death['Year'] >= 2014) & (cause_of_death['Year'] <= 2019)]
cause_of_death_filtered

Unnamed: 0,Country,Code,Year,Lower Respiratory Infections,Chronic Respiratory Diseases
24,Afghanistan,AFG,2014,21750,6851
25,Afghanistan,AFG,2015,21016,6835
26,Afghanistan,AFG,2016,20601,6894
27,Afghanistan,AFG,2017,19868,6982
28,Afghanistan,AFG,2018,19014,7036
...,...,...,...,...,...
6115,Zimbabwe,ZWE,2015,12974,2751
6116,Zimbabwe,ZWE,2016,13024,2788
6117,Zimbabwe,ZWE,2017,12961,2818
6118,Zimbabwe,ZWE,2018,12860,2849


In [5]:
#pivot the df to reshape it with country as index, year as columns and lower and chronic respiratory as values
yealy_data = cause_of_death_filtered.pivot(index = 'Country', columns = 'Year', values = ['Lower Respiratory Infections','Chronic Respiratory Diseases'])

#reset the index
yealy_data.reset_index(inplace = True)
yealy_data

Unnamed: 0_level_0,Country,Lower Respiratory Infections,Lower Respiratory Infections,Lower Respiratory Infections,Lower Respiratory Infections,Lower Respiratory Infections,Lower Respiratory Infections,Chronic Respiratory Diseases,Chronic Respiratory Diseases,Chronic Respiratory Diseases,Chronic Respiratory Diseases,Chronic Respiratory Diseases,Chronic Respiratory Diseases
Year,Unnamed: 1_level_1,2014,2015,2016,2017,2018,2019,2014,2015,2016,2017,2018,2019
0,Afghanistan,21750,21016,20601,19868,19014,18697,6851,6835,6894,6982,7036,7082
1,Albania,401,418,430,440,448,457,702,737,759,779,797,815
2,Algeria,5543,5634,5567,5605,5704,5786,6536,6739,6903,7100,7320,7528
3,American Samoa,11,11,12,12,12,12,21,22,22,22,23,23
4,Andorra,18,19,19,20,20,20,35,36,37,38,39,39
...,...,...,...,...,...,...,...,...,...,...,...,...,...
199,Venezuela,4967,5018,5094,5154,5251,5555,5285,5506,5813,6105,6427,7062
200,Vietnam,21646,21572,21492,21460,21408,21345,33943,34289,34806,35312,35785,36229
201,Yemen,6613,6585,6532,6461,6350,6289,4825,4981,5162,5368,5569,5738
202,Zambia,9292,8921,8602,8494,8259,8190,2180,2214,2249,2302,2341,2388


In [6]:
# Set the API base URL
base_url = "http://api.openweathermap.org/data/2.5/air_pollution/history"

# Initialize a geocoder
geolocator = Nominatim(user_agent="my_geocoder")

# Define a function to map country name to latitude and longitude
def map_country_to_lat_lon(Country):
    location = geolocator.geocode(Country)
    if location:
        return location.latitude, location.longitude
    else:
        return None, None
        
#define start and end date 
start_date = "1606223802"
end_date = "1606482999"

# Example usage
latitude, longitude = map_country_to_lat_lon('location')

# Define an empty list to fetch the air quality data for each country
country_air_quality_data = []

# Print to logger
print("Beginning Data Retrieval     ")
print("-----------------------------")

# Loop through all the countries in your DataFrame to fetch air quality data
for country in cause_of_death_filtered['Country']:
    try:
        # Get latitude and longitude for the country 
        lat, lon = map_country_to_lat_lon(country)

        # Create endpoint URL with latitude and longitude of the country
        url = f"{base_url}?lat={lat}&lon={lon}&start={start_date}&end={end_date}&appid={airquality_key}"

        # Run an API request for the country
        response = requests.get(url)
        data = response.json()
        #print("API Response:", data)
        #print(data['list'])

        # Extract air quality data for the country
        pm2_5 = data['list'][0]['components']['pm2_5']
        pm10 = data['list'][0]['components']['pm10']
        o3 = data['list'][0]['components']['o3']
        no2 = data['list'][0]['components']['no2']
        so2 = data['list'][0]['components']['so2']
        
        # Append the air quality data to the list
        country_air_quality_data.append({"Country": country, "PM2_5": pm2_5, "PM 10": pm10, "O3": o3, "NO2": no2, "SO2": so2})
        country_air_quality_data
        
    except Exception as e:
        print(f"Error encountered for {country}: {e}. Skipping...")
        pass
             
# Indicate that Data Retrieval is complete 
print("-----------------------------")
print("Data Retrieval Complete      ")
print("-----------------------------")


Beginning Data Retrieval     
-----------------------------
-----------------------------
Data Retrieval Complete      
-----------------------------


In [7]:
air_quality_df = pd.DataFrame(country_air_quality_data)
air_quality_df

Unnamed: 0,Country,PM2_5,PM 10,O3,NO2,SO2
0,Afghanistan,0.81,0.96,75.82,0.11,0.44
1,Afghanistan,0.81,0.96,75.82,0.11,0.44
2,Afghanistan,0.81,0.96,75.82,0.11,0.44
3,Afghanistan,0.81,0.96,75.82,0.11,0.44
4,Afghanistan,0.81,0.96,75.82,0.11,0.44
...,...,...,...,...,...,...
1219,Zimbabwe,4.37,5.49,25.39,1.46,1.54
1220,Zimbabwe,4.37,5.49,25.39,1.46,1.54
1221,Zimbabwe,4.37,5.49,25.39,1.46,1.54
1222,Zimbabwe,4.37,5.49,25.39,1.46,1.54


In [8]:
import requests
url = f"http://api.openweathermap.org/geo/1.0/direct?q=Afghanistan&limit=5&appid={airquality_key}"
headers = {'Content-Type': 'application/.json',}

data = {"country": "afghanistan"}

response = requests.get(url, headers=headers)


In [9]:
response.json()

[{'name': 'Afghanistan',
  'local_names': {'ar': 'أفغانستان', 'en': 'Afghanistan'},
  'lat': 25.2333938,
  'lon': 55.174389025091465,
  'country': 'AE',
  'state': 'Dubai'}]