# <center><div class="alert alert-block alert-info">This code extracts data from an <b>API</b>, Transforms it and Loads it into a Database</div></center>

## The API call here will be from __[OpenWeatherMap.org](https://openweathermap.org/api/air-pollution)__ to collect Air Pollution data for a specific coordinate

### Importing needed packages

In [1]:

import os

import pandas as pd
import requests


### Required variables

In [3]:
# API key needed for the call - generated from the web site -
# It is read from the environment so the secret never lives in the notebook.
# Any key that was previously written in this file should be revoked and regenerated.
api_key = os.environ.get('OPENWEATHER_API_KEY')
if not api_key:
    raise RuntimeError(
        'Set the OPENWEATHER_API_KEY environment variable to your '
        'OpenWeatherMap API key before running this notebook.'
    )

# Coordinate of the specific area we are interested in - here Central Omaha -
lat, lon = 41.24571173787448, -96.0306766000668

# Start and End date of the period we are interested in - here 2023/11/02 to 2024/01/01 -
# in Unix Time format (seconds since 1970-01-01 00:00:00 UTC)
start_date, end_date = 1698883200, 1704067200

# Actual API call structure - HTTPS so the key is not sent in clear text -
api_call = (
    'https://api.openweathermap.org/data/2.5/air_pollution/history'
    f'?lat={lat}&lon={lon}&start={start_date}&end={end_date}&appid={api_key}'
)

### Launching the API call

In [4]:
# Querying the web site - a Response [200] means successful -
# The timeout (in seconds) keeps a stalled connection from hanging the kernel.
api_request = requests.get(api_call, timeout=30)

# Stop here on any 4xx/5xx answer instead of parsing an error payload further down
api_request.raise_for_status()
api_request

<Response [200]>

In [5]:
# Converting the request's response to JSON
# .json() decodes the body of the HTTP response; the result displayed below is a Python dict
response = api_request.json()
response


{'coord': {'lon': -96.0307, 'lat': 41.2457},
 'list': [{'main': {'aqi': 1},
   'components': {'co': 360.49,
    'no': 0.03,
    'no2': 24.33,
    'o3': 46.49,
    'so2': 0.94,
    'pm2_5': 5.55,
    'pm10': 7.21,
    'nh3': 2.72},
   'dt': 1698883200},
  {'main': {'aqi': 1},
   'components': {'co': 377.18,
    'no': 0.04,
    'no2': 26.39,
    'o3': 38.62,
    'so2': 0.86,
    'pm2_5': 6.19,
    'pm10': 7.9,
    'nh3': 2.94},
   'dt': 1698886800},
  {'main': {'aqi': 1},
   'components': {'co': 367.16,
    'no': 0.03,
    'no2': 23.65,
    'o3': 39.34,
    'so2': 0.77,
    'pm2_5': 6.29,
    'pm10': 8.02,
    'nh3': 2.85},
   'dt': 1698890400},
  {'main': {'aqi': 1},
   'components': {'co': 337.12,
    'no': 0.01,
    'no2': 18.16,
    'o3': 45.06,
    'so2': 0.68,
    'pm2_5': 5.64,
    'pm10': 7.3,
    'nh3': 2.6},
   'dt': 1698894000},
  {'main': {'aqi': 1},
   'components': {'co': 330.45,
    'no': 0.02,
    'no2': 16.45,
    'o3': 45.06,
    'so2': 0.59,
    'pm2_5': 5.42,
    'pm1

> The decoded response is a Python dictionary

### Transforming the data extracted

In [6]:
# Dictionary keys
# Listing the top-level keys of the response to see how it is organised
print(response.keys())

dict_keys(['coord', 'list'])


In [7]:
# Retrieving only the value stored under the 'list' key
response['list']

[{'main': {'aqi': 1},
  'components': {'co': 360.49,
   'no': 0.03,
   'no2': 24.33,
   'o3': 46.49,
   'so2': 0.94,
   'pm2_5': 5.55,
   'pm10': 7.21,
   'nh3': 2.72},
  'dt': 1698883200},
 {'main': {'aqi': 1},
  'components': {'co': 377.18,
   'no': 0.04,
   'no2': 26.39,
   'o3': 38.62,
   'so2': 0.86,
   'pm2_5': 6.19,
   'pm10': 7.9,
   'nh3': 2.94},
  'dt': 1698886800},
 {'main': {'aqi': 1},
  'components': {'co': 367.16,
   'no': 0.03,
   'no2': 23.65,
   'o3': 39.34,
   'so2': 0.77,
   'pm2_5': 6.29,
   'pm10': 8.02,
   'nh3': 2.85},
  'dt': 1698890400},
 {'main': {'aqi': 1},
  'components': {'co': 337.12,
   'no': 0.01,
   'no2': 18.16,
   'o3': 45.06,
   'so2': 0.68,
   'pm2_5': 5.64,
   'pm10': 7.3,
   'nh3': 2.6},
  'dt': 1698894000},
 {'main': {'aqi': 1},
  'components': {'co': 330.45,
   'no': 0.02,
   'no2': 16.45,
   'o3': 45.06,
   'so2': 0.59,
   'pm2_5': 5.42,
   'pm10': 7.02,
   'nh3': 2.56},
  'dt': 1698897600},
 {'main': {'aqi': 1},
  'components': {'co': 347.14,


> The value stored under the 'list' key is a list of dictionaries

#### Restructuring the 1st element of the list

In [9]:
# Verifying that 1st element
first_entry = response['list'][0]
print(first_entry)

# Transforming that 1st element - aq: air quality -
# A NEW dictionary is built here: adding the keys directly to
# first_entry['components'] would silently modify `response` itself.
aq_components_dict = {
    **first_entry['components'],
    'aqi': first_entry['main']['aqi'],
    'date': first_entry['dt'],
}
print(aq_components_dict)

{'main': {'aqi': 1}, 'components': {'co': 360.49, 'no': 0.03, 'no2': 24.33, 'o3': 46.49, 'so2': 0.94, 'pm2_5': 5.55, 'pm10': 7.21, 'nh3': 2.72}, 'dt': 1698883200}
{'co': 360.49, 'no': 0.03, 'no2': 24.33, 'o3': 46.49, 'so2': 0.94, 'pm2_5': 5.55, 'pm10': 7.21, 'nh3': 2.72, 'aqi': 1, 'date': 1698883200}


> Now we have all the needed information grouped in a single dictionary

In [11]:
# Looping through the whole list to rebuild one flat dictionary per record
# Each record is a NEW dictionary (pollutant components + 'aqi' + 'date'), so the
# nested dictionaries inside `response` are left untouched by this cell.
aq_components_list = [
    {
        **entry['components'],
        'aqi': entry['main']['aqi'],
        'date': entry['dt'],
    }
    for entry in response['list']
]

aq_components_list


[{'co': 360.49,
  'no': 0.03,
  'no2': 24.33,
  'o3': 46.49,
  'so2': 0.94,
  'pm2_5': 5.55,
  'pm10': 7.21,
  'nh3': 2.72,
  'aqi': 1,
  'date': 1698883200},
 {'co': 377.18,
  'no': 0.04,
  'no2': 26.39,
  'o3': 38.62,
  'so2': 0.86,
  'pm2_5': 6.19,
  'pm10': 7.9,
  'nh3': 2.94,
  'aqi': 1,
  'date': 1698886800},
 {'co': 367.16,
  'no': 0.03,
  'no2': 23.65,
  'o3': 39.34,
  'so2': 0.77,
  'pm2_5': 6.29,
  'pm10': 8.02,
  'nh3': 2.85,
  'aqi': 1,
  'date': 1698890400},
 {'co': 337.12,
  'no': 0.01,
  'no2': 18.16,
  'o3': 45.06,
  'so2': 0.68,
  'pm2_5': 5.64,
  'pm10': 7.3,
  'nh3': 2.6,
  'aqi': 1,
  'date': 1698894000},
 {'co': 330.45,
  'no': 0.02,
  'no2': 16.45,
  'o3': 45.06,
  'so2': 0.59,
  'pm2_5': 5.42,
  'pm10': 7.02,
  'nh3': 2.56,
  'aqi': 1,
  'date': 1698897600},
 {'co': 347.14,
  'no': 0.05,
  'no2': 19.71,
  'o3': 37.19,
  'so2': 0.51,
  'pm2_5': 5.91,
  'pm10': 7.49,
  'nh3': 2.56,
  'aqi': 1,
  'date': 1698901200},
 {'co': 347.14,
  'no': 0.05,
  'no2': 19.19,
  '

> Now we have a nice list of dictionaries

#### Converting the list to pandas DataFrame

In [12]:
# Building a DataFrame from the list of dictionaries and displaying it
aq_df = pd.DataFrame(aq_components_list)
aq_df

Unnamed: 0,co,no,no2,o3,so2,pm2_5,pm10,nh3,aqi,date
0,360.49,0.03,24.33,46.49,0.94,5.55,7.21,2.72,1,1698883200
1,377.18,0.04,26.39,38.62,0.86,6.19,7.90,2.94,1,1698886800
2,367.16,0.03,23.65,39.34,0.77,6.29,8.02,2.85,1,1698890400
3,337.12,0.01,18.16,45.06,0.68,5.64,7.30,2.60,1,1698894000
4,330.45,0.02,16.45,45.06,0.59,5.42,7.02,2.56,1,1698897600
...,...,...,...,...,...,...,...,...,...,...
1436,257.02,0.37,3.08,65.09,1.01,0.58,0.74,0.97,2,1704052800
1437,257.02,0.27,3.34,63.66,0.92,0.58,0.75,1.03,2,1704056400
1438,257.02,0.12,3.73,61.51,0.86,0.57,0.74,1.11,2,1704060000
1439,263.69,0.03,4.50,59.37,0.85,0.60,0.78,1.25,1,1704063600


### Loading the data to a database

> Here I used sqlite to create a database in the same folder as my code file

In [13]:
# Importing required package
import sqlite3


In [14]:
# Connecting to the database and loading the data
conn = sqlite3.connect('Air_quality.db')
try:
    # if_exists='replace' recreates the 'aq_index' table on every run,
    # so re-running this cell does not append duplicate rows
    aq_df.to_sql(name='aq_index', con=conn, if_exists='replace', index=False)
finally:
    # Always release the database file, even when the write fails
    conn.close()

> We can also save the data locally, straight into a folder

In [15]:
# Loading the data into the current working directory as a CSV file
# index=False leaves the DataFrame's row index out of the file
aq_df.to_csv('aq_index.csv', index=False)

<b>Notice that so far all we have done is extract the data and transform it.<br>
No cleaning has been done yet, as cleaning involves handling duplicate entries, outliers, and inaccurate, unwanted, irrelevant, or missing data, as well as fixing structural errors.<br>We will go through data cleaning in detail in the Analysis part.</b>