In [1]:
# Import Dependencies
from pathlib import Path
import pandas as pd
import requests
import json
from api_key import open_weather_api_key
import numpy as np
from datetime import datetime, timedelta
import calendar

In [2]:
#Create a path variable to the data
cities_data = Path('Resources/Cities_Au.csv')

In [3]:
# Import the data into a Pandas DataFrame
cities_data_df = pd.read_csv(cities_data, encoding="UTF-8")
cities_data_df.head()

Unnamed: 0,city,lat,lng,country,iso2,admin_name,capital,population,population_proper
0,Melbourne,-37.8142,144.9631,Australia,AU,Victoria,admin,5031195,5031195
1,Sydney,-33.8678,151.21,Australia,AU,New South Wales,admin,4840600,4840600
2,Brisbane,-27.4678,153.0281,Australia,AU,Queensland,admin,2360241,2360241
3,Perth,-31.9559,115.8606,Australia,AU,Western Australia,admin,2141834,2141834
4,Adelaide,-34.9275,138.6,Australia,AU,South Australia,admin,1295714,1295714


In [4]:
#Clean the dataframe
cleaned_cities_df = cities_data_df.drop(columns=['capital', 'population_proper', 'admin_name'])
cleaned_cities_df.head()

Unnamed: 0,city,lat,lng,country,iso2,population
0,Melbourne,-37.8142,144.9631,Australia,AU,5031195
1,Sydney,-33.8678,151.21,Australia,AU,4840600
2,Brisbane,-27.4678,153.0281,Australia,AU,2360241
3,Perth,-31.9559,115.8606,Australia,AU,2141834
4,Adelaide,-34.9275,138.6,Australia,AU,1295714


In [14]:
#Set up a list of time frame from Jan 2023 to Jan 2024 mothly
time = np.arange(np.datetime64('2020-01-06'), np.datetime64('2021-01-01'),timedelta(days=30))
ux_time = time.astype('datetime64[s]').astype('int')
ux_time

array([1578268800, 1580860800, 1583452800, 1586044800, 1588636800,
       1591228800, 1593820800, 1596412800, 1599004800, 1601596800,
       1604188800, 1606780800, 1609372800])

In [16]:
#Create empty list for storing data
aqi =[]
co = []
no2 =[]

#Loop through all cities/row in dataframe
for index, row in cleaned_cities_df.iterrows():
    lat = row["lat"]
    lon =row["lng"]
    #Loop through all time in time frame
    for i in range(len(ux_time)):
        start_date = ux_time[i]
        end_date = start_date + 1
        daily_url = f"http://api.openweathermap.org/data/2.5/air_pollution/history?lat={lat}&lon={lon}&start={start_date}&end={end_date}&appid={open_weather_api_key}"
    try:
        data = requests.get(daily_url).json()
        #Add data to respective lists
        aqi.append(data["list"][0]["main"]["aqi"])
        co.append(data["list"][0]["components"]['co'])
        no2.append(data["list"][0]["components"]['no2'])
        #Calculate average data
        average_aqi = sum(aqi) / len(aqi)
        average_co = sum(co) / len(co)
        average_no2 = sum(no2) / len(no2)
        #Add data to dataframe
        cleaned_cities_df.loc[index, "average monthly AQI"] = average_aqi
        cleaned_cities_df.loc[index, "average monthly CO"] = average_co
        cleaned_cities_df.loc[index, "average monthly NO2"] = average_no2
    except (KeyError, IndexError):

        cleaned_cities_df.loc[index, "average monthly AQI"] = np.nan
        cleaned_cities_df.loc[index, "average monthly CO"] = np.nan
        cleaned_cities_df.loc[index, "average monthly NO2"] = np.nan






In [17]:
#Show the completed dataframe
cleaned_cities_df

Unnamed: 0,city,lat,lng,country,iso2,population,average monthly AQI,average monthly CO,average monthly NO2
0,Melbourne,-37.8142,144.9631,Australia,AU,5031195,1.000000,195.270000,12.170000
1,Sydney,-33.8678,151.2100,Australia,AU,4840600,1.000000,184.420000,8.355000
2,Brisbane,-27.4678,153.0281,Australia,AU,2360241,1.000000,183.583333,6.796667
3,Perth,-31.9559,115.8606,Australia,AU,2141834,1.250000,190.675000,7.560000
4,Adelaide,-34.9275,138.6000,Australia,AU,1295714,1.200000,189.256000,6.348000
...,...,...,...,...,...,...,...,...,...
312,Yallourn North,-38.1667,146.3667,Australia,AU,1107,1.022364,172.406645,1.845112
313,Innisfail,-17.5238,146.0311,Australia,AU,1091,1.022293,172.447548,1.840446
314,Mollymook,-35.3297,150.4753,Australia,AU,1083,1.022222,172.414032,1.835873
315,Evandale,-41.5702,147.2467,Australia,AU,1058,1.022152,172.380728,1.830633


In [29]:
#According to "", cities with population less than 15,0000 are considered as rural.
#Add classification to dataframe
for index, row in cleaned_cities_df.iterrows():
    if row["population"] > 10000:
        cleaned_cities_df.loc[index, "classification"] = "Urban"
    else:
        cleaned_cities_df.loc[index, "classification"] = "Rural"

cleaned_cities_df

Unnamed: 0,city,lat,lng,country,iso2,population,average monthly AQI,average monthly CO,average monthly NO2,classification
0,Melbourne,-37.8142,144.9631,Australia,AU,5031195,1.000000,220.300000,10.200000,Urban
1,Sydney,-33.8678,151.2100,Australia,AU,4840600,1.000000,231.980000,18.125000,Urban
2,Brisbane,-27.4678,153.0281,Australia,AU,2360241,1.000000,261.463333,12.896667,Urban
3,Perth,-31.9559,115.8606,Australia,AU,2141834,1.000000,258.682500,11.300000,Urban
4,Adelaide,-34.9275,138.6000,Australia,AU,1295714,1.000000,247.000000,9.438000,Urban
...,...,...,...,...,...,...,...,...,...,...
312,Yallourn North,-38.1667,146.3667,Australia,AU,1107,1.047923,216.838051,1.811438,Rural
313,Innisfail,-17.5238,146.0311,Australia,AU,1091,1.047771,216.912866,1.807102,Rural
314,Mollymook,-35.3297,150.4753,Australia,AU,1083,1.047619,216.817651,1.802381,Rural
315,Evandale,-41.5702,147.2467,Australia,AU,1058,1.047468,216.733608,1.797627,Rural


In [30]:
#Group data by Classification
grouped_city_df = cleaned_cities_df.groupby(["classification"])
#Calculate monthly average data
mean_aqi = grouped_city_df['average monthly AQI'].mean()
mean_co = grouped_city_df['average monthly CO'].mean()
mean_no2 = grouped_city_df['average monthly NO2'].mean()

In [31]:
#Create a new dataframe that contains monthly average data
rural_urban_df = pd.DataFrame({"Monthly Average AQI": mean_aqi,
                               "Montly Average CO": mean_co,
                               "Monthly Average NO2": mean_no2})

rural_urban_df

Unnamed: 0_level_0,Monthly Average AQI,Montly Average CO,Monthly Average NO2
classification,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Rural,1.059147,223.660318,2.157591
Urban,1.022776,237.082703,4.314809
