In [1]:
# Import relevant packages
import pandas as pd
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
import openaq
import warnings

warnings.simplefilter('ignore')

%matplotlib inline

# Set major seaborn asthetics
sns.set("notebook", style='ticks', font_scale=1.0)

# Increase the quality of inline plots
mpl.rcParams['figure.dpi']= 500

print ("pandas v{}".format(pd.__version__))
print ("matplotlib v{}".format(mpl.__version__))
print ("seaborn v{}".format(sns.__version__))
print ("openaq v{}".format(openaq.__version__))

pandas v1.1.3
matplotlib v3.3.1
seaborn v0.11.0
openaq v1.1.0


In [2]:
# Fetch the api
# http://dhhagan.github.io/py-openaq/api.html
api = openaq.OpenAQ()

In [35]:
# TODO
# 1. Drop the ID column, don't need to write that into the file
# 2. The dates are not the same for all cities.. Look into that
# 3. Maybe count locations and provide that into the dataframe as well. 
#    It's relevent to know how many measures are in the city.
# 4. Can we choose certain type of measures



def measurment_to_csv(city,date_from,date_to):
    '''
    This function takes in a city, parameter and date and writes data into a csv.file
    Input:
        city: name of a city (string)
        ???parameter: List of strings that represent the parameters wanted to calculate
        date_from: measurments after this date will be calculated
        date_to: Measures until this date will be calculated
    '''

    # Fetch the data from the measurment api
    df = api.measurements(city = city, df = True, limit = 50000, parameter = ["pm25", "pm10"],
                          date_from = date_from, date_to = date_to)

    ## Data prepping 

    # change the index
    df.index.name = 'Date.local'
    df.reset_index(inplace=True)
    df['Date'] = df['Date.local'].dt.strftime('%Y-%m-%d')
    df['value'] = df['value'].astype(float, errors = 'raise')

    # Calculate mean, max and min value for each date
    Result_mean = df.groupby(['Date', 'parameter'],as_index=False)['value'].mean()
    Result_max = df.groupby(['Date', 'parameter'],as_index=False)['value'].max()
    Result_min = df.groupby(['Date', 'parameter'],as_index=False)['value'].min()

    # Pivot the tables to wide format
    ResultWide_mean = Result_mean.pivot_table(index='Date',columns='parameter', values='value')
    ResultWide_max = Result_max.pivot_table(index='Date',columns='parameter', values='value')
    ResultWide_min = Result_min.pivot_table(index='Date',columns='parameter', values='value')

    # Rename the columns to distinguish
    ResultWide_mean.rename(columns={"pm10": 'pm10_mean', 'pm25': 'pm25_mean'}, inplace=True)
    ResultWide_max.rename(columns={"pm10": 'pm10_max', 'pm25': 'pm25_max'}, inplace=True)
    ResultWide_min.rename(columns={"pm10": 'pm10_min', 'pm25': 'pm25_min'}, inplace=True)

    # Join mean and max first
    df_first_join = pd.merge(ResultWide_mean, ResultWide_max, left_index=True, right_index=True)

    # Join now to min
    ResultWide = pd.merge(df_first_join, ResultWide_min, left_index=True, right_index=True)

    # Change the index
    ResultWide.index.name = 'Date'
    ResultWide.reset_index(inplace=True)
    ResultWide.index.name = 'ID'

    # Write to a file  
    Path = f'Data_measurements/{city}.csv'
    ResultWide.to_csv(Path)

In [37]:
# Call the function
city = 'London'
date_from = '2020-06-01', 
date_to = '2021-10-01'

measurment_to_csv(city,date_from,date_to)

In [39]:
city = 'London'
# If we want to then read in the data
df_city = pd.read_csv(f'Data_measurements/{city}.csv')
df_city.head()

Unnamed: 0,ID,Date,pm10_mean,pm25_mean,pm10_max,pm25_max,pm10_min,pm25_min
0,0,2021-08-27,10.927928,5.25,15.0,11.0,6.0,1.0
1,1,2021-08-28,12.594406,6.518072,19.0,10.0,7.0,4.0
2,2,2021-08-29,10.5625,5.946429,17.0,10.0,7.0,4.0
3,3,2021-08-30,8.222222,4.488095,13.0,7.0,5.0,4.0
4,4,2021-08-31,8.405594,4.377246,14.0,6.0,5.0,3.0
