In [12]:
#@title Imports
import requests
import pandas as pd
import json
from io import StringIO, BytesIO
import matplotlib.pyplot as plt
from IPython.display import Markdown as md
from datetime import datetime
pd.set_option('display.max_columns', None)

In [13]:
# Value sent as the `limit` query parameter to the OpenAQ API.
# NOTE(review): despite the name, execute() forwards this to fetch_and_check(),
# which uses it as the cap on *measurements* fetched per location; the
# locations request itself is sent without a limit. Confirm which was intended.
LOCATION_NUMBER_LIMIT = 5000

# WGS84 [latitude, longitude] pair of London, UK
LONDON_UK_COORDINATES = [51.5074, -0.1278]

# Scoping radius around the coordinates, in meters
RADIUS = 70000

# URL of the endpoint used to list sensor locations
BASE_URL = 'https://api.openaq.org/v2/locations'

In [7]:
def get_date_time(prompt):
    """
    Prompt for a date and time in ISO 8601 format with a timezone offset.

    The user is asked again until the input parses with the format
    ``%Y-%m-%dT%H:%M:%S%z`` (for example ``2023-05-30T00:00:00+00:00``).
    Leading and trailing whitespace is ignored, so a value pasted with a
    stray space is still accepted.

    Args:
        prompt (str): The prompt shown to the user when asking for input.

    Returns:
        datetime.datetime: The timezone-aware date and time entered by the user.

    Note:
        This function reads from ``input()``, so it cannot be exercised by a
        plain doctest. In tests, redirect standard input or stub ``input``.
    """
    while True:
        # strptime rejects surrounding whitespace, which is easy to pick up
        # when pasting a timestamp, so remove it before parsing.
        date_str = input(prompt).strip()
        try:
            valid_date = datetime.strptime(date_str, "%Y-%m-%dT%H:%M:%S%z")
        except ValueError:
            print("Invalid date and time. Please try again.")
        else:
            print("The date and time you entered is valid.")
            return valid_date

def get_date_range():
    """
    Prompt for a start and an end date/time and return them as a range.

    Each bound is collected with get_date_time(), which keeps prompting until
    the value is valid ISO 8601 with a timezone offset. If the end is earlier
    than the start, the user is told so and asked for both values again, so
    the returned range is never reversed.

    Returns:
        list: ``[date_from, date_to]`` as timezone-aware ``datetime`` objects
        with ``date_from <= date_to``.

    Note:
        Testing this function with doctest is tricky, because it requires
        user input.
    """
    while True:
        date_from = get_date_time("Please enter the start date and time in ISO 8601 format with timezone offset (YYYY-MM-DDTHH:MM:SS+HH:MM): ")
        date_to = get_date_time("Please enter the end date and time in ISO 8601 format with timezone offset (YYYY-MM-DDTHH:MM:SS+HH:MM): ")
        # Both values carry an offset, so they can be compared directly.
        if date_from <= date_to:
            return [date_from, date_to]
        print("The end date and time must not be earlier than the start. Please try again.")


In [8]:
def ask_which_pollutant():
    """
    Asks the user to input the type of pollutant they want to investigate.

    The function keeps prompting until a valid pollutant ('pm25' or 'pm10')
    is given or the user decides to quit. Answers are compared after removing
    surrounding whitespace and lower-casing, so ' PM10 ' is accepted and
    returned as 'pm10'.

    Returns:
        str or bool: The pollutant as a lower-case string if a valid input was
        given, otherwise False if the user chose to quit.

    Example session (illustrative only; the function needs user input, so
    this is not a runnable doctest):

        Enter the pollutant you want to investigate [pm25/pm10]: test
        Invalid input. Quit? [y/n]: n
        Enter the pollutant you want to investigate [pm25/pm10]: pm25
        -> returns 'pm25'

        Enter the pollutant you want to investigate [pm25/pm10]: test
        Invalid input. Quit? [y/n]: y
        -> returns False

    Note:
        For automated testing, redirect standard input or stub ``input``.
    """
    valid_pollutants = ('pm25', 'pm10')
    while True:
        pollutant = input("Enter the pollutant you want to investigate [pm25/pm10]: ").strip().lower()
        if pollutant in valid_pollutants:
            return pollutant

        # Invalid pollutant: ask whether to give up, repeating until the
        # answer is an unambiguous yes or no.
        while True:
            answer = input("Invalid input. Quit? [y/n]: ").strip().lower()
            if answer == 'y':
                return False
            if answer == 'n':
                break
            print("Invalid response. Please enter 'y' or 'n'.")


In [9]:
def check_pollutant(df, x):
    """
    Report whether pollutant `x` occurs in the 'parameter' column of `df`.

    Args:
        df (pandas.DataFrame): Measurements, normally with a 'parameter' column.
        x (str): The pollutant to look for.

    Returns:
        bool: True if `x` is one of the values in ``df['parameter']``; False
        otherwise, including when `df` has no 'parameter' column.
    """
    # A DataFrame built from an empty list of measurements has no columns at
    # all, so indexing 'parameter' would raise KeyError instead of saying "no".
    if 'parameter' not in df.columns:
        return False
    return x in df['parameter'].values

def fetch_and_check(location_id, date_from, date_to, limit, x):
    """
    Fetches air quality data from the OpenAQ API and checks if a given pollutant is present.

    Args:
        location_id (str): The ID of the location to fetch data for.
        date_from (str): The start of the date range to fetch data for, in ISO 8601 format.
        date_to (str): The end of the date range to fetch data for, in ISO 8601 format.
        limit (int): The maximum number of results to fetch.
        x (str): The pollutant to check for.

    Returns:
        bool: True if the pollutant is present in the fetched data, False
        otherwise (including when the location has no measurements at all in
        the requested range).

    Raises:
        requests.HTTPError: If the API answers with an error status code.
    """
    params = {
        'location_id': location_id,
        'date_from': date_from,
        'date_to': date_to,
        'limit': limit
    }
    base_url = 'https://api.openaq.org/v2/measurements'
    response = requests.get(base_url, params=params)
    # Surface HTTP errors directly (as fetch_measurements does) rather than
    # failing later with an unrelated KeyError on the missing 'results' key.
    response.raise_for_status()
    results = response.json()['results']

    # No measurements means the pollutant cannot be present; an empty list
    # would otherwise produce a DataFrame without a 'parameter' column.
    if not results:
        return False

    measurements = [{"date": row['date']['local'], "value":row['value'], "parameter":row["parameter"], "unit":row["unit"]} for row in results]
    df = pd.DataFrame.from_dict(measurements)

    return check_pollutant(df, x)


In [19]:
def execute():
    """
    Finds the locations around London that report a user-chosen pollutant.

    The function requests the locations within RADIUS of LONDON_UK_COORDINATES
    from the OpenAQ API, asks the user for a date range and a pollutant, and
    then checks each location's measurements in that range for the pollutant.

    Returns:
        list: The location IDs where the specified pollutant is present. The
        list is empty if the response has no 'results' key or if the user
        quits at the pollutant prompt.

    Raises:
        Exception: If the locations request does not return HTTP status 200.

    Example:
    Due to the nature of this function (API calls, user input), it's not feasible to provide a doctest.
    """
    params = {
        'coordinates': ','.join(map(str, LONDON_UK_COORDINATES)),
        'radius': RADIUS,
    }
    response = requests.get(BASE_URL, params=params)

    # Check if the request was successful
    if response.status_code != 200:
        raise Exception(f"Request failed with status {response.status_code}")

    data = response.json()

    # Check what keys exist in the data
    print("Data keys:", data.keys())

    # Try to access the 'results' key
    try:
        locations = pd.DataFrame(data['results'], columns=['city', 'entity', 'name', 'id'])
    except KeyError:
        print("The key 'results' does not exist in the data.")
        return []

    date_range = get_date_range()
    pollutant = ask_which_pollutant()
    # ask_which_pollutant() returns False when the user quits; there is then
    # nothing to look for, so skip the per-location API calls entirely.
    if pollutant is False:
        return []

    # fetch_and_check() documents its dates as ISO 8601 strings, so convert
    # the datetime objects explicitly instead of relying on str().
    date_from, date_to = (moment.isoformat() for moment in date_range)

    return [
        location_id
        for location_id in locations['id']
        if fetch_and_check(location_id, date_from, date_to, LOCATION_NUMBER_LIMIT, pollutant)
    ]


### Prompt for user input to obtain the set of location IDs that measure the chosen pollutant


In [20]:
# IDs of the locations around London that report the targeted pollutant.
# execute() is interactive: it prompts for the date range and the pollutant.
target_location_id = execute()
#print(target_location_id)

Data keys: dict_keys(['meta', 'results'])
Please enter the start date and time in ISO 8601 format with timezone offset (YYYY-MM-DDTHH:MM:SS+HH:MM): 2023-05-30T00:00:00+00:00
The date and time you entered is valid.
Please enter the end date and time in ISO 8601 format with timezone offset (YYYY-MM-DDTHH:MM:SS+HH:MM): 2023-06-02T00:00:00+00:00
The date and time you entered is valid.
Enter the pollutant you want to investigate [pm25/pm10]: pm10


### Now that we have the set of location IDs, we want to plot the graphs

In [75]:
def fetch_measurements(location_id, date_from, date_to, limit=5000):
    """Fetches air quality measurements from the OpenAQ API.

    Args:
        location_id (str): The id of the location to get measurements for.
        date_from (str): The start date in ISO 8601 format.
        date_to (str): The end date in ISO 8601 format.
        limit (int, optional): The maximum number of measurements to fetch. Defaults to 5000.

    Returns:
        DataFrame: The measurements for the given location and date range,
        with columns 'date' (parsed with ``pd.to_datetime``), 'value',
        'parameter' and 'unit'. The frame is empty, but keeps these columns,
        when the API returns no results.

    Raises:
        requests.HTTPError: If the API answers with an error status code.
    """
    base_url = 'https://api.openaq.org/v2/measurements'
    params = {
        'location_id': location_id,
        'date_from': date_from,
        'date_to': date_to,
        'limit': limit
    }

    response = requests.get(base_url, params=params)
    response.raise_for_status()

    data = response.json()['results']
    measurements = [{"date": row['date']['local'], "value":row['value'], "parameter":row["parameter"], "unit":row["unit"]} for row in data]

    # Name the columns explicitly: with an empty result list the frame would
    # otherwise have no columns and the 'date' lookup below would raise KeyError.
    df = pd.DataFrame(measurements, columns=['date', 'value', 'parameter', 'unit'])
    df['date'] = pd.to_datetime(df['date'])

    return df

def plot_measurements(df, pollutant):
    """Draws one pollutant's values against time as a line with markers.

    Args:
        df (DataFrame): Measurements with 'date', 'value' and 'parameter' columns.
        pollutant (str): The 'parameter' value selecting the rows to plot.
    """
    # Keep only the rows for this pollutant and use their dates as the x axis.
    selected = df.loc[df['parameter'] == pollutant].set_index('date')
    dates = selected.index
    values = selected['value']

    plt.figure(figsize=[16,8])
    plt.plot(dates, values)
    plt.scatter(dates, values)
    plt.legend([pollutant])
    plt.xlabel('date')
    plt.ylabel('value')
    plt.title(f'{pollutant} µg/m³ overtime')
    plt.show()

def analyse(location_id, date_from='2023-05-15T00:00:00+00:00',date_to='2023-06-02T23:59:00+00:00', limit=5000):
    """Fetches a location's measurements and plots a pollutant chosen by the user.

    The number of measurements per pollutant is printed first, then the user
    is asked which pollutant to plot. If the answer is not one of the
    pollutants in the fetched data, the available options are printed and
    nothing is plotted.

    Args:
        location_id (str): The id of the location to get measurements for.
        date_from (str): The start date in ISO 8601 format.
        date_to (str): The end date in ISO 8601 format.
        limit (int, optional): The maximum number of measurements to fetch. Defaults to 5000.
    """
    measurements = fetch_measurements(location_id, date_from, date_to, limit)
    parameter_column = measurements['parameter']

    print(parameter_column.value_counts())

    pollutant = input('Enter the pollutant: ')
    available = parameter_column.unique()
    if pollutant in available:
        plot_measurements(measurements, pollutant)
    else:
        print(f'Invalid pollutant. Available options are {available}')

In [None]:
# TODO: Change the program so that the pollutant is chosen first,
# followed by the start and end date and time.
# The program should then automatically filter out the locations that do not measure that pollutant,
# so that the graphs for all the remaining locations can be plotted.
