In [1]:
import requests
import os
from dotenv import load_dotenv

import pandas as pd

In [2]:
# Read settings from a local .env file (python-dotenv) into the environment.
load_dotenv()

# The API key comes from the environment so it is never hardcoded in the notebook.
MET_OFFICE_API_KEY = os.getenv("MET_OFFICE_API_KEY")
if not MET_OFFICE_API_KEY:
    # Fail early with a clear message instead of sending "key=None" to the API.
    raise RuntimeError(
        "MET_OFFICE_API_KEY is not set; define it in the environment or in a .env file"
    )
FORMAT = "json"

URL_BASIC = "http://datapoint.metoffice.gov.uk/public/data"

# Example from the API documentation (3-hourly forecast for site 3840), kept
# for reference only -- it is not requested in this notebook:
#   {URL_BASIC}/val/wxfcs/all/{FORMAT}/3840?res=3hourly&key=<API key>

# Get list of all locations
url = f"{URL_BASIC}/val/wxobs/all/{FORMAT}/sitelist?key={MET_OFFICE_API_KEY}"

# A timeout stops the cell hanging forever on a stalled connection, and
# raise_for_status() surfaces HTTP errors (e.g. a rejected key) before JSON parsing.
response = requests.get(url=url, timeout=30)
response.raise_for_status()
response_json = response.json()

print(response_json)

{'Locations': {'Location': [{'elevation': '7.0', 'id': '3066', 'latitude': '57.6494', 'longitude': '-3.5606', 'name': 'Kinloss', 'region': 'gr', 'unitaryAuthArea': 'Moray'}, {'elevation': '6.0', 'id': '3068', 'latitude': '57.712', 'longitude': '-3.322', 'obsSource': 'LNDSYN', 'name': 'Lossiemouth', 'region': 'gr', 'unitaryAuthArea': 'Moray'}, {'elevation': '36.0', 'id': '3075', 'latitude': '58.454', 'longitude': '-3.089', 'obsSource': 'LNDSYN', 'name': 'Wick John O Groats Airport', 'region': 'he', 'unitaryAuthArea': 'Highland'}, {'elevation': '15.0', 'id': '3002', 'latitude': '60.749', 'longitude': '-0.854', 'name': 'Baltasound', 'region': 'os', 'unitaryAuthArea': 'Shetland Islands'}, {'elevation': '82.0', 'id': '3005', 'latitude': '60.139', 'longitude': '-1.183', 'obsSource': 'LNDSYN', 'name': 'Lerwick (S. Screen)', 'region': 'os', 'unitaryAuthArea': 'Shetland Islands'}, {'elevation': '57.0', 'id': '3008', 'latitude': '59.527', 'longitude': '-1.628', 'name': 'Fair Isle', 'region': 'os

## Hourly data for the past 24 hours for a given location

In [3]:
# Maps each output column to the key used for it in an observation report ("Rep").
# Insertion order defines the column order of the resulting DataFrame.
_OBSERVATION_FIELDS = {
    "minutes_after_midnight": "$",
    "temperature": "T",
    "weather": "W",
    "humidity": "H",
    "wind_direction": "D",
    "wind_gust": "G",
    "wind_speed": "S",
    "pressure": "P",
    "visibility": "V",
    "dew_point": "Dp",
}


def _as_list(value):
    """Return ``value`` as a list: ``None`` -> ``[]``, a list unchanged, anything else wrapped."""
    if value is None:
        return []
    if isinstance(value, list):
        return value
    return [value]


def gather_location_data(location_id: str) -> pd.DataFrame:
    """
    Gather hourly weather observations for one location from the MET Office API.

    The observations are also written to
    ``../data/weather_data/{location_id}_{location_name}_weather_data`` as both
    ``.csv`` and ``.parquet``; the target directory must already exist.

    Args:
        location_id (str): site ID of the location to query

    Returns:
        location_data_df (pd.DataFrame): one row per observation, with the
            location ID, the day, the minutes after midnight and the reported
            measurements. Values are kept exactly as the API returns them; a
            measurement missing from a report is stored as ``None``.

    Raises:
        requests.HTTPError: if the API answers with an HTTP error status.
        ValueError: if the response is for a different location than requested,
            or contains no observations.
    """
    # Construct the URL using variables and parameters
    url = f"{URL_BASIC}/val/wxobs/all/{FORMAT}/{location_id}?res=hourly&key={MET_OFFICE_API_KEY}"

    # Bound the wait and surface HTTP errors before trying to parse the body.
    response = requests.get(url=url, timeout=30)
    response.raise_for_status()

    location = response.json()["SiteRep"]["DV"]["Location"]

    # Check once that the API answered for the site that was asked for.
    if location["i"] != location_id:
        raise ValueError("Location ID doesn't match specified ID value")

    # Location name in lowercase with underscores, used in the output file names.
    location_name = location["name"].lower().replace(" ", "_")

    # NOTE(review): the API appears to return a bare object instead of a
    # one-element list when there is a single period or report -- TODO confirm.
    # _as_list() accepts both shapes; .get() tolerates reports that omit a
    # measurement (and periods without reports) instead of raising KeyError.
    records = [
        {
            "location_id": location_id,
            "day": period["value"],
            **{column: sample.get(key) for column, key in _OBSERVATION_FIELDS.items()},
        }
        for period in _as_list(location["Period"])
        for sample in _as_list(period.get("Rep"))
    ]

    if not records:
        raise ValueError(f"No observations returned for location ID {location_id}")

    # Explicit columns keep the column order stable regardless of record contents.
    location_data_df = pd.DataFrame(
        records,
        columns=["location_id", "day", *_OBSERVATION_FIELDS],
    )

    location_data_df.to_csv(
        f"../data/weather_data//{location_id}_{location_name}_weather_data.csv",
        index=False
    )
    location_data_df.to_parquet(
        f"../data/weather_data//{location_id}_{location_name}_weather_data.parquet",
        index=False
    )
    # Return the DataFrame containing location data
    return location_data_df


In [4]:
# Site IDs of the observation stations nearest to each city of interest.
interest_locations = [
    "3772",  # Heathrow
    "3535",  # Coleshill, very close to Birmingham
    "3316",  # Crosby, very close to Liverpool
    "3351",  # closest to Manchester
    "3344",  # Bingley Samos, very close to Leeds
    "3872",  # Thorney Island, closest to Southampton and Portsmouth
    "3354",  # Watnall, very close to Nottingham
]

# Gather the observations for every station and preview the first rows of each.
for location in interest_locations:
    location_data_df = gather_location_data(location_id=location)
    print(location_data_df.head(n=5))

  location_id          day minutes_after_midnight temperature weather  \
0        3772  2024-01-26Z                   1140         6.3       0   
1        3772  2024-01-26Z                   1200         5.7       0   
2        3772  2024-01-26Z                   1260         4.4       0   
3        3772  2024-01-26Z                   1320         5.1       0   
4        3772  2024-01-26Z                   1380         2.4       0   

  humidity wind_direction wind_gust wind_speed pressure visibility dew_point  
0     66.2             SW        15          9     1034      27000       0.6  
1     72.3             SW        14          8     1034      25000       1.2  
2     77.4            WSW        13          9     1034      22000       0.9  
3     76.5              W        13          5     1035      22000       1.4  
4     78.4            WSW         9          6     1035      22000      -0.9  
  location_id          day minutes_after_midnight temperature weather  \
0        3535 

In [5]:
# Human-readable description for each significant-weather code.
# Keys are integers.
# NOTE(review): the API seems to deliver the code as a string, so the "weather"
# column probably needs casting to int before mapping -- confirm.
weather_dict = {
    0: "Clear night",
    1: "Sunny day",
    2: "Partly cloudy (night)",
    3: "Partly cloudy (day)",
    4: "Not used",
    5: "Mist",
    6: "Fog",
    7: "Cloudy",
    8: "Overcast",
    9: "Light rain shower (night)",
    10: "Light rain shower (day)",
    11: "Drizzle",
    12: "Light rain",
    13: "Heavy rain shower (night)",
    14: "Heavy rain shower (day)",
    15: "Heavy rain",
    16: "Sleet shower (night)",
    17: "Sleet shower (day)",
    18: "Sleet",
    19: "Hail shower (night)",
    20: "Hail shower (day)",
    21: "Hail",
    22: "Light snow shower (night)",
    23: "Light snow shower (day)",
    24: "Light snow",
    25: "Heavy snow shower (night)",
    26: "Heavy snow shower (day)",
    27: "Heavy snow",
    28: "Thunder shower (night)",
    29: "Thunder shower (day)",
    30: "Thunder",
}