# Notebook to preprocess the meteo data.
Source: [Open Meteo](https://open-meteo.com/)

We fetch the data once and store it locally: the historical data will not change, and we do not want to call the API unnecessarily.

In [1]:
import json
from pathlib import Path

import pandas as pd
import requests

Coordinates of the cities we are interested in, given as `[longitude, latitude]`.

In [2]:
# Maps each city name to its [longitude, latitude] pair; the fetch cells
# read index 0 as the longitude and index 1 as the latitude.
city_coords = dict(
    london=[-0.118092, 51.509865],
    berlin=[13.404954, 52.520008],
    amsterdam=[4.899431, 52.379189],
    paris=[2.349014, 48.864716],
    rome=[12.496366, 41.902782],
    barcelona=[2.154007, 41.390205],
    budapest=[19.040236, 47.497913],
    athens=[23.727539, 37.983810],
    lisbon=[-9.142685, 38.736946],
    vienna=[16.363449, 48.210033],
)

## 1. Fetch daily precipitation hours for the whole year of 2022

In [3]:
base_url = "https://archive-api.open-meteo.com/v1/archive"

# One entry per city. `days` and `rain_hours` each collect the complete daily
# series returned for that city, so every DataFrame row carries a whole series.
cities = []
days = []
rain_hours = []

# city_coords stores each pair as [longitude, latitude].
for city, (longitude, latitude) in city_coords.items():
    params = {
        "latitude": latitude,
        "longitude": longitude,
        "start_date": "2022-01-01",
        "end_date": "2022-12-31",
        "daily": "precipitation_hours",
        "timezone": "Europe/Berlin",
    }

    # Bound the wait so a stalled connection cannot hang the notebook.
    response = requests.get(base_url, params=params, timeout=30)
    # Surface HTTP failures as a clear error here instead of a KeyError on
    # 'daily' further down.
    response.raise_for_status()
    raw_data = response.json()

    cities.append(city)
    days.append(raw_data['daily']['time'])
    rain_hours.append(raw_data['daily']['precipitation_hours'])

df_all = pd.DataFrame({
    'city': cities,
    'days': days,
    'rain_hours': rain_hours,
})

In [4]:
# DataFrame.to_csv does not create missing parent directories, so make sure
# the output folder exists before writing (needed on a fresh checkout).
rain_csv_path = Path("data_output/cities_rain_data_2022.csv")
rain_csv_path.parent.mkdir(parents=True, exist_ok=True)

# NOTE: the 'days' and 'rain_hours' cells hold per-city sequences; they are
# written to the CSV as their string representation.
df_all.to_csv(rain_csv_path, index=False)

## 2. Fetch daily mean temperature for the whole year of 2022

In [5]:
base_url = "https://archive-api.open-meteo.com/v1/archive"

# One entry per city. `days` and `mean_temps` each collect the complete daily
# series returned for that city, so every DataFrame row carries a whole series.
cities = []
days = []
mean_temps = []

# city_coords stores each pair as [longitude, latitude].
for city, (longitude, latitude) in city_coords.items():
    params = {
        "latitude": latitude,
        "longitude": longitude,
        "start_date": "2022-01-01",
        "end_date": "2022-12-31",
        "daily": "temperature_2m_mean",
        "timezone": "Europe/Berlin",
    }

    # Bound the wait so a stalled connection cannot hang the notebook.
    response = requests.get(base_url, params=params, timeout=30)
    # Surface HTTP failures as a clear error here instead of a KeyError on
    # 'daily' further down.
    response.raise_for_status()
    raw_data = response.json()

    cities.append(city)
    days.append(raw_data['daily']['time'])
    mean_temps.append(raw_data['daily']['temperature_2m_mean'])

df_all = pd.DataFrame({
    'city': cities,
    'days': days,
    'mean_temps': mean_temps,
})

In [6]:
# DataFrame.to_csv does not create missing parent directories, so make sure
# the output folder exists before writing (needed on a fresh checkout).
mean_temps_csv_path = Path("data_output/cities_mean_temps_2022.csv")
mean_temps_csv_path.parent.mkdir(parents=True, exist_ok=True)

# NOTE: the 'days' and 'mean_temps' cells hold per-city sequences; they are
# written to the CSV as their string representation.
df_all.to_csv(mean_temps_csv_path, index=False)