In [2]:
from datetime import datetime

import pandas as pd
import requests
import numpy as np

# Load the race calendar and attach each race's circuit details (matched on circuitId).
races_table = pd.read_csv('../data/ergast/races.csv')
circuits_table = pd.read_csv('../data/ergast/circuits.csv')
df = races_table.merge(circuits_table, on='circuitId')

# Per-race weather results keyed by raceId; populated by get_weather_data.
visited_races = dict()

In [3]:
# Buckets that map a numeric weather code to a coarse label.
# Every entry is a range so that membership tests behave uniformly.
WMO_LABELS = {
    "dry": [range(0, 20)],
    "rain": [range(20, 22), range(24, 26), range(50, 60), range(60, 68), range(80, 83), range(87, 90), range(91, 93)],
    "dust": [range(30, 50)],
    "snow": [range(22, 24), range(26, 27), range(68, 80), range(83, 87), range(93, 95)],
}


def _code_matches(code, entry):
    """Return True when `code` is covered by `entry` (a single code or a container of codes)."""
    # A bare int cannot be used on the right-hand side of `in`, so compare it directly.
    if isinstance(entry, int):
        return code == entry
    return code in entry


def get_label(code):
    """Return the WMO_LABELS label whose entries contain `code`.

    Args:
        code: Numeric weather code to classify.

    Returns:
        The first matching label in WMO_LABELS order, or "Unknown" when no
        bucket contains the code.
    """
    for label, entries in WMO_LABELS.items():
        if any(_code_matches(code, entry) for entry in entries):
            return label
    return "Unknown"


def parse_time(time_str):
    """Convert an 'HH:MM:SS' string into a `datetime.time`.

    Any non-string input (for example a NaN standing in for a missing value)
    is treated as '12:00:00'.
    """
    # Fall back to noon when no usable string was supplied.
    raw_value = time_str if isinstance(time_str, str) else '12:00:00'
    parsed = datetime.strptime(raw_value, '%H:%M:%S')
    return parsed.time()

In [None]:
# Cells holding the literal text \N become NaN so they are treated as missing values.
df = df.replace(to_replace='\\N', value=np.nan)

In [None]:
from tqdm import tqdm
from concurrent.futures import ThreadPoolExecutor
from multiprocessing import Pool
from threading import Thread
import numpy as np


def get_weather_data(row, timeout=30):
    """Look up the weather around one race's start time and cache it in `visited_races`.

    The archive endpoint is queried for the race date at the circuit's
    coordinates. The weather code is read at the race's start hour; humidity
    and temperature are averaged over the hourly values from the start hour up
    to three hours ahead (fewer when the returned list ends first).

    Nothing is fetched when the race already has a cached entry with a non-NaN
    humidity; an entry whose humidity is NaN is fetched again.

    Args:
        row: Mapping with 'raceId', 'date', 'time', 'lat' and 'lng' keys.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        None. The result is stored in `visited_races[race_id]` as a dict with
        the keys 'weather_condition', 'humidity', 'wmo_code' and 'temperature'.
        On any failure the error is printed and all four values are NaN.
    """
    race_id = row['raceId']
    date = row['date']

    if race_id not in visited_races or np.isnan(visited_races[race_id]['humidity']):
        try:
            url = "https://archive-api.open-meteo.com/v1/archive"
            params = {
                "latitude": row["lat"],
                "longitude": row["lng"],
                "start_date": date,
                "end_date": date,
                "hourly": "relativehumidity_2m,weathercode,temperature_2m"
            }

            # The timeout keeps a stalled connection from blocking a worker forever;
            # raise_for_status reports an HTTP error directly instead of letting it
            # surface later as a KeyError on the missing 'hourly' field.
            response = requests.get(url, params=params, timeout=timeout)
            response.raise_for_status()
            data = response.json()

            weather_data = data['hourly']
            weathercode = weather_data['weathercode']
            humidity_data = weather_data['relativehumidity_2m']
            temperature_data = weather_data['temperature_2m']

            # NOTE(review): the hourly lists are indexed by the start hour, which assumes
            # one value per hour beginning at 00:00 in the same timezone as row['time'] — confirm.
            start_hour = parse_time(row['time']).hour
            wmo_code = weathercode[start_hour]
            weather_condition = get_label(wmo_code)
            humidity = np.mean(humidity_data[start_hour:start_hour + 3])
            temperature = np.mean(temperature_data[start_hour:start_hour + 3])

        except Exception as e:
            # Best effort: record NaNs so the race is retried on the next run.
            print("Error: ", e)
            weather_condition = np.nan
            humidity = np.nan
            wmo_code = np.nan
            temperature = np.nan

        visited_races[race_id] = {'weather_condition': weather_condition, 'humidity': humidity, 'wmo_code': wmo_code,
                                  'temperature': temperature}


def get_weather_data_parallel(df):
    """Run `get_weather_data` over every row of `df` on a thread pool.

    Each row is passed as a dict and a tqdm progress bar tracks completion.
    Returns the list of values returned by `get_weather_data`, one per row.
    """
    records = df.to_dict('records')
    with ThreadPoolExecutor() as executor:
        # Consume the lazy map inside the `with` block so every task has finished on exit.
        progress = tqdm(executor.map(get_weather_data, records), total=len(df))
        results = list(progress)

    return results


# Only fetch weather for races whose date is earlier than the current moment.
race_dates = pd.to_datetime(df['date'])
get_weather_data_parallel(df[race_dates < datetime.now()])

In [4]:
def get_forecat(latitude=47.2197, longitude=14.7647, date='2023-07-02', start_time='13:00:00', timeout=30):
    """Print the forecast weather label, mean humidity and mean temperature for one day.

    The defaults reproduce the query that was previously hard-coded in the URL.

    Args:
        latitude: Latitude sent to the forecast endpoint.
        longitude: Longitude sent to the forecast endpoint.
        date: Day to query, used as both start_date and end_date.
        start_time: 'HH:MM:SS' string; its hour selects the weather code and the
            first of up to three hourly values that are averaged.
        timeout: Seconds to wait for the HTTP request before giving up.

    Raises:
        requests.HTTPError: If the endpoint answers with an error status.
    """
    url = 'https://api.open-meteo.com/v1/forecast'
    params = {
        "latitude": latitude,
        "longitude": longitude,
        "start_date": date,
        "end_date": date,
        "hourly": "relativehumidity_2m,weathercode,temperature_2m",
    }

    # Fail fast on a stalled connection or an error response rather than hanging
    # or hitting a KeyError on the missing 'hourly' field.
    response = requests.get(url, params=params, timeout=timeout)
    response.raise_for_status()
    data = response.json()

    weather_data = data['hourly']

    weathercode = weather_data['weathercode']
    humidity_data = weather_data['relativehumidity_2m']
    temperature_data = weather_data['temperature_2m']

    start_hour = datetime.strptime(start_time, '%H:%M:%S').time().hour
    weather_condition = get_label(weathercode[start_hour])
    humidity = np.mean(humidity_data[start_hour:start_hour + 3])
    temperature = np.mean(temperature_data[start_hour:start_hour + 3])

    print(weather_condition, humidity, temperature)


get_forecat()

rain 67.66666666666667 23.066666666666666


In [None]:
import json

# Turn the per-race cache into a table: one row per raceId, one column per weather field.
# Built directly from the dict; passing a literal JSON string to pd.read_json is deprecated.
df_wh = pd.DataFrame.from_dict(visited_races, orient='index')
# NOTE(review): inputs are read from '../data/...' but this writes under 'data/...' — confirm the intended output directory.
df_wh.to_csv('data/weather.csv')

Unnamed: 0,raceId,year,round,circuitId,name_x,date,time,url_x,fp1_date,fp1_time,...,sprint_date,sprint_time,circuitRef,name_y,location,country,lat,lng,alt,url_y
235,833,1950,1,9,British Grand Prix,1950-05-13,\N,http://en.wikipedia.org/wiki/1950_British_Gran...,\N,\N,...,\N,\N,silverstone,Silverstone Circuit,Silverstone,UK,52.0786,-1.01694,153.0,http://en.wikipedia.org/wiki/Silverstone_Circuit
170,834,1950,2,6,Monaco Grand Prix,1950-05-21,\N,http://en.wikipedia.org/wiki/1950_Monaco_Grand...,\N,\N,...,\N,\N,monaco,Circuit de Monaco,Monte-Carlo,Monaco,43.7347,7.42056,7.0,http://en.wikipedia.org/wiki/Circuit_de_Monaco
684,835,1950,3,19,Indianapolis 500,1950-05-30,\N,http://en.wikipedia.org/wiki/1950_Indianapolis...,\N,\N,...,\N,\N,indianapolis,Indianapolis Motor Speedway,Indianapolis,USA,39.7950,-86.23470,223.0,http://en.wikipedia.org/wiki/Indianapolis_Moto...
1059,836,1950,4,66,Swiss Grand Prix,1950-06-04,\N,http://en.wikipedia.org/wiki/1950_Swiss_Grand_...,\N,\N,...,\N,\N,bremgarten,Circuit Bremgarten,Bern,Switzerland,46.9589,7.40194,551.0,http://en.wikipedia.org/wiki/Circuit_Bremgarten
376,837,1950,5,13,Belgian Grand Prix,1950-06-18,\N,http://en.wikipedia.org/wiki/1950_Belgian_Gran...,\N,\N,...,\N,\N,spa,Circuit de Spa-Francorchamps,Spa,Belgium,50.4372,5.97139,401.0,http://en.wikipedia.org/wiki/Circuit_de_Spa-Fr...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1075,1116,2023,19,69,United States Grand Prix,2023-10-22,19:00:00,https://en.wikipedia.org/wiki/2023_United_Stat...,2023-10-20,17:30:00,...,2023-10-21,22:00:00,americas,Circuit of the Americas,Austin,USA,30.1328,-97.64110,161.0,http://en.wikipedia.org/wiki/Circuit_of_the_Am...
849,1117,2023,20,32,Mexico City Grand Prix,2023-10-29,20:00:00,https://en.wikipedia.org/wiki/2023_Mexico_City...,2023-10-27,18:30:00,...,\N,\N,rodriguez,Autódromo Hermanos Rodríguez,Mexico City,Mexico,19.4042,-99.09070,2227.0,http://en.wikipedia.org/wiki/Aut%C3%B3dromo_He...
549,1118,2023,21,18,São Paulo Grand Prix,2023-11-05,17:00:00,https://en.wikipedia.org/wiki/2023_S%C3%A3o_Pa...,2023-11-03,14:30:00,...,2023-11-04,18:30:00,interlagos,Autódromo José Carlos Pace,São Paulo,Brazil,-23.7036,-46.69970,785.0,http://en.wikipedia.org/wiki/Aut%C3%B3dromo_Jo...
1101,1119,2023,22,80,Las Vegas Grand Prix,2023-11-19,06:00:00,https://en.wikipedia.org/wiki/2023_Las_Vegas_G...,2023-11-17,04:30:00,...,\N,\N,vegas,Las Vegas Strip Street Circuit,Las Vegas,United States,36.1147,-115.17300,0.6,https://en.wikipedia.org/wiki/Las_Vegas_Grand_...
