In [66]:
from datetime import datetime
from dotenv import load_dotenv
from os import getenv
import pandas as pd
import requests

# Load variables from a local .env file into the environment; the API helpers
# below read API_KEY from it via getenv.
load_dotenv()
# Do not elide columns when displaying wide DataFrames.
pd.set_option('display.max_columns', None)

In [67]:
# Load the race results and parse the `when` column into datetimes.
df = (
    pd.read_csv('races.csv')
    .assign(when=lambda races: pd.to_datetime(races['when']))
)
df.head(2)

Unnamed: 0,division,when,team1,team2,time_tot,win,hurdles,name1,start_chng1,time1,name2,start_chng2,time2,name3,start_chng3,time3,name4,start_chng4,time4,event,course_color
0,1,2023-04-22 10:11:57,WildRunners dreamers,WildRunners Maximini,17.01,L,25.0,Sunny,-0.24,4.21,Dixie,0.3,4.24,Alex,0.16,4.22,Hector,0.0,4.13,Hanácké škvarek (Heroltice),blue
1,1,2023-04-22 10:11:57,WildRunners Maximini,WildRunners dreamers,19.43,L,25.0,Brooke,-0.11,4.08,Rocky,1.29,5.15,Bella,0.23,4.52,Bree,0.0,4.28,Hanácké škvarek (Heroltice),red


In [99]:
# In-memory cache of geocoding results, keyed by city name:
# {city: {'lat': ..., 'lon': ...}}.
coordinates = {}

def coordinates_of(city):
    """Return ``(lat, lon)`` for ``city`` via the OpenWeatherMap geocoding API.

    Results are memoised in the module-level ``coordinates`` dict, so a city
    that was already resolved does not trigger another HTTP request.

    Parameters
    ----------
    city : str
        City name. 'Tápiószentmárton' is looked up with country code 'HU',
        every other name with 'CZ'.

    Returns
    -------
    tuple
        ``(lat, lon)`` taken from the first match returned by the API.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status.
    IndexError
        If the API returns no match for the city.
    """
    if city in coordinates:
        cached = coordinates[city]
        return cached['lat'], cached['lon']

    country = 'HU' if city == 'Tápiószentmárton' else 'CZ'
    # Let requests build and percent-encode the query string (city names
    # contain non-ASCII characters), and never wait forever on the network.
    response = requests.get(
        'https://api.openweathermap.org/geo/1.0/direct',
        params={'q': f'{city},{country}', 'appid': getenv('API_KEY')},
        timeout=30,
    )
    # Surface HTTP failures as such instead of as an obscure error while
    # indexing the JSON error payload.
    response.raise_for_status()

    matches = response.json()
    if not matches:
        # Same exception type an empty result produced before, now with context.
        raise IndexError(f'No geocoding result for {city!r} in country {country}')

    data = matches[0]
    coordinates[city] = {
        'lat': data['lat'],
        'lon': data['lon']
    }
    return data['lat'], data['lon']

In [69]:
def get_timestamp(date, hour):
    """Return the Unix timestamp (whole seconds) of ``hour``:00:00 on ``date``.

    ``date`` must format as ``YYYY-MM-DD`` and ``hour`` as an hour of the day.
    The parsed datetime is naive, so it is interpreted in the local timezone
    of the machine running the kernel.
    """
    moment = datetime.strptime(f'{date} {hour}:00:00', '%Y-%m-%d %H:%M:%S')
    seconds = moment.timestamp()
    return int(seconds)

In [97]:
def get_weather(city, date, hour):
    """Fetch the historical weather record for ``city`` at ``hour``:00 on ``date``.

    The city is resolved to coordinates with ``coordinates_of`` and the moment
    to a Unix timestamp with ``get_timestamp``; the OpenWeatherMap One Call
    "timemachine" endpoint is then queried with metric units.

    Parameters
    ----------
    city : str
        City name understood by ``coordinates_of``.
    date : str
        Date formatted as ``YYYY-MM-DD``.
    hour : str or int
        Hour of the day.

    Returns
    -------
    dict
        The first entry of the ``data`` list in the API response.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status.
    """
    lat, lon = coordinates_of(city)
    timestamp = get_timestamp(date, hour)
    # Pass the query as params so requests handles encoding, bound the wait
    # with a timeout, and fail loudly on HTTP errors rather than with a
    # KeyError on the missing 'data' field of an error payload.
    response = requests.get(
        'https://api.openweathermap.org/data/3.0/onecall/timemachine',
        params={
            'lat': lat,
            'lon': lon,
            'dt': timestamp,
            'units': 'metric',
            'appid': getenv('API_KEY'),
        },
        timeout=30,
    )
    response.raise_for_status()
    data = response.json()['data'][0]
    return data



In [71]:
def date2city(date):
    """Return the city of the first race held on ``date``.

    The city is the text between the first '(' and the following ')' in the
    ``event`` value of the first row of the global ``df`` whose ``when`` falls
    on ``date``.
    """
    held_that_day = df.when.dt.date == date
    event_name = df.loc[held_that_day, 'event'].iloc[0]
    after_open_paren = event_name.split('(')[1]
    return after_open_paren.split(')')[0]

In [72]:
# Distinct calendar dates that appear in the race data.
days = pd.Series(df['when'].dt.date.unique())

In [117]:
# Hours of the day for which weather is collected: 8 through 18 inclusive.
# Single source of truth for both the row repetition and the `hour` column.
WEATHER_HOURS = list(range(8, 19))

weather_df = pd.DataFrame({
    'date': days,
    'city': days.apply(date2city)
})
# One row per (day, hour): repeat each day once per hour, then cycle the hours.
weather_df = weather_df.loc[weather_df.index.repeat(len(WEATHER_HOURS))].reset_index(drop=True)
weather_df['hour'] = WEATHER_HOURS * len(days)
# Placeholder columns; fill_weather_data overwrites them row by row.
weather_df['temperature'] = 0.0
weather_df['feels_like'] = 0.0
weather_df['humidity'] = 0
weather_df['wind_speed'] = 0.0
weather_df['weather'] = ''
weather_df['rain'] = 0.0
weather_df.head()

Unnamed: 0,date,city,hour,temperature,feels_like,humidity,wind_speed,weather,rain
0,2023-04-22,Heroltice,8,0.0,0.0,0,0.0,,0.0
1,2023-04-22,Heroltice,9,0.0,0.0,0,0.0,,0.0
2,2023-04-22,Heroltice,10,0.0,0.0,0,0.0,,0.0
3,2023-04-22,Heroltice,11,0.0,0.0,0,0.0,,0.0
4,2023-04-22,Heroltice,12,0.0,0.0,0,0.0,,0.0


In [116]:
def fill_weather_data(df):
    """Fill the weather columns of ``df`` in place, one API lookup per row.

    For every row, ``get_weather`` is called with the row's ``city``, ``date``
    and ``hour`` (the latter two converted to strings), and the result is
    written to the ``temperature``, ``feels_like``, ``humidity``,
    ``wind_speed``, ``weather`` and ``rain`` columns of that row. Each row is
    printed before its lookup as a progress trace.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with ``city``, ``date`` and ``hour`` columns plus the six
        weather columns listed above. It is modified in place.

    Returns
    -------
    None
    """
    for i, row in df.iterrows():
        print(row.city, row.date, row.hour)
        weather = get_weather(row.city, str(row.date), str(row.hour))
        df.loc[i, 'temperature'] = weather['temp']
        df.loc[i, 'feels_like'] = weather['feels_like']
        df.loc[i, 'humidity'] = weather['humidity']
        df.loc[i, 'wind_speed'] = weather['wind_speed']
        df.loc[i, 'weather'] = weather['weather'][0]['main']
        # A missing 'rain' entry, or one without a '1h' value, counts as no rain
        # instead of raising KeyError part-way through the fill.
        df.loc[i, 'rain'] = (weather.get('rain') or {}).get('1h', 0.0)

In [118]:
# Populate weather_df in place: one weather request per row, plus one
# geocoding request for each city not yet in the coordinates cache.
fill_weather_data(weather_df)

Heroltice 2023-04-22 8
Heroltice 2023-04-22 9
Heroltice 2023-04-22 10
Heroltice 2023-04-22 11
Heroltice 2023-04-22 12
Heroltice 2023-04-22 13
Heroltice 2023-04-22 14
Heroltice 2023-04-22 15
Heroltice 2023-04-22 16
Heroltice 2023-04-22 17
Heroltice 2023-04-22 18
Heroltice 2023-04-23 8
Heroltice 2023-04-23 9
Heroltice 2023-04-23 10
Heroltice 2023-04-23 11
Heroltice 2023-04-23 12
Heroltice 2023-04-23 13
Heroltice 2023-04-23 14
Heroltice 2023-04-23 15
Heroltice 2023-04-23 16
Heroltice 2023-04-23 17
Heroltice 2023-04-23 18
Žamberk 2023-05-06 8
Žamberk 2023-05-06 9
Žamberk 2023-05-06 10
Žamberk 2023-05-06 11
Žamberk 2023-05-06 12
Žamberk 2023-05-06 13
Žamberk 2023-05-06 14
Žamberk 2023-05-06 15
Žamberk 2023-05-06 16
Žamberk 2023-05-06 17
Žamberk 2023-05-06 18
Žamberk 2023-05-07 8
Žamberk 2023-05-07 9
Žamberk 2023-05-07 10
Žamberk 2023-05-07 11
Žamberk 2023-05-07 12
Žamberk 2023-05-07 13
Žamberk 2023-05-07 14
Žamberk 2023-05-07 15
Žamberk 2023-05-07 16
Žamberk 2023-05-07 17
Žamberk 2023-05-07

In [119]:
# Inspect the filled table.
weather_df

Unnamed: 0,date,city,hour,temperature,feels_like,humidity,wind_speed,weather,rain
0,2023-04-22,Heroltice,8,8.99,7.80,74,2.26,Clear,0.00
1,2023-04-22,Heroltice,9,11.86,10.80,65,2.26,Clear,0.00
2,2023-04-22,Heroltice,10,15.58,14.50,50,2.58,Clear,0.00
3,2023-04-22,Heroltice,11,17.82,16.88,47,2.80,Clear,0.00
4,2023-04-22,Heroltice,12,18.67,17.72,43,3.04,Clear,0.00
...,...,...,...,...,...,...,...,...,...
237,2023-07-30,Tápiószentmárton,14,21.93,22.45,87,0.45,Rain,2.58
238,2023-07-30,Tápiószentmárton,15,21.58,22.04,86,0.45,Rain,2.41
239,2023-07-30,Tápiószentmárton,16,21.88,22.45,89,2.22,Rain,1.35
240,2023-07-30,Tápiószentmárton,17,23.02,23.47,80,1.23,Rain,0.46


In [120]:
# Persist the collected weather data; index=False keeps the row index out of the file.
weather_df.to_csv('weather.csv', index=False)