In [1]:
import os
from datetime import datetime, timedelta

import pandas as pd
import requests
from tqdm import tqdm

In [2]:
def fetch_weather_data(start, end=None, api_key=None, timeout=30):
    """Fetch past weather records for Paris from the World Weather Online API.

    Args:
        start: First date to fetch, sent as the ``date`` query parameter
            (callers in this notebook pass ``"YYYY-MM-DD"`` strings).
        end: Optional last date; sent as ``enddate`` only when truthy.
        api_key: API key for the service. When omitted, the key is read from
            the ``WWO_API_KEY`` environment variable so that the secret does
            not have to live in the notebook.
        timeout: Seconds to wait for the server before ``requests`` gives up.
            Without a timeout a stalled connection would block forever.

    Returns:
        The value stored under ``data -> weather`` in the JSON response, or
        ``None`` when that value is missing or empty. In the ``None`` case the
        payload is printed so the failure can be diagnosed.

    Raises:
        ValueError: If no API key is passed and ``WWO_API_KEY`` is not set.
    """
    if api_key is None:
        api_key = os.environ.get("WWO_API_KEY")
    if not api_key:
        raise ValueError(
            "No API key available: pass api_key=... or set the WWO_API_KEY "
            "environment variable."
        )

    params = {
        'date': start,
        'q': "paris,france",
        'format': "json",
        # presumably the interval in hours between records (1 => hourly) — confirm against the API docs
        'tp': "1",
        'key': api_key,
    }
    if end:
        params['enddate'] = end

    response = requests.get(
        url="https://api.worldweatheronline.com/premium/v1/past-weather.ashx",
        params=params,
        timeout=timeout,
    )

    try:
        json_data = response.json()
    except ValueError:
        # The body was not valid JSON (e.g. an HTML error page); report it the
        # same way as any other failed fetch instead of crashing the caller.
        print("Error when fetching weather data: ", response.text)
        return None

    # Look the keys up defensively: a payload without 'data' or 'weather'
    # must reach the error branch below rather than raise KeyError.
    payload = json_data.get('data') if isinstance(json_data, dict) else None
    weather = payload.get('weather') if isinstance(payload, dict) else None

    if weather:
        return weather

    print("Error when fetching weather data: ", json_data)
    return None

In [3]:
def generate_month_ranges(start_year, start_month, until=None):
    """Split the period from a starting month up to a cutoff into month ranges.

    Args:
        start_year: Year of the first month.
        start_month: Month number (1-12) of the first month.
        until: Optional ``datetime`` cutoff. Defaults to ``datetime.now()``,
            which is the original behaviour; passing an explicit value makes
            the result reproducible.

    Returns:
        A list of ``(first_day, last_day)`` tuples formatted as
        ``"YYYY-MM-DD"`` strings, one per calendar month. A month is included
        only if its first day (at midnight) is strictly before the cutoff, and
        the last range ends on the cutoff date instead of the month's end when
        the cutoff falls inside that month. The list is empty when the start
        month is not before the cutoff.
    """
    current_date = datetime.now() if until is None else until
    start_date = datetime(start_year, start_month, 1)
    month_ranges = []

    while start_date < current_date:
        # 32 days after the 1st always lands in the following month; snapping
        # to its first day and stepping back one day gives this month's end.
        end_date = (start_date + timedelta(days=32)).replace(day=1) - timedelta(days=1)
        if end_date > current_date:
            # Never ask for dates beyond the cutoff.
            end_date = current_date
        month_ranges.append((start_date.strftime("%Y-%m-%d"), end_date.strftime("%Y-%m-%d")))
        start_date = end_date + timedelta(days=1)

    return month_ranges

In [4]:
def fetch_all_weather_data(start_year, start_month):
    """Download weather records month by month and return them as one list.

    The period from the given start month up to the present is split with
    ``generate_month_ranges``; each range is fetched with
    ``fetch_weather_data`` while a progress bar is shown. Ranges whose fetch
    yields nothing (``None`` or an empty result) are skipped.

    Args:
        start_year: Year of the first month to fetch.
        start_month: Month number of the first month to fetch.

    Returns:
        A flat list containing the records of every successful fetch, in
        chronological range order.
    """
    collected = []
    progress = tqdm(
        generate_month_ranges(start_year, start_month),
        desc="Fetching weather data",
        unit="month",
    )

    for month_start, month_end in progress:
        month_records = fetch_weather_data(start=month_start, end=month_end)
        if not month_records:
            continue
        collected.extend(month_records)

    return collected

In [5]:
# Fetch weather records month by month, starting in January 2023 (performs network requests).
data = fetch_all_weather_data(start_year=2023, start_month=1)

Fetching weather data: 100%|██████████| 24/24 [00:03<00:00,  6.81month/s]


In [6]:
def collect_general_weather_data(data):
    """Build a table of the day-level fields of each weather record.

    Args:
        data: Sequence of per-day records (dicts). Each record must contain
            the keys listed in ``daily_fields`` below; other keys are ignored.

    Returns:
        A ``pandas.DataFrame`` with one row per record and one column per
        selected field, in the order the fields are listed. Values are copied
        as-is, without any type conversion.
    """
    daily_fields = (
        'date',
        'maxtempC',
        'mintempC',
        'avgtempC',
        'totalSnow_cm',
        'sunHour',
        'uvIndex',
    )

    rows = [{field: day[field] for field in daily_fields} for day in data]

    return pd.DataFrame(rows)

# Build the day-level table; the bare name on the last line makes the notebook display it.
general_df = collect_general_weather_data(data)
general_df

Unnamed: 0,date,maxtempC,mintempC,avgtempC,totalSnow_cm,sunHour,uvIndex
0,2023-01-01,14,12,13,0.0,2.0,4
1,2023-01-02,12,7,10,0.0,1.0,3
2,2023-01-03,11,6,8,0.0,6.0,3
3,2023-01-04,13,9,10,0.0,1.0,3
4,2023-01-05,13,11,12,0.0,1.0,3
...,...,...,...,...,...,...,...
699,2024-11-30,9,3,5,0.0,8.0,3
700,2024-12-01,11,5,8,0.0,8.0,3
701,2024-12-02,12,7,10,0.0,3.0,2
702,2024-12-03,9,5,8,0.0,1.0,2


In [7]:
def collect_hourly_weather_data(data):
    """Build a table with one row per hourly entry of each weather record.

    Every entry found under a day's ``'hourly'`` list is emitted, so days with
    fewer (or more) than 24 entries are handled instead of raising
    ``IndexError`` or being truncated.

    Args:
        data: Iterable of per-day records (dicts). Each record must provide
            ``'date'`` and an ``'hourly'`` list of dicts containing the keys
            listed in ``hourly_fields`` below; other keys are ignored.

    Returns:
        A ``pandas.DataFrame`` whose columns are ``'date'`` followed by the
        hourly fields in the listed order. Values are copied as-is, without
        any type conversion. Empty input yields an empty frame.
    """
    hourly_fields = (
        'time',
        'tempC',
        'windspeedKmph',
        'winddirDegree',
        'precipMM',
        'humidity',
        'visibility',
        'pressure',
        'cloudcover',
        'HeatIndexC',
        'DewPointC',
        'WindChillC',
        'WindGustKmph',
        'FeelsLikeC',
        'uvIndex',
    )

    hourly_data = []

    for day in data:
        # Iterate over the entries actually present rather than assuming 24.
        for hour in day['hourly']:
            record = {'date': day['date']}
            record.update((field, hour[field]) for field in hourly_fields)
            hourly_data.append(record)

    return pd.DataFrame(hourly_data)

# Build the hour-level table; the bare name on the last line makes the notebook display it.
hourly_df = collect_hourly_weather_data(data)
hourly_df

Unnamed: 0,date,time,tempC,windspeedKmph,winddirDegree,precipMM,humidity,visibility,pressure,cloudcover,HeatIndexC,DewPointC,WindChillC,WindGustKmph,FeelsLikeC,uvIndex
0,2023-01-01,0,14,27,205,0.0,61,10,1015,23,14,6,11,50,11,1
1,2023-01-01,100,14,25,204,0.0,62,10,1016,31,14,6,11,47,11,1
2,2023-01-01,200,13,23,204,0.0,63,10,1016,40,13,7,12,44,12,1
3,2023-01-01,300,13,22,204,0.0,64,10,1016,48,13,7,11,42,11,1
4,2023-01-01,400,13,21,205,0.0,65,10,1016,41,13,7,11,40,11,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16891,2024-12-04,1900,7,10,210,0.0,72,10,1028,41,7,2,5,16,5,1
16892,2024-12-04,2000,7,11,214,0.0,72,10,1028,39,7,2,5,18,5,1
16893,2024-12-04,2100,7,11,216,0.0,74,10,1028,27,7,2,4,18,4,1
16894,2024-12-04,2200,7,12,223,0.0,74,10,1028,63,7,2,4,19,4,1
