In [1]:
import datetime
import os

import pandas as pd
import requests

In [2]:
# Current-weather endpoint that get_weather_data() queries.
WEATHER_ENDPOINT = "https://api.openweathermap.org/data/2.5/weather"

# The API key is a credential and must not be stored in the notebook: read it
# from the environment instead (e.g. `export OPENWEATHER_API_KEY=...` before
# starting Jupyter). Any key that was previously committed here should be
# rotated with the provider.
API_KEY = os.environ.get("OPENWEATHER_API_KEY", "")
if not API_KEY:
    # Warn instead of raising so the rest of the notebook can still be run;
    # requests sent without a key are expected to be rejected by the API.
    print("Warning: OPENWEATHER_API_KEY is not set; weather requests will fail.")

In [5]:
# Provinces to query, keyed by display name; each entry holds the latitude and
# longitude that get_weather_data() sends as the `lat` / `lon` request params.
provinces = {
    "Chiang Mai": dict(lat=18.7883, lon=98.9853),
}

In [7]:
def get_weather_data(province:str, timeout:float=10.0):
    """Fetch the current weather for one configured province.

    The coordinates are looked up in the module-level ``provinces`` mapping,
    ``WEATHER_ENDPOINT`` is queried with ``API_KEY`` and ``units=metric``, and
    the JSON response is flattened into a single record.

    Args:
        province: Key of ``provinces``. An unknown key raises ``KeyError``
            (the lookup happens before any error handling).
        timeout: Seconds to wait for the server before the request is
            abandoned. Without a timeout ``requests`` may block indefinitely.

    Returns:
        A dict with the keys ``timestamp``, ``year``, ``month``, ``day``,
        ``hour``, ``minute``, ``requested_province``, ``location``,
        ``weather_main``, ``weather_description`` and ``main.temp``; or
        ``None`` (after printing a message) when the request fails or the
        response does not have the expected fields.
    """
    param = {
        "lat": provinces[province]["lat"],
        "lon": provinces[province]["lon"],
        "appid": API_KEY,
        "units": "metric"
    }

    try:
        response = requests.get(WEATHER_ENDPOINT, params=param, timeout=timeout)
        response.raise_for_status()
        data = response.json()
    except requests.exceptions.RequestException as e:
        print(f"Error fetching weather data: {e}")
        return None
    except ValueError as e:
        # Some requests versions report an undecodable body as a plain
        # ValueError rather than a RequestException subclass.
        print(f"Error fetching weather data: {e}")
        return None

    try:
        # NOTE: fromtimestamp() without a tz yields a naive datetime in the
        # machine's local timezone, so the derived year/month/day/hour fields
        # depend on where this code runs.
        timestamp = datetime.datetime.fromtimestamp(data['dt'])
        conditions = data['weather'][0]

        return {
            'timestamp': timestamp,
            'year': timestamp.year,
            'month': timestamp.month,
            'day': timestamp.day,
            'hour': timestamp.hour,
            'minute': timestamp.minute,
            'requested_province': province,
            'location': data['name'],
            'weather_main': conditions['main'],
            'weather_description': conditions['description'],
            'main.temp': data['main']['temp']
        }
    except (KeyError, IndexError, TypeError) as e:
        # The payload was valid JSON but not shaped like a weather report
        # (missing field, empty `weather` list, ...): treat it like a failed
        # fetch instead of crashing the caller.
        print(f"Error parsing weather data: {e!r}")
        return None

In [8]:
# get_weather_data() returns None when a fetch fails; drop those entries so the
# frame only contains real observations instead of placeholder rows.
weather_records = [get_weather_data(p) for p in provinces]
df = pd.DataFrame([record for record in weather_records if record is not None])
df.info()
df.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1 entries, 0 to 0
Data columns (total 11 columns):
 #   Column               Non-Null Count  Dtype         
---  ------               --------------  -----         
 0   timestamp            1 non-null      datetime64[ns]
 1   year                 1 non-null      int64         
 2   month                1 non-null      int64         
 3   day                  1 non-null      int64         
 4   hour                 1 non-null      int64         
 5   minute               1 non-null      int64         
 6   requested_province   1 non-null      object        
 7   location             1 non-null      object        
 8   weather_main         1 non-null      object        
 9   weather_description  1 non-null      object        
 10  main.temp            1 non-null      float64       
dtypes: datetime64[ns](1), float64(1), int64(5), object(4)
memory usage: 220.0+ bytes


Unnamed: 0,timestamp,year,month,day,hour,minute,requested_province,location,weather_main,weather_description,main.temp
0,2025-04-30 11:25:39,2025,4,30,11,25,Chiang Mai,Chiang Mai,Clouds,scattered clouds,30.76


In [13]:
# Write the collected rows to 'weather_data.parquet' with the pyarrow engine,
# partitioned by the year / month / day / hour columns (in that order).
df.to_parquet('weather_data.parquet', engine='pyarrow',
              partition_cols=['year', 'month', 'day', 'hour'])