In [1]:
import json
import requests
import numpy as np
import pandas as pd

In [4]:
def extract_weather_data(api_url: str, timeout: float = 30.0):
    '''Fetch a JSON document from an HTTP endpoint.

    Args:
        api_url: fully-formed request url (query string included).
        timeout: seconds to wait for the server before giving up. Without
            a timeout `requests` waits forever, which would hang the
            notebook kernel on a stalled connection.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status.
        requests.RequestException: on connection problems or timeout.
    '''
    response = requests.get(url=api_url, timeout=timeout)
    # surface 4xx/5xx as exceptions instead of returning an error payload
    response.raise_for_status()
    return response.json()

In [5]:
# convert the data into an hourly data frame (or timestamped by time)
# each row is a time, and all of the hourly attributes at that time
def transform_weather_data(response_data: dict):
    '''Build an hourly, utc-indexed data frame from an open-meteo api response.

    Takes an open-meteo api response json in form of a python dict. Only the
    'hourly' section is read, so other sections in the response (units,
    daily data, ...) cannot disturb the result.

    Args:
        response_data: decoded api response. Must contain an 'hourly'
            mapping of equal-length lists, one of which is 'time' (naive
            local timestamps). If a 'timezone' entry is present it is used
            as the zone of those timestamps; otherwise
            'America/Los_Angeles' is assumed.

    Returns:
        A new DataFrame with one row per timestamp and one column per
        hourly attribute. 'time' holds the timezone-aware local time and
        the index, named 'utc_time', holds the same instants in UTC.

    Raises:
        KeyError: if 'hourly' or its 'time' entry is missing.
    '''
    # the 'hourly' section is already column-oriented: {attribute: [values]}
    hourly_df = pd.DataFrame(data=response_data['hourly'])

    # zone of the naive timestamps; fall back to the notebook's location
    local_tz = response_data.get('timezone') or 'America/Los_Angeles'

    # parse local time into localized datetime
    local_time = pd.to_datetime(hourly_df['time']).dt.tz_localize(local_tz)
    hourly_df['time'] = local_time

    # calc utc_time
    hourly_df['utc_time'] = local_time.dt.tz_convert('UTC')

    # set utc_time as index (returns a new frame rather than mutating in place)
    return hourly_df.set_index('utc_time')

In [6]:
# pull data from open-meteo api
# hourly data for one fixed day (2025-07-16): temp(2m), rel_humid(2m), apparent_temp, uv_index, is_day_or_night
# location: san francisco
# timezone: America/Los_Angeles (gmt - 7/8)
# BUT store using utc timestamp
open_meteo_endpoint = (
    'https://historical-forecast-api.open-meteo.com/v1/forecast'
    '?latitude=37.7749&longitude=-122.4194'
    '&start_date=2025-07-16&end_date=2025-07-16'
    '&hourly=temperature_2m,apparent_temperature,uv_index,is_day,relative_humidity_2m'
    '&timezone=America%2FLos_Angeles'
    '&temperature_unit=fahrenheit'
)

try:
    response_data = extract_weather_data(api_url=open_meteo_endpoint)
except Exception as e:
    print(f'Error: {e}')
    # re-raise: swallowing the error would leave response_data undefined
    # (or stale from an earlier run) and the following cells would fail
    # later with a misleading error or silently use old data
    raise


In [7]:
# Load the raw response into a frame to inspect its shape: the top-level
# response keys become columns and the keys of the nested dicts become rows.
df = pd.DataFrame.from_dict(response_data)

print(df)

                       latitude  longitude  generationtime_ms  \
time                  37.763283 -122.41286           0.218987   
temperature_2m        37.763283 -122.41286           0.218987   
apparent_temperature  37.763283 -122.41286           0.218987   
uv_index              37.763283 -122.41286           0.218987   
is_day                37.763283 -122.41286           0.218987   
relative_humidity_2m  37.763283 -122.41286           0.218987   

                      utc_offset_seconds             timezone  \
time                              -25200  America/Los_Angeles   
temperature_2m                    -25200  America/Los_Angeles   
apparent_temperature              -25200  America/Los_Angeles   
uv_index                          -25200  America/Los_Angeles   
is_day                            -25200  America/Los_Angeles   
relative_humidity_2m              -25200  America/Los_Angeles   

                     timezone_abbreviation  elevation hourly_units  \
time              

In [8]:
# summarise the raw response frame: index, columns, non-null counts and dtypes
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 6 entries, time to relative_humidity_2m
Data columns (total 9 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   latitude               6 non-null      float64
 1   longitude              6 non-null      float64
 2   generationtime_ms      6 non-null      float64
 3   utc_offset_seconds     6 non-null      int64  
 4   timezone               6 non-null      object 
 5   timezone_abbreviation  6 non-null      object 
 6   elevation              6 non-null      float64
 7   hourly_units           6 non-null      object 
 8   hourly                 6 non-null      object 
dtypes: float64(4), int64(1), object(4)
memory usage: 480.0+ bytes


In [9]:
# convert the data into an hourly data frame (or timestamped by time)
# each row is a time, and all of the hourly attributes at that time
# reuse transform_weather_data instead of repeating its steps inline, so
# the notebook has a single implementation of the transform
hourly_df = transform_weather_data(response_data)

hourly_df

Unnamed: 0_level_0,time,temperature_2m,apparent_temperature,uv_index,is_day,relative_humidity_2m
utc_time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2025-07-16 07:00:00+00:00,2025-07-16 00:00:00-07:00,59.5,57.6,0.0,0,84
2025-07-16 08:00:00+00:00,2025-07-16 01:00:00-07:00,59.1,57.8,0.0,0,87
2025-07-16 09:00:00+00:00,2025-07-16 02:00:00-07:00,58.9,57.5,0.0,0,87
2025-07-16 10:00:00+00:00,2025-07-16 03:00:00-07:00,58.8,57.4,0.0,0,86
2025-07-16 11:00:00+00:00,2025-07-16 04:00:00-07:00,58.8,57.0,0.0,0,83
2025-07-16 12:00:00+00:00,2025-07-16 05:00:00-07:00,58.0,56.6,0.0,0,85
2025-07-16 13:00:00+00:00,2025-07-16 06:00:00-07:00,57.7,56.3,0.0,0,85
2025-07-16 14:00:00+00:00,2025-07-16 07:00:00-07:00,58.0,56.4,0.15,1,82
2025-07-16 15:00:00+00:00,2025-07-16 08:00:00-07:00,58.8,57.0,0.8,1,80
2025-07-16 16:00:00+00:00,2025-07-16 09:00:00-07:00,60.0,58.0,1.8,1,76


In [15]:
# check the column dtypes of the hourly frame
hourly_df.dtypes

time                    datetime64[ns, America/Los_Angeles]
temperature_2m                                      float64
apparent_temperature                                float64
uv_index                                            float64
is_day                                                int64
relative_humidity_2m                                  int64
dtype: object