In [102]:
import json
import requests
import numpy as np
import pandas as pd

In [103]:
import functools

# logging, timing, arguments

In [104]:
def extract_weather_data(api_url: str):
    response = requests.get(url=api_url)
    response.raise_for_status()
    return response.json()

In [105]:
# convert the data into an hourly data frame (or timestamped by time)
# each row is a time, and all of the hourly attributes at that time
def transform_weather_data(response_data: dict):
    '''Takes an open-meteo api response json in form of a python dict'''
    # load response_data into a raw dataframe
    df = pd.DataFrame.from_dict(data=response_data)
    
    columns_to_extract = df.index

    hourly_data = {
        key: df.at[key, 'hourly']
        for key in columns_to_extract
    }

    hourly_df = pd.DataFrame(data=hourly_data)

    # parse local time into localized datetime
    hourly_df['time'] = pd.to_datetime(hourly_df['time'])
    hourly_df['time'] = hourly_df['time'].dt.tz_localize('America/Los_Angeles')
    
    # calc utc_time from 
    hourly_df['utc_time'] = hourly_df['time'].dt.tz_convert('UTC')
    
    # set utc_time as index
    hourly_df.set_index('utc_time', inplace=True)

    return hourly_df

In [106]:
# pull data from open-meteo api
try:
    # get most recent (current day - 2) hourly data: temp(2m), rel_humid(2m), apparent_temp, uv_index, is_day_or_night
    # location: san francisco
    # timezone: americas/los angeles (gmt - 7/8)
    # BUT store using utc timestamp
    open_meteo_endpoint ='https://historical-forecast-api.open-meteo.com/v1/forecast?latitude=37.7749&longitude=-122.4194&start_date=2025-07-16&end_date=2025-07-16&hourly=temperature_2m,apparent_temperature,uv_index,is_day,relative_humidity_2m&timezone=America%2FLos_Angeles&temperature_unit=fahrenheit'
    response_data = extract_weather_data(api_url=open_meteo_endpoint)
except Exception as e:
    print(f'Error: {e}')


In [107]:
df = pd.DataFrame.from_dict(data=response_data)
    
print(df)

                       latitude  longitude  generationtime_ms  \
time                  37.763283 -122.41286         110.043526   
temperature_2m        37.763283 -122.41286         110.043526   
apparent_temperature  37.763283 -122.41286         110.043526   
uv_index              37.763283 -122.41286         110.043526   
is_day                37.763283 -122.41286         110.043526   
relative_humidity_2m  37.763283 -122.41286         110.043526   

                      utc_offset_seconds             timezone  \
time                              -25200  America/Los_Angeles   
temperature_2m                    -25200  America/Los_Angeles   
apparent_temperature              -25200  America/Los_Angeles   
uv_index                          -25200  America/Los_Angeles   
is_day                            -25200  America/Los_Angeles   
relative_humidity_2m              -25200  America/Los_Angeles   

                     timezone_abbreviation  elevation hourly_units  \
time              

In [108]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 6 entries, time to relative_humidity_2m
Data columns (total 9 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   latitude               6 non-null      float64
 1   longitude              6 non-null      float64
 2   generationtime_ms      6 non-null      float64
 3   utc_offset_seconds     6 non-null      int64  
 4   timezone               6 non-null      object 
 5   timezone_abbreviation  6 non-null      object 
 6   elevation              6 non-null      float64
 7   hourly_units           6 non-null      object 
 8   hourly                 6 non-null      object 
dtypes: float64(4), int64(1), object(4)
memory usage: 480.0+ bytes


In [109]:
df = pd.DataFrame.from_dict(data=response_data)

# convert the data into an hourly data frame (or timestamped by time)
# each row is a time, and all of the hourly attributes at that time

columns_to_extract = df.index

hourly_data = {
    key: df.at[key, 'hourly']
    for key in columns_to_extract
}

hourly_df = pd.DataFrame(data=hourly_data)
# parse local time into localized datetime
hourly_df['time'] = pd.to_datetime(hourly_df['time'])
hourly_df['time'] = hourly_df['time'].dt.tz_localize('America/Los_Angeles')
# calc utc_time
hourly_df['utc_time'] = hourly_df['time'].dt.tz_convert('UTC')

# drop localized datetime
hourly_df.drop(columns=['time'], inplace=True)

hourly_df

Unnamed: 0,temperature_2m,apparent_temperature,uv_index,is_day,relative_humidity_2m,utc_time
0,59.5,57.6,0.0,0,84,2025-07-16 07:00:00+00:00
1,59.1,57.8,0.0,0,87,2025-07-16 08:00:00+00:00
2,58.9,57.5,0.0,0,87,2025-07-16 09:00:00+00:00
3,58.8,57.4,0.0,0,86,2025-07-16 10:00:00+00:00
4,58.8,57.0,0.0,0,83,2025-07-16 11:00:00+00:00
5,58.0,56.6,0.0,0,85,2025-07-16 12:00:00+00:00
6,57.7,56.3,0.0,0,85,2025-07-16 13:00:00+00:00
7,58.0,56.4,0.15,1,82,2025-07-16 14:00:00+00:00
8,58.8,57.0,0.8,1,80,2025-07-16 15:00:00+00:00
9,60.0,58.0,1.8,1,76,2025-07-16 16:00:00+00:00


In [110]:
hourly_df.dtypes

temperature_2m                      float64
apparent_temperature                float64
uv_index                            float64
is_day                                int64
relative_humidity_2m                  int64
utc_time                datetime64[ns, UTC]
dtype: object

# Load into Postgres

In [111]:
import psycopg2

# use context managers in finalized .py files
conn = psycopg2.connect(
    database='weather_db',
    user='postgres',
    password='example',
    host='localhost',
    port=5432
)

cur = conn.cursor()
# cur.execute('DROP TABLE sf_hourly_weather')


In [112]:
from io import StringIO

# create table if doesnt exist
cur.execute('''
CREATE TABLE IF NOT EXISTS sf_hourly_weather (
    temperature_2m_f REAL,
    apparent_temperature_f REAL,
    uv_index REAL,
    is_day BOOLEAN,
    relative_humidity_2m_perc REAL,
    utc_time TIMESTAMPTZ PRIMARY KEY
)
''')

with StringIO() as buffer:
    hourly_df.to_csv(buffer, index=False, header=False)
    buffer.seek(0)
    cur.copy_from(buffer, table='sf_hourly_weather', sep=',')

InvalidDatetimeFormat: invalid input syntax for type timestamp with time zone: "59.5"
CONTEXT:  COPY sf_hourly_weather, line 1, column time: "59.5"


In [None]:
print(type(conn))

<class 'psycopg2.extensions.connection'>


In [None]:
cur.execute('select * from sf_hourly_weather')
result_list = cur.fetchall()
columns = [col.name for col in cur.description]
query_df = pd.DataFrame(data=result_list, columns=columns)

# query_df = pd.read_sql(sql="select utc_time at time zone 'America/Los_Angeles', * from sf_hourly_weather", con=conn)
query_df

Unnamed: 0,temperature_2m_f,apparent_temperature_f,uv_index,is_day,relative_humidity_2m_perc,utc_time
0,59.5,57.6,0.0,False,84.0,2025-07-16 07:00:00+00:00
1,59.1,57.8,0.0,False,87.0,2025-07-16 08:00:00+00:00
2,58.9,57.5,0.0,False,87.0,2025-07-16 09:00:00+00:00
3,58.8,57.4,0.0,False,86.0,2025-07-16 10:00:00+00:00
4,58.8,57.0,0.0,False,83.0,2025-07-16 11:00:00+00:00
5,58.0,56.6,0.0,False,85.0,2025-07-16 12:00:00+00:00
6,57.7,56.3,0.0,False,85.0,2025-07-16 13:00:00+00:00
7,58.0,56.4,0.15,True,82.0,2025-07-16 14:00:00+00:00
8,58.8,57.0,0.8,True,80.0,2025-07-16 15:00:00+00:00
9,60.0,58.0,1.8,True,76.0,2025-07-16 16:00:00+00:00
