In [None]:
import json
import requests
import numpy as np
import pandas as pd

In [None]:
import functools

# logging, timing, arguments

In [None]:
def extract_weather_data(api_url: str, timeout: float = 30.0) -> dict:
    '''Fetch a JSON document from ``api_url`` and return it decoded.

    Args:
        api_url: Fully-formed URL to request with HTTP GET.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits forever, which would hang the kernel
            on a stalled connection.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection problems or timeouts.
    '''
    response = requests.get(url=api_url, timeout=timeout)
    # Fail loudly on HTTP error statuses instead of trying to parse the body.
    response.raise_for_status()
    return response.json()

In [None]:
# convert the data into an hourly data frame (or timestamped by time)
# each row is a time, and all of the hourly attributes at that time
def transform_weather_data(response_data: dict, default_timezone: str = 'America/Los_Angeles') -> pd.DataFrame:
    '''Build an hourly, UTC-indexed DataFrame from an open-meteo API response.

    Args:
        response_data: Decoded open-meteo JSON response as a python dict. It
            must contain an ``'hourly'`` section mapping each attribute name
            (including ``'time'``) to a list of equal length.
        default_timezone: Timezone used to interpret the naive ``'time'``
            strings when the response does not carry its own ``'timezone'``
            field.

    Returns:
        A DataFrame with one row per hourly timestamp. The ``time`` column
        holds the timezone-aware local time, the remaining columns are the
        hourly attributes, and the index is ``utc_time`` (the same instants
        expressed in UTC).

    Raises:
        ValueError: If ``response_data`` has no ``'hourly'`` section.
    '''
    if 'hourly' not in response_data:
        raise ValueError("response_data has no 'hourly' section; nothing to transform")

    # Read the hourly section directly. Building a frame from the whole
    # response mixes in the keys of every other nested section (e.g. 'daily'),
    # which would show up here as extra all-NaN columns.
    hourly_df = pd.DataFrame(data=response_data['hourly'])

    # The timestamps are naive strings expressed in the timezone the response
    # declares; fall back to the default only when that field is absent.
    local_tz = response_data.get('timezone') or default_timezone

    # NOTE(review): tz_localize raises on ambiguous / non-existent local times
    # (DST transitions) - confirm how the API reports those hours before
    # loading a date range that spans one.
    local_time = pd.to_datetime(hourly_df['time']).dt.tz_localize(local_tz)

    # Keep the localized time as a column and index the frame by the UTC instant.
    hourly_df = hourly_df.assign(
        time=local_time,
        utc_time=local_time.dt.tz_convert('UTC'),
    )
    return hourly_df.set_index('utc_time')

In [None]:
# pull data from open-meteo api
# Hourly variables requested: temperature (2 m), apparent temperature,
# UV index, day/night flag and relative humidity (2 m).
# Location: San Francisco.
# The API is asked for America/Los_Angeles local timestamps (GMT-7/-8),
# BUT the data is stored using UTC timestamps.
# NOTE: the date range is pinned to 2025-07-16; the stated goal is the most
# recent available day (current day - 2).
open_meteo_endpoint = (
    'https://historical-forecast-api.open-meteo.com/v1/forecast'
    '?latitude=37.7749&longitude=-122.4194'
    '&start_date=2025-07-16&end_date=2025-07-16'
    '&hourly=temperature_2m,apparent_temperature,uv_index,is_day,relative_humidity_2m'
    '&timezone=America%2FLos_Angeles'
    '&temperature_unit=fahrenheit'
)

try:
    response_data = extract_weather_data(api_url=open_meteo_endpoint)
except Exception as e:
    print(f'Error: {e}')
    # Re-raise: swallowing the error would let later cells run against a
    # missing (NameError) or stale `response_data` from a previous run.
    raise


In [None]:
# Load the raw response into a frame purely for inspection.
df = pd.DataFrame.from_dict(data=response_data)

# A bare last expression uses the notebook's rich table display instead of
# the plain-text dump that print() produces.
df

In [None]:
# Summarise the raw response frame: column dtypes, non-null counts and memory use.
df.info()

In [None]:
df = pd.DataFrame.from_dict(data=response_data)

# Reshape into an hourly frame: one row per timestamp, one column per
# hourly attribute reported at that time.
columns_to_extract = df.index
hourly_data = df.loc[columns_to_extract, 'hourly'].to_dict()
hourly_df = pd.DataFrame(data=hourly_data)

# Interpret the naive timestamps as Los Angeles local time, derive the
# matching UTC instants, and keep only the UTC column.
local_time = pd.to_datetime(hourly_df['time']).dt.tz_localize('America/Los_Angeles')
hourly_df = (
    hourly_df
    .assign(utc_time=local_time.dt.tz_convert('UTC'))
    .drop(columns=['time'])
)

hourly_df

In [None]:
# Check column dtypes before loading; utc_time comes from tz_convert('UTC')
# in the previous cell, so it should be a timezone-aware datetime column.
hourly_df.dtypes

# Load into Postgres

In [None]:
import os

import psycopg2

# Connection settings are read from the standard libpq environment variables
# so real credentials never have to be written into the notebook. The
# fallbacks are the local development values this notebook was written with.
# `conn` and `cur` are intentionally left open for the cells below;
# use context managers in finalized .py files.
conn = psycopg2.connect(
    dbname=os.environ.get('PGDATABASE', 'weather_db'),  # `database=` is a deprecated alias
    user=os.environ.get('PGUSER', 'postgres'),
    password=os.environ.get('PGPASSWORD', 'example'),
    host=os.environ.get('PGHOST', 'localhost'),
    port=int(os.environ.get('PGPORT', '5432')),
)

cur = conn.cursor()
# Dev-only reset: uncomment to drop the table and start from scratch.
# cur.execute('DROP TABLE sf_hourly_weather')


In [None]:
from io import StringIO

# Target columns, in the order the CSV fields are written. COPY maps fields
# by position, so this assumes hourly_df's columns arrive in the order the
# API request lists them, followed by utc_time - verify with the dtypes cell.
WEATHER_COLUMNS = (
    'temperature_2m_f, apparent_temperature_f, uv_index, '
    'is_day, relative_humidity_2m_perc, utc_time'
)

try:
    # create table if doesnt exist
    cur.execute('''
    CREATE TABLE IF NOT EXISTS sf_hourly_weather (
        temperature_2m_f REAL,
        apparent_temperature_f REAL,
        uv_index REAL,
        is_day BOOLEAN,
        relative_humidity_2m_perc REAL,
        utc_time TIMESTAMPTZ PRIMARY KEY
    )
    ''')

    # Stage the rows in a temp table first so that re-running this cell does
    # not die on the primary key: rows already present are skipped below.
    cur.execute('''
    CREATE TEMP TABLE sf_hourly_weather_staging
    (LIKE sf_hourly_weather)
    ON COMMIT DROP
    ''')

    with StringIO() as buffer:
        hourly_df.to_csv(buffer, index=False, header=False)
        buffer.seek(0)
        # CSV format (rather than copy_from's text format) treats the empty
        # fields that to_csv writes for missing values as NULL.
        cur.copy_expert(
            f'COPY sf_hourly_weather_staging ({WEATHER_COLUMNS}) FROM STDIN WITH (FORMAT csv)',
            buffer,
        )

    cur.execute(f'''
    INSERT INTO sf_hourly_weather ({WEATHER_COLUMNS})
    SELECT {WEATHER_COLUMNS} FROM sf_hourly_weather_staging
    ON CONFLICT (utc_time) DO NOTHING
    ''')

    # psycopg2 opens a transaction implicitly; without a commit the table and
    # its rows vanish when the connection closes.
    conn.commit()
except Exception:
    # Leave the connection usable: a failed statement otherwise keeps the
    # transaction in an aborted state and every later query errors out.
    conn.rollback()
    raise

In [None]:
# Debug check: show the concrete class of the connection object.
print(type(conn))

In [None]:
# Read the table back to verify the load. Without ORDER BY the row order is
# unspecified, so sort by the primary key to get a stable display.
cur.execute('SELECT * FROM sf_hourly_weather ORDER BY utc_time')
result_list = cur.fetchall()
# cur.description carries one entry per result column; reuse the names as headers.
columns = [col.name for col in cur.description]
query_df = pd.DataFrame(data=result_list, columns=columns)

# Alternative kept for reference (also shows the local Los Angeles time):
# query_df = pd.read_sql(sql="select utc_time at time zone 'America/Los_Angeles', * from sf_hourly_weather", con=conn)
query_df