## Import dependencies

In [1]:
import requests
import pandas as pd 
import os 
import transform_functions as tf
from credentials import *  

## Get Australian Capital Cities
Read in the CSV of Australian capital cities.

In [2]:
# Load the capital-city list; the path is resolved relative to the notebook's
# working directory (one level up, inside the "data" folder).
capital_cities_df = pd.read_csv(
    filepath_or_buffer=os.path.join("..", "data", "australian_capital_cities.csv"),
)

## Get Weather Data 
Get the current weather for each capital city by calling the OpenWeatherMap REST API.

In [3]:
# Fetch the current weather for every capital city and collect the raw JSON
# payloads in `weather_data` (one dict per city, in CSV order).
# `api_key` is not defined in this notebook; presumably it is supplied by the
# `credentials` star-import in the first cell.
# HTTPS is used so the API key in the query string is not sent in clear text.
weather_api_url = "https://api.openweathermap.org/data/2.5/weather"

weather_data = []
for city_name in capital_cities_df["city_name"]:
    params = {
        "q": city_name,
        "appid": api_key
    }
    # The timeout stops a stalled connection from hanging the notebook indefinitely.
    response = requests.get(weather_api_url, params=params, timeout=30)
    if response.status_code == 200:
        # Parse the response already in hand rather than requesting the endpoint
        # again: one call per city, and the stored payload is guaranteed to be
        # the one whose status code was just checked.
        weather_data.append(response.json())
    else:
        # Any non-200 status aborts the whole extraction.
        raise Exception("Extracting weather api data failed. Please check if API limits have been reached.")

## Read JSON data into Pandas DataFrame

In [4]:
# Flatten the nested JSON payloads into a table with one row per API response;
# nested keys become dot-separated column names (e.g. "coord.lon").
weather_df = pd.json_normalize(data=weather_data)

## Convert Unix timestamps to datetime strings

In [5]:
# Run the project helper over the "dt" column; in the displayed output that
# column then reads as "YYYY-MM-DD HH:MM:SS" instead of a Unix epoch.
# NOTE(review): sys.sunrise / sys.sunset still show raw epoch integers in the
# output and are not listed in `date_columns` — confirm that is intended.
date_fixed_weather_df = tf.convert_unix_timestamp(
    input_df=weather_df,
    date_columns=["dt"],
)
date_fixed_weather_df.head()

Unnamed: 0,weather,base,visibility,dt,timezone,id,name,cod,coord.lon,coord.lat,...,wind.speed,wind.deg,wind.gust,rain.1h,clouds.all,sys.type,sys.id,sys.country,sys.sunrise,sys.sunset
0,"[{'id': 501, 'main': 'Rain', 'description': 'm...",stations,10000,2021-12-01 14:03:41,39600,2172517,Canberra,200,149.1281,-35.2835,...,0.89,77,2.68,2.66,40,2,2004200,AU,1638384133,1638435824
1,"[{'id': 800, 'main': 'Clear', 'description': '...",stations,10000,2021-12-01 14:08:20,39600,2147714,Sydney,200,151.2073,-33.8679,...,1.34,20,2.24,,6,2,2001174,AU,1638383850,1638435109
2,"[{'id': 800, 'main': 'Clear', 'description': '...",stations,10000,2021-12-01 14:08:31,34200,2073124,Darwin,200,130.8418,-12.4611,...,2.57,290,,,0,1,9574,AU,1638304910,1638350982
3,"[{'id': 803, 'main': 'Clouds', 'description': ...",stations,10000,2021-12-01 14:06:47,36000,2174003,Brisbane,200,153.0281,-27.4679,...,1.34,177,3.58,,75,2,2005393,AU,1638384296,1638433788
4,"[{'id': 800, 'main': 'Clear', 'description': '...",stations,10000,2021-12-01 14:03:52,37800,2078025,Adelaide,200,138.6,-34.9333,...,3.6,190,,,0,2,2001763,AU,1638386713,1638438299


## Replace column names

In [6]:
# Replace "." with "_" in the column names via the project helper
# (e.g. "coord.lon" becomes "coord_lon" in the displayed output).
clean_weather_df = tf.replace_column_character(
    date_fixed_weather_df,
    {".": "_"},
)
clean_weather_df.head()

Unnamed: 0,weather,base,visibility,dt,timezone,id,name,cod,coord_lon,coord_lat,...,wind_speed,wind_deg,wind_gust,rain_1h,clouds_all,sys_type,sys_id,sys_country,sys_sunrise,sys_sunset
0,"[{'id': 501, 'main': 'Rain', 'description': 'm...",stations,10000,2021-12-01 14:03:41,39600,2172517,Canberra,200,149.1281,-35.2835,...,0.89,77,2.68,2.66,40,2,2004200,AU,1638384133,1638435824
1,"[{'id': 800, 'main': 'Clear', 'description': '...",stations,10000,2021-12-01 14:08:20,39600,2147714,Sydney,200,151.2073,-33.8679,...,1.34,20,2.24,,6,2,2001174,AU,1638383850,1638435109
2,"[{'id': 800, 'main': 'Clear', 'description': '...",stations,10000,2021-12-01 14:08:31,34200,2073124,Darwin,200,130.8418,-12.4611,...,2.57,290,,,0,1,9574,AU,1638304910,1638350982
3,"[{'id': 803, 'main': 'Clouds', 'description': ...",stations,10000,2021-12-01 14:06:47,36000,2174003,Brisbane,200,153.0281,-27.4679,...,1.34,177,3.58,,75,2,2005393,AU,1638384296,1638433788
4,"[{'id': 800, 'main': 'Clear', 'description': '...",stations,10000,2021-12-01 14:03:52,37800,2078025,Adelaide,200,138.6,-34.9333,...,3.6,190,,,0,2,2001763,AU,1638386713,1638438299


## Rename fields

In [7]:
# Give the generic API field names more descriptive column names.
# The frame is rebound to the same variable, so later cells see the new names.
clean_weather_df = clean_weather_df.rename(
    mapper={"dt": "datetime", "id": "city_id"},
    axis="columns",
)
clean_weather_df.head()

Unnamed: 0,weather,base,visibility,datetime,timezone,city_id,name,cod,coord_lon,coord_lat,...,wind_speed,wind_deg,wind_gust,rain_1h,clouds_all,sys_type,sys_id,sys_country,sys_sunrise,sys_sunset
0,"[{'id': 501, 'main': 'Rain', 'description': 'm...",stations,10000,2021-12-01 14:03:41,39600,2172517,Canberra,200,149.1281,-35.2835,...,0.89,77,2.68,2.66,40,2,2004200,AU,1638384133,1638435824
1,"[{'id': 800, 'main': 'Clear', 'description': '...",stations,10000,2021-12-01 14:08:20,39600,2147714,Sydney,200,151.2073,-33.8679,...,1.34,20,2.24,,6,2,2001174,AU,1638383850,1638435109
2,"[{'id': 800, 'main': 'Clear', 'description': '...",stations,10000,2021-12-01 14:08:31,34200,2073124,Darwin,200,130.8418,-12.4611,...,2.57,290,,,0,1,9574,AU,1638304910,1638350982
3,"[{'id': 803, 'main': 'Clouds', 'description': ...",stations,10000,2021-12-01 14:06:47,36000,2174003,Brisbane,200,153.0281,-27.4679,...,1.34,177,3.58,,75,2,2005393,AU,1638384296,1638433788
4,"[{'id': 800, 'main': 'Clear', 'description': '...",stations,10000,2021-12-01 14:03:52,37800,2078025,Adelaide,200,138.6,-34.9333,...,3.6,190,,,0,2,2001763,AU,1638386713,1638438299


## Create City DataFrame

In [10]:
# City dimension: keep only the identifying and location columns, then drop
# fully duplicated rows.
city_df = (
    clean_weather_df
    .loc[:, ["city_id", "name", "coord_lon", "coord_lat"]]
    .drop_duplicates()
)
city_df.head()

Unnamed: 0,city_id,name,coord_lon,coord_lat
0,2172517,Canberra,149.1281,-35.2835
1,2147714,Sydney,151.2073,-33.8679
2,2073124,Darwin,130.8418,-12.4611
3,2174003,Brisbane,153.0281,-27.4679
4,2078025,Adelaide,138.6,-34.9333


## Create Temperature DataFrame

In [11]:
# Temperature readings keyed by city and observation time.
temperature_df = clean_weather_df.loc[
    :,
    [
        "city_id",
        "datetime",
        "main_temp",
        "main_feels_like",
        "main_temp_min",
        "main_temp_max",
    ],
]
temperature_df.head()

Unnamed: 0,city_id,datetime,main_temp,main_feels_like,main_temp_min,main_temp_max
0,2172517,2021-12-01 14:03:41,288.45,288.15,287.14,289.68
1,2147714,2021-12-01 14:08:20,291.64,291.89,289.84,292.72
2,2073124,2021-12-01 14:08:31,302.08,307.49,301.98,302.53
3,2174003,2021-12-01 14:06:47,294.75,295.21,293.87,295.74
4,2078025,2021-12-01 14:03:52,291.58,291.33,288.62,292.83


## Create Atmosphere DataFrame

In [12]:
# Atmospheric readings (pressure, humidity, wind, cloud cover) keyed by city
# and observation time. `wind_gust` is blank for some cities in the output.
atmosphere_df = clean_weather_df.loc[
    :,
    [
        "city_id",
        "datetime",
        "main_pressure",
        "main_humidity",
        "wind_speed",
        "wind_deg",
        "clouds_all",
        "wind_gust",
    ],
]
atmosphere_df.head()

Unnamed: 0,city_id,datetime,main_pressure,main_humidity,wind_speed,wind_deg,clouds_all,wind_gust
0,2172517,2021-12-01 14:03:41,1016,81,0.89,77,40,2.68
1,2147714,2021-12-01 14:08:20,1016,90,1.34,20,6,2.24
2,2073124,2021-12-01 14:08:31,1008,79,2.57,290,0,
3,2174003,2021-12-01 14:06:47,1013,86,1.34,177,75,3.58
4,2078025,2021-12-01 14:03:52,1012,71,3.6,190,0,


## Create SQL Connection

In [22]:
from urllib.parse import quote_plus as urlquote

from sqlalchemy import create_engine
from sqlalchemy.dialects import postgresql
from sqlalchemy.engine import URL

# Assemble the PostgreSQL connection URL from its parts.
# `db_user` / `db_password` are not defined in this notebook; presumably they
# are supplied by the `credentials` star-import in the first cell.
connection_url = URL.create(
    "postgresql",
    username=db_user,
    password=db_password,
    host="localhost",
    port=5432,
    database="weather_db",
)

# Engine used by the later cells for reflection and for running statements.
engine = create_engine(connection_url)

## Reflect database tables

In [31]:
from sqlalchemy import MetaData

# Read the existing table definitions from the connected database instead of
# declaring them by hand.
metadata_obj = MetaData()
metadata_obj.reflect(bind=engine)

# Pull out the three tables this notebook works with; a missing table raises
# KeyError here.
city, temperature, atmosphere = (
    metadata_obj.tables[table_name]
    for table_name in ("city", "temperature", "atmosphere")
)


In [32]:
# Upsert the city rows: insert every record from `city_df`, and when a row
# with the same city_id already exists, overwrite its other columns with the
# incoming values.
insert_statement = postgresql.insert(city).values(city_df.to_dict(orient='records'))
upsert_statement = insert_statement.on_conflict_do_update(
    index_elements=['city_id'],
    # `excluded` is the row proposed for insertion; every column except the
    # conflict key is copied from it onto the existing row.
    set_={c.key: c for c in insert_statement.excluded if c.key != 'city_id'})

# Engine.execute() is deprecated in SQLAlchemy 1.4 and removed in 2.0, so the
# statement is run on an explicit connection. engine.begin() opens a
# transaction that commits when the block exits cleanly and rolls back if the
# statement raises.
with engine.begin() as connection:
    connection.execute(upsert_statement)

<sqlalchemy.engine.cursor.LegacyCursorResult at 0x7fb52018c0d0>