## Import dependencies

In [2]:
import requests
import pandas as pd 
import os 
import transform_functions as tf
from credentials import *  

## Get Australian Capital Cities
Read in the CSV of Australian capital cities.

In [3]:
# Load the list of Australian capital cities from the sibling `data` folder
# (path is relative to the notebook's working directory).
capital_cities_df = pd.read_csv(
    filepath_or_buffer=os.path.join(os.pardir, "data", "australian_capital_cities.csv")
)

## Get Weather Data 
Get the current weather for each capital city by calling the OpenWeatherMap REST API.

In [4]:
# Fetch the current weather for every capital city from the OpenWeatherMap API.
# Each city is requested exactly once; the parsed JSON payload (one dict per
# city) is collected in `weather_data` for normalisation in the next step.
weather_data = []
for city_name in capital_cities_df["city_name"]:
    params = {
        "q": city_name,
        "appid": api_key
    }
    # HTTPS keeps the API key (sent as a query parameter) out of clear text, and
    # the timeout stops the notebook from hanging forever on a stalled connection.
    response = requests.get(
        "https://api.openweathermap.org/data/2.5/weather",
        params=params,
        timeout=30,
    )
    if response.status_code == 200:
        # Reuse the response that was just validated instead of issuing a second,
        # unchecked request (which doubled API usage and could return an error body).
        weather_data.append(response.json())
    else:
        # Fail fast: a partial data set would silently produce incomplete tables.
        raise Exception("Extracting weather api data failed. Please check if API limits have been reached.")

## Read JSON data into Pandas DataFrame

In [5]:
# Flatten the list of JSON payloads into a single table; nested objects become
# dot-separated columns (e.g. `coord.lon`, `main.humidity`), while the `weather`
# list is kept as-is in one column.
weather_df = pd.json_normalize(data=weather_data)
weather_df.head(n=5)

Unnamed: 0,weather,base,visibility,dt,timezone,id,name,cod,coord.lon,coord.lat,...,main.humidity,wind.speed,wind.deg,clouds.all,sys.type,sys.id,sys.country,sys.sunrise,sys.sunset,wind.gust
0,"[{'id': 802, 'main': 'Clouds', 'description': ...",stations,10000,1638368789,39600,2172517,Canberra,200,149.1281,-35.2835,...,82,2.57,180,40,2,2004200,AU,1638384133,1638435824,
1,"[{'id': 800, 'main': 'Clear', 'description': '...",stations,10000,1638369080,39600,2147714,Sydney,200,151.2073,-33.8679,...,91,1.34,340,6,2,2001174,AU,1638383850,1638435109,1.34
2,"[{'id': 801, 'main': 'Clouds', 'description': ...",stations,10000,1638368624,34200,2073124,Darwin,200,130.8418,-12.4611,...,83,2.06,300,20,1,9574,AU,1638304910,1638350982,
3,"[{'id': 803, 'main': 'Clouds', 'description': ...",stations,10000,1638369141,36000,2174003,Brisbane,200,153.0281,-27.4679,...,86,0.45,150,75,2,2005393,AU,1638384296,1638433788,1.79
4,"[{'id': 800, 'main': 'Clear', 'description': '...",stations,10000,1638369162,37800,2078025,Adelaide,200,138.6,-34.9333,...,71,2.57,160,0,2,2001763,AU,1638386713,1638438299,


## Convert unix timestamp to datetime timestamp string 

In [6]:
# Convert the observation time (`dt`) with the project helper. Only `dt` is
# listed, so other epoch columns such as `sys.sunrise` / `sys.sunset` are left
# as raw integers.
# NOTE(review): the displayed values look like UTC "YYYY-MM-DD HH:MM:SS" strings —
# confirm against `transform_functions.convert_unix_timestamp`.
date_fixed_weather_df = tf.convert_unix_timestamp(
    input_df=weather_df,
    date_columns=["dt"],
)
date_fixed_weather_df.head(n=5)

Unnamed: 0,weather,base,visibility,dt,timezone,id,name,cod,coord.lon,coord.lat,...,main.humidity,wind.speed,wind.deg,clouds.all,sys.type,sys.id,sys.country,sys.sunrise,sys.sunset,wind.gust
0,"[{'id': 802, 'main': 'Clouds', 'description': ...",stations,10000,2021-12-01 14:26:29,39600,2172517,Canberra,200,149.1281,-35.2835,...,82,2.57,180,40,2,2004200,AU,1638384133,1638435824,
1,"[{'id': 800, 'main': 'Clear', 'description': '...",stations,10000,2021-12-01 14:31:20,39600,2147714,Sydney,200,151.2073,-33.8679,...,91,1.34,340,6,2,2001174,AU,1638383850,1638435109,1.34
2,"[{'id': 801, 'main': 'Clouds', 'description': ...",stations,10000,2021-12-01 14:23:44,34200,2073124,Darwin,200,130.8418,-12.4611,...,83,2.06,300,20,1,9574,AU,1638304910,1638350982,
3,"[{'id': 803, 'main': 'Clouds', 'description': ...",stations,10000,2021-12-01 14:32:21,36000,2174003,Brisbane,200,153.0281,-27.4679,...,86,0.45,150,75,2,2005393,AU,1638384296,1638433788,1.79
4,"[{'id': 800, 'main': 'Clear', 'description': '...",stations,10000,2021-12-01 14:32:42,37800,2078025,Adelaide,200,138.6,-34.9333,...,71,2.57,160,0,2,2001763,AU,1638386713,1638438299,


## Replace characters in column names

In [7]:
# Swap every "." in the column names for "_" (e.g. `coord.lon` -> `coord_lon`)
# via the project helper, so the names line up with the database column names
# used further down.
clean_weather_df = tf.replace_column_character(
    date_fixed_weather_df,
    {".": "_"},
)
clean_weather_df.head(n=5)

Unnamed: 0,weather,base,visibility,dt,timezone,id,name,cod,coord_lon,coord_lat,...,main_humidity,wind_speed,wind_deg,clouds_all,sys_type,sys_id,sys_country,sys_sunrise,sys_sunset,wind_gust
0,"[{'id': 802, 'main': 'Clouds', 'description': ...",stations,10000,2021-12-01 14:26:29,39600,2172517,Canberra,200,149.1281,-35.2835,...,82,2.57,180,40,2,2004200,AU,1638384133,1638435824,
1,"[{'id': 800, 'main': 'Clear', 'description': '...",stations,10000,2021-12-01 14:31:20,39600,2147714,Sydney,200,151.2073,-33.8679,...,91,1.34,340,6,2,2001174,AU,1638383850,1638435109,1.34
2,"[{'id': 801, 'main': 'Clouds', 'description': ...",stations,10000,2021-12-01 14:23:44,34200,2073124,Darwin,200,130.8418,-12.4611,...,83,2.06,300,20,1,9574,AU,1638304910,1638350982,
3,"[{'id': 803, 'main': 'Clouds', 'description': ...",stations,10000,2021-12-01 14:32:21,36000,2174003,Brisbane,200,153.0281,-27.4679,...,86,0.45,150,75,2,2005393,AU,1638384296,1638433788,1.79
4,"[{'id': 800, 'main': 'Clear', 'description': '...",stations,10000,2021-12-01 14:32:42,37800,2078025,Adelaide,200,138.6,-34.9333,...,71,2.57,160,0,2,2001763,AU,1638386713,1638438299,


## Rename fields

In [8]:
# Give the generic API field names their table-specific meaning:
# `id` is the city identifier and `dt` is the observation timestamp.
clean_weather_df = clean_weather_df.rename(
    columns=dict(id="city_id", dt="datetime")
)
clean_weather_df.head(n=5)

Unnamed: 0,weather,base,visibility,datetime,timezone,city_id,name,cod,coord_lon,coord_lat,...,main_humidity,wind_speed,wind_deg,clouds_all,sys_type,sys_id,sys_country,sys_sunrise,sys_sunset,wind_gust
0,"[{'id': 802, 'main': 'Clouds', 'description': ...",stations,10000,2021-12-01 14:26:29,39600,2172517,Canberra,200,149.1281,-35.2835,...,82,2.57,180,40,2,2004200,AU,1638384133,1638435824,
1,"[{'id': 800, 'main': 'Clear', 'description': '...",stations,10000,2021-12-01 14:31:20,39600,2147714,Sydney,200,151.2073,-33.8679,...,91,1.34,340,6,2,2001174,AU,1638383850,1638435109,1.34
2,"[{'id': 801, 'main': 'Clouds', 'description': ...",stations,10000,2021-12-01 14:23:44,34200,2073124,Darwin,200,130.8418,-12.4611,...,83,2.06,300,20,1,9574,AU,1638304910,1638350982,
3,"[{'id': 803, 'main': 'Clouds', 'description': ...",stations,10000,2021-12-01 14:32:21,36000,2174003,Brisbane,200,153.0281,-27.4679,...,86,0.45,150,75,2,2005393,AU,1638384296,1638433788,1.79
4,"[{'id': 800, 'main': 'Clear', 'description': '...",stations,10000,2021-12-01 14:32:42,37800,2078025,Adelaide,200,138.6,-34.9333,...,71,2.57,160,0,2,2001763,AU,1638386713,1638438299,


## Create City DataFrame

In [9]:
# City dimension: one row per distinct (id, name, coordinates) combination.
city_df = clean_weather_df.loc[
    :, ["city_id", "name", "coord_lon", "coord_lat"]
].drop_duplicates(keep="first")
city_df.head(n=5)

Unnamed: 0,city_id,name,coord_lon,coord_lat
0,2172517,Canberra,149.1281,-35.2835
1,2147714,Sydney,151.2073,-33.8679
2,2073124,Darwin,130.8418,-12.4611
3,2174003,Brisbane,153.0281,-27.4679
4,2078025,Adelaide,138.6,-34.9333


## Create Temperature DataFrame

In [10]:
# Temperature readings keyed by city and observation time.
# NOTE(review): the displayed values (~290) look like Kelvin, the API default
# when no `units` parameter is sent — confirm before reporting on them.
temperature_df = clean_weather_df.loc[
    :,
    [
        "city_id",
        "datetime",
        "main_temp",
        "main_feels_like",
        "main_temp_min",
        "main_temp_max",
    ],
]
temperature_df.head(n=5)

Unnamed: 0,city_id,datetime,main_temp,main_feels_like,main_temp_min,main_temp_max
0,2172517,2021-12-01 14:26:29,288.15,287.85,287.07,289.12
1,2147714,2021-12-01 14:31:20,291.39,291.65,289.84,292.24
2,2073124,2021-12-01 14:23:44,301.03,305.4,300.87,302.53
3,2174003,2021-12-01 14:32:21,294.73,295.19,293.87,295.45
4,2078025,2021-12-01 14:32:42,291.4,291.13,288.07,292.83


## Create Atmosphere DataFrame

In [11]:
# Atmospheric readings (pressure and humidity) keyed by city and observation time.
atmosphere_df = clean_weather_df.loc[
    :, ["city_id", "datetime", "main_pressure", "main_humidity"]
]
atmosphere_df.head(n=5)

Unnamed: 0,city_id,datetime,main_pressure,main_humidity
0,2172517,2021-12-01 14:26:29,1016,82
1,2147714,2021-12-01 14:31:20,1016,91
2,2073124,2021-12-01 14:23:44,1008,83
3,2174003,2021-12-01 14:32:21,1013,86
4,2078025,2021-12-01 14:32:42,1012,71


## Create SQL Connection

In [12]:
from sqlalchemy import create_engine
from sqlalchemy.engine import URL
from sqlalchemy.dialects import postgresql
from urllib.parse import quote_plus as urlquote

# Build the connection URL for the local `weather_db` PostgreSQL database.
# `URL.create` takes the components separately, so the password does not need
# manual escaping.
# NOTE(review): `db_user` / `db_password` are presumably provided by the
# `credentials` star import at the top of the notebook — confirm.
connection_url = URL.create(
    "postgresql",
    username=db_user,
    password=db_password,
    host="localhost",
    port=5432,
    database="weather_db",
)

# Engine shared by the reflection and upsert cells below.
engine = create_engine(connection_url)

  """)


## Reflect ORM

In [13]:
from sqlalchemy import MetaData

# Load the existing table definitions from the database instead of declaring
# them by hand, then pick out the three tables this notebook writes to.
# A missing table raises KeyError here.
metadata_obj = MetaData()
metadata_obj.reflect(bind=engine)
city, temperature, atmosphere = (
    metadata_obj.tables["city"],
    metadata_obj.tables["temperature"],
    metadata_obj.tables["atmosphere"],
)


## Upsert: City

In [14]:
# Upsert the city rows: insert new cities and, when a row with the same
# `city_id` already exists, overwrite its remaining columns with the incoming values.
# NOTE(review): ON CONFLICT needs a unique/primary-key constraint on `city_id`
# in the target table — confirm against the schema.
insert_statement = postgresql.insert(city).values(city_df.to_dict(orient='records'))
upsert_statement = insert_statement.on_conflict_do_update(
    index_elements=['city_id'],
    # `excluded` is the row proposed for insertion; the conflict key itself is not rewritten.
    set_={c.key: c for c in insert_statement.excluded if c.key not in ['city_id']})
# `engine.execute()` is deprecated in SQLAlchemy 1.4 and removed in 2.0. Run the
# statement on an explicit connection instead; `engine.begin()` commits on
# success and rolls back if the statement raises.
with engine.begin() as connection:
    connection.execute(upsert_statement)

<sqlalchemy.engine.cursor.LegacyCursorResult at 0x7ff765a4d3d0>

## Upsert: Temperature

In [15]:
# Upsert the temperature readings: insert new rows and, when a row with the same
# (`city_id`, `datetime`) pair already exists, overwrite its measurement columns.
# NOTE(review): ON CONFLICT needs a unique/primary-key constraint on
# (`city_id`, `datetime`) in the target table — confirm against the schema.
insert_statement = postgresql.insert(temperature).values(temperature_df.to_dict(orient='records'))
upsert_statement = insert_statement.on_conflict_do_update(
    index_elements=['city_id', 'datetime'],
    # `excluded` is the row proposed for insertion; the conflict keys themselves are not rewritten.
    set_={c.key: c for c in insert_statement.excluded if c.key not in ['city_id', 'datetime']})
# `engine.execute()` is deprecated in SQLAlchemy 1.4 and removed in 2.0. Run the
# statement on an explicit connection instead; `engine.begin()` commits on
# success and rolls back if the statement raises.
with engine.begin() as connection:
    connection.execute(upsert_statement)

<sqlalchemy.engine.cursor.LegacyCursorResult at 0x7ff7659e5910>

## Upsert: Atmosphere

In [16]:
# Upsert the atmosphere readings: insert new rows and, when a row with the same
# (`city_id`, `datetime`) pair already exists, overwrite its measurement columns.
# NOTE(review): ON CONFLICT needs a unique/primary-key constraint on
# (`city_id`, `datetime`) in the target table — confirm against the schema.
insert_statement = postgresql.insert(atmosphere).values(atmosphere_df.to_dict(orient='records'))
upsert_statement = insert_statement.on_conflict_do_update(
    index_elements=['city_id', 'datetime'],
    # `excluded` is the row proposed for insertion; the conflict keys themselves are not rewritten.
    set_={c.key: c for c in insert_statement.excluded if c.key not in ['city_id', 'datetime']})
# `engine.execute()` is deprecated in SQLAlchemy 1.4 and removed in 2.0. Run the
# statement on an explicit connection instead; `engine.begin()` commits on
# success and rolls back if the statement raises.
with engine.begin() as connection:
    connection.execute(upsert_statement)

<sqlalchemy.engine.cursor.LegacyCursorResult at 0x7ff7651e2f10>