# Weather Import
## Description
Download hourly temperature data from NOAA and save it to the database. Note: the NOAA data only covers a limited number of cities, so some of the Pecan Street cities have to be matched to the closest city that NOAA does cover.

## Imports

In [1]:
import json
import numpy as np
import pandas as pd
from sqlalchemy import create_engine

## Database connection

In [2]:
# read the connection settings (user, host, db) from the JSON credentials file
credentials_file_path = '../credentials.json'
with open(credentials_file_path) as fh:
    credentials = json.load(fh)

# build the SQLAlchemy URL for the MySQL (mysqldb driver) database;
# note that the URL carries a user name but no password
db_url_template = 'mysql+mysqldb://{user}@{host}/{db}'
db_url = db_url_template.format(
    **{key: credentials[key] for key in ('user', 'host', 'db')}
)

# open one connection that the following cells share
engine = create_engine(db_url)
conn = engine.connect()

## Remove tables if they exist
Used for refreshing the database.

In [3]:
# start from a clean slate: remove the temps table if an earlier run created it
drop_temps_sql = 'DROP TABLE IF EXISTS temps'
conn.execute(drop_temps_sql)

<sqlalchemy.engine.result.ResultProxy at 0x23772b80668>

## Helper function and array for handling columns

In [4]:
# Names for the fields of one data line, in file order. They are applied
# positionally to the output of split(), so the order here must not change.
# Laid out one name-prefix family per line for readability.
columns = [
    'WBANNO',
    'UTC_DATE', 'UTC_TIME',
    'LST_DATE', 'LST_TIME',
    'CRX_VN',
    'LONGITUDE', 'LATITUDE',
    'T_CALC', 'T_HR_AVG', 'T_MAX', 'T_MIN',
    'P_CALC',
    'SOLARAD', 'SOLARAD_FLAG',
    'SOLARAD_MAX', 'SOLARAD_MAX_FLAG',
    'SOLARAD_MIN', 'SOLARAD_MIN_FLAG',
    'SUR_TEMP_TYPE', 'SUR_TEMP', 'SUR_TEMP_FLAG',
    'SUR_TEMP_MAX', 'SUR_TEMP_MAX_FLAG',
    'SUR_TEMP_MIN', 'SUR_TEMP_MIN_FLAG',
    'RH_HR_AVG', 'RH_HR_AVG_FLAG',
    'SOIL_MOISTURE_5', 'SOIL_MOISTURE_10', 'SOIL_MOISTURE_20', 'SOIL_MOISTURE_50', 'SOIL_MOISTURE_100',
    'SOIL_TEMP_5', 'SOIL_TEMP_10', 'SOIL_TEMP_20', 'SOIL_TEMP_50', 'SOIL_TEMP_100',
]

def split(s):
    """Split a space-delimited line into its non-empty fields.

    A run of consecutive spaces counts as a single delimiter, and leading or
    trailing spaces do not produce empty fields. Only the space character is
    treated as a delimiter; tabs and other whitespace stay inside a field.

    Args:
        s (str): one raw line of text.

    Returns:
        list[str]: the fields in their original order; an empty list when
        `s` is empty or contains only spaces.
    """
    # plain str.split plus a filter: avoids building a numpy array for every
    # line and returns ordinary str objects instead of numpy string scalars
    return [field for field in s.split(' ') if field != '']

## Fetch temps for the years and cities configured below

In [5]:
url_root = 'https://www1.ncdc.noaa.gov/pub/data/uscrn/products/hourly02/'
file_code = 'CRNH0203'

# years to download; kept as strings because they are concatenated into the URL
years = ['2018']

# value written to the `city_state` column -> name of that city's NOAA file
city_state_file_map = {
    'Austin, Texas': 'TX_Austin_33_NW.txt',
}

for city_state, city_file_name in city_state_file_map.items():

    for year in years:

        # best effort: a failure for one city/year is reported and the loop
        # carries on with the remaining files
        try:
            url = url_root + year + '/' + file_code + '-' + year + '-' + city_file_name

            # fetch city temps from NOAA; read with the default comma separator,
            # so the space-separated fields still have to be split out of column 0
            df = pd.read_csv(url, header=None)

            # format columns: split every line into fields and label them;
            # every value is still a string at this point
            df = pd.DataFrame(np.array(df[0].apply(split).tolist()), columns = columns)

            # clean up the format
            # parse with an explicit format instead of letting pandas guess it;
            # assumes UTC_DATE is YYYYMMDD and UTC_TIME is HHMM (per the NOAA
            # hourly02 readme -- TODO confirm if the product version changes)
            df['dt'] = pd.to_datetime(
                df['UTC_DATE'] + ' ' + df['UTC_TIME'],
                format = '%Y%m%d %H%M'
            )
            # vectorised datetime accessors instead of a Python lambda per row
            df['year'] = df['dt'].dt.year
            df['month'] = df['dt'].dt.month
            df['day'] = df['dt'].dt.day
            df['hour'] = df['dt'].dt.hour
            df['city_state'] = city_state

            # append to the temps table (created by pandas on the first write)
            df.to_sql(
                name = 'temps',
                con = conn,
                if_exists = 'append',
                index = False
            )
        except Exception as e:
            # name the city/year that failed; the exception text alone does not
            print('Failed to import {} {}: {}'.format(city_state, year, e))

print('Done!')

Done!
