# Preparatory Processing and Calculations for the Heat Pump Calculator

In [75]:
import os
import csv
from datetime import datetime
import pickle

import pandas as pd
import numpy as np
from glob import glob
import requests
from IPython.display import display

## Download TMY3 Files

In [33]:
# Folder that holds the TMY3 CSV files (and 'tmy_list.txt', the list of station
# IDs to download).  Set the TMY_FOLDER environment variable to point somewhere
# else; the fallback is the location originally hard-coded in this notebook.
TMY_FOLDER = os.environ.get('TMY_FOLDER', r'C:\Users\Alan\Dropbox\TMY3')

# URL template for one TMY3 file; '{}' is replaced by the station ID.
TMY_URL = 'http://rredc.nrel.gov/solar/old_data/nsrdb/1991-2005/data/tmy3/{}TYA.CSV'


def download_tmy3_files(tmy_folder=TMY_FOLDER, url_template=TMY_URL):
    """
    Download one TMY3 CSV file for every station listed in 'tmy_list.txt'.

    'tmy_list.txt' is read from `tmy_folder`; the first whitespace-separated
    token on each line is taken as the station ID.  Blank lines are skipped.
    Each file is fetched from `url_template.format(station_id)` and saved
    in `tmy_folder` as '<station_id>.csv'.

    Parameters
    ----------
    tmy_folder : str
        Folder containing 'tmy_list.txt'; downloaded files are written here.
    url_template : str
        URL containing one '{}' placeholder for the station ID.

    Raises
    ------
    FileNotFoundError
        If 'tmy_list.txt' is not present in `tmy_folder`.
    requests.HTTPError
        If the server answers a request with an error status.
    """
    with open(os.path.join(tmy_folder, 'tmy_list.txt')) as list_file:
        for lin in list_file:
            fields = lin.split()
            if not fields:
                # blank line in the list file: nothing to download
                continue
            tmy_id = fields[0]
            print('Processing: {}'.format(tmy_id))
            resp = requests.get(url_template.format(tmy_id))
            # fail loudly rather than saving an HTML error page as a .csv file
            resp.raise_for_status()
            with open(os.path.join(tmy_folder, '{}.csv'.format(tmy_id)), 'w') as out_file:
                out_file.write(resp.text)


# The download only has to happen once, so it is not run by default.
# Uncomment the next line to (re)fetch the files.
# download_tmy3_files()


## Process TMY3 Files into Pickled DataFrames

In [85]:
# Folder that receives one pickled DataFrame per station plus the metadata table.
CLIMATE_FOLDER = 'data/climate'
os.makedirs(CLIMATE_FOLDER, exist_ok=True)   # to_pickle() does not create folders

# glob() makes no promise about ordering; sorting keeps the row order of
# df_meta (and which station ends up in df_final) the same on every run.
tmy_files = sorted(glob(os.path.join(TMY_FOLDER, '*.csv')))
if not tmy_files:
    raise FileNotFoundError('No TMY3 CSV files found in {}'.format(TMY_FOLDER))

# accumulates metadata for each TMY3 file
meta_list = []

for f in tmy_files:
    print('Processing: {}'.format(f))
    with open(f, newline='') as csvfile:

        # Use a csvreader just to process the header row, which holds the
        # station metadata rather than column names.
        tmyreader = csv.reader(csvfile)
        hdr = next(tmyreader)
        meta = dict(
            tmy_id = int(hdr[0]),
            city = hdr[1].strip(),
            state = hdr[2].strip(),
            utc_offset = float(hdr[3]),
            latitude = float(hdr[4]),
            longitude = float(hdr[5]),
            elevation = float(hdr[6]) * 3.28084   # in feet
        )
        meta_list.append(meta)

        # read the rest of the lines (column names + hourly data) into a DataFrame
        df = pd.read_csv(csvfile)

    # start making final DataFrame: dry-bulb temperature converted from deg C to deg F
    df['db_temp'] = df['Dry-bulb (C)'] * 1.8 + 32.0
    df_final = df[['db_temp']].copy()

    # Build the timestamps for all rows at once.  The stamp is placed in the
    # middle of the hour associated with the data: the hour in the file is
    # reduced by one and the minute is set to 30.  The year 2018 is used for
    # every timestamp, whatever year appears in the file.
    date_parts = df['Date (MM/DD/YYYY)'].str.split('/', expand=True)
    file_hour = df['Time (HH:MM)'].str.split(':', expand=True)[0].astype(int)
    ts = pd.to_datetime(pd.DataFrame({
        'year': 2018,
        'month': date_parts[0].astype(int),
        'day': date_parts[1].astype(int),
        'hour': file_hour - 1,
        'minute': 30,
    }))

    df_final.index = pd.DatetimeIndex(ts)
    df_final['month'] = df_final.index.month

    # pickle it, using the station ID as the file name.
    df_final.to_pickle(os.path.join(CLIMATE_FOLDER, '{}.pkl'.format(meta['tmy_id'])))

# One row per station, indexed by station ID.
df_meta = pd.DataFrame(meta_list).set_index('tmy_id')
df_meta.to_pickle(os.path.join(CLIMATE_FOLDER, 'tmy3_meta.pkl'))

Processing: C:\Users\Alan\Dropbox\TMY3\700197.csv
Processing: C:\Users\Alan\Dropbox\TMY3\700260.csv
Processing: C:\Users\Alan\Dropbox\TMY3\700637.csv
Processing: C:\Users\Alan\Dropbox\TMY3\701043.csv
Processing: C:\Users\Alan\Dropbox\TMY3\701195.csv
Processing: C:\Users\Alan\Dropbox\TMY3\701330.csv
Processing: C:\Users\Alan\Dropbox\TMY3\701625.csv
Processing: C:\Users\Alan\Dropbox\TMY3\701718.csv
Processing: C:\Users\Alan\Dropbox\TMY3\701740.csv
Processing: C:\Users\Alan\Dropbox\TMY3\701780.csv
Processing: C:\Users\Alan\Dropbox\TMY3\701940.csv
Processing: C:\Users\Alan\Dropbox\TMY3\702000.csv
Processing: C:\Users\Alan\Dropbox\TMY3\702005.csv
Processing: C:\Users\Alan\Dropbox\TMY3\702035.csv
Processing: C:\Users\Alan\Dropbox\TMY3\702040.csv
Processing: C:\Users\Alan\Dropbox\TMY3\702070.csv
Processing: C:\Users\Alan\Dropbox\TMY3\702075.csv
Processing: C:\Users\Alan\Dropbox\TMY3\702084.csv
Processing: C:\Users\Alan\Dropbox\TMY3\702185.csv
Processing: C:\Users\Alan\Dropbox\TMY3\702186.csv


In [86]:
# Preview the first and last rows of the last station processed, then show
# the station metadata table as the cell's output.
display(df_final.head(), df_final.tail())
df_meta

Unnamed: 0,db_temp,month
2018-01-01 00:30:00,40.1,1
2018-01-01 01:30:00,40.1,1
2018-01-01 02:30:00,39.92,1
2018-01-01 03:30:00,40.1,1
2018-01-01 04:30:00,40.1,1


Unnamed: 0,db_temp,month
2018-12-31 19:30:00,28.4,12
2018-12-31 20:30:00,27.68,12
2018-12-31 21:30:00,27.14,12
2018-12-31 22:30:00,26.6,12
2018-12-31 23:30:00,26.42,12


Unnamed: 0_level_0,city,elevation,latitude,longitude,state,utc_offset
tmy_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
700197,SELAWIK,26.24672,66.600,-160.000,AK,-9.0
700260,BARROW W POST-W ROGERS ARPT [NSA - ARM],32.80840,71.320,-156.620,AK,-9.0
700637,DEADHORSE,75.45932,70.200,-148.483,AK,-9.0
701043,POINT HOPE (AWOS),13.12336,68.350,-166.800,AK,-9.0
701195,SHISHMAREF (AWOS),6.56168,66.267,-166.050,AK,-9.0
701330,KOTZEBUE RALPH WEIN MEMORIAL,9.84252,66.883,-162.600,AK,-9.0
701625,ANAKTUVUK PASS,2155.51188,68.133,-151.733,AK,-9.0
701718,AMBLER,288.71392,67.100,-157.850,AK,-9.0
701740,BETTLES FIELD,643.04464,66.917,-151.517,AK,-9.0
701780,TANANA RALPH M CALHOUN MEM AP,232.93964,65.167,-152.100,AK,-9.0


In [87]:
# Vectorized Haversine function to find distance between two points
def haversine_np(lon1, lat1, lon2, lat2, radius_km=6367):
    """
    Calculate the great circle distance between two points
    on the earth (specified in decimal degrees).

    Parameters
    ----------
    lon1, lat1 : scalar or array-like
        Longitude and latitude of the first point(s), in decimal degrees.
    lon2, lat2 : scalar or array-like
        Longitude and latitude of the second point(s), in decimal degrees.
    radius_km : float, optional
        Radius of the sphere used to turn the central angle into a distance.
        Defaults to 6367, the value previously hard-coded here.

    Returns
    -------
    Distance in the units of `radius_km` (kilometers by default).  The
    arguments are combined with NumPy arithmetic, so scalars and arrays can
    be mixed as long as their shapes broadcast; the result has the
    broadcast shape.
    """
    lon1, lat1, lon2, lat2 = map(np.radians, [lon1, lat1, lon2, lat2])

    dlon = lon2 - lon1
    dlat = lat2 - lat1

    a = np.sin(dlat/2.0)**2 + np.cos(lat1) * np.cos(lat2) * np.sin(dlon/2.0)**2

    # Floating-point rounding can leave `a` a hair above 1 for (nearly)
    # antipodal points, which would make arcsin return NaN.  Only the upper
    # side is clamped so that NaN from invalid input still propagates.
    a = np.minimum(a, 1.0)

    c = 2 * np.arcsin(np.sqrt(a))
    return radius_km * c