In [43]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import numpy as np

In [349]:
# Column names applied to every station file; they replace each file's own header row.
name_list = ["Station ID","Date [UTC]","Temp [F]","DP [F]","RH [%]","W Dir [Deg]","W Spd [Kts]","Alt [inHg]","1Hr-Prcp [mm]",
             "Vis [mi]","SKC1","SKC2","SKC3","Cld Hgt1 [Ft]","Cld Hgt2 [Ft]","Cld Hgt3 [Ft]","Prs Wx"]

# Per-column dtypes handed to read_csv.
# NOTE(review): the "UTC" key matches no entry in name_list (the date column is
# "Date [UTC]" and is handled through parse_dates) — confirm it can be dropped.
dtype_list = {"Station ID":"str","UTC":"str","Temp [F]":"float64","DP [F]":"float64","RH [%]":"float64","W Dir [Deg]":"float64",
              "W Spd [Kts]":"float64","Alt [inHg]":"float64","1Hr-Prcp [mm]":"float64","Vis [mi]":"float64","SKC1":"str",
              "SKC2":"str","SKC3":"str","Cld Hgt1 [Ft]":"float64","Cld Hgt2 [Ft]":"float64","Cld Hgt3 [Ft]":"float64",
              "Prs Wx" : "str"}

# Column parsed as datetimes; it becomes the index (index_col=1) so the frame can be resampled.
parse_date = ["Date [UTC]"]


def load_daily_means(path):
    """Read one tab-separated station file and return the daily mean of its numeric columns.

    Parameters
    ----------
    path : str
        Location of the station text file.

    Returns
    -------
    pandas.DataFrame
        One row per calendar day, indexed by "Date [UTC]", holding the mean of
        every numeric column.
    """
    observations = pd.read_csv(path, sep='\t', header=0, names=name_list, dtype=dtype_list,
                               parse_dates=parse_date, index_col=1)
    # Text columns cannot be averaged. Selecting the numeric columns first keeps
    # the result identical on old pandas (which dropped them silently) and avoids
    # the TypeError that pandas >= 2.0 raises for them.
    return observations.select_dtypes(include='number').resample('1d').mean()


# Import TxT Data
CLL = load_daily_means('Data/CLL.txt')
DFW = load_daily_means('Data/DFW.txt')
AUS = load_daily_means('Data/AUS.txt')
IAH = load_daily_means('Data/IAH.txt')

In [392]:
class Prep:
    """Preprocessing helpers: gap filling, precipitation flag, scaling, splitting.

    Every helper is a static method and is called on the class itself,
    e.g. ``Prep.fill_missing(frame)``.
    """

    # Scaler fitted by the most recent norm() call; inverse() reads it back.
    _scaler = None

    @staticmethod
    def fill_missing(dataframe, method='linear'):
        """Fill the NaN gaps of every column of ``dataframe`` in place.

        Gaps are filled by linear interpolation over row position. NaNs before
        the first or after the last valid value take that nearest valid value,
        because ``np.interp`` holds the end values constant outside the samples.

        Parameters
        ----------
        dataframe : pandas.DataFrame
            Frame to fill. It is modified in place.
        method : str
            Only ``'linear'`` is supported.

        Returns
        -------
        pandas.DataFrame
            The same ``dataframe`` object, for convenient chaining.

        Raises
        ------
        ValueError
            If ``method`` is unsupported, or a column contains only NaN so there
            is nothing to interpolate from.
        """
        if method != 'linear':
            raise ValueError(f"unsupported fill method: {method!r}")
        for column in dataframe.columns:
            missing = dataframe[column].isna().to_numpy()
            if not missing.any():
                continue  # nothing to fill (also covers an empty frame)
            if missing.all():
                raise ValueError(f"column {column!r} has no valid values to interpolate from")
            values = dataframe[column].to_numpy(dtype=float)
            # A single .loc assignment writes into the frame itself; chained
            # indexing (frame[col][mask] = ...) may write into a temporary copy.
            dataframe.loc[missing, column] = np.interp(
                np.flatnonzero(missing), np.flatnonzero(~missing), values[~missing]
            )
        return dataframe

    @staticmethod
    def howdy(dataframe):
        """Return a copy of ``dataframe`` with a boolean ``prcp`` column.

        ``prcp`` is True where "1Hr-Prcp [mm]" is strictly positive. Zero,
        negative and missing values give False.
        """
        return dataframe.assign(prcp=dataframe['1Hr-Prcp [mm]'] > 0)

    @staticmethod
    def norm(dataframe, method='standard'):
        """Standardize every column and return the scaled values as an array.

        The fitted scaler is remembered on the class so that ``Prep.inverse``
        can undo the transformation. A later ``norm`` call replaces it.

        Parameters
        ----------
        dataframe : array-like
            Two-dimensional data accepted by ``StandardScaler.fit_transform``.
        method : str
            Only ``'standard'`` is supported.

        Returns
        -------
        numpy.ndarray
            The scaled data as returned by ``StandardScaler.fit_transform``.

        Raises
        ------
        ValueError
            If ``method`` is unsupported.
        """
        if method != 'standard':
            raise ValueError(f"unsupported normalization method: {method!r}")
        scaler = StandardScaler()
        # One fit is enough: StandardScaler already works column by column.
        scaled = scaler.fit_transform(dataframe)
        Prep._scaler = scaler
        return scaled

    @staticmethod
    def inverse(data):
        """Map scaled ``data`` back to original units using the last ``norm`` scaler.

        Raises
        ------
        RuntimeError
            If ``Prep.norm`` has not been called yet.
        """
        if Prep._scaler is None:
            raise RuntimeError("Prep.norm() must be called before Prep.inverse()")
        return Prep._scaler.inverse_transform(data)

    @staticmethod
    def train_test(dataset, his=0.9):
        """Split a 2-D array by rows into a leading train part and a trailing test part.

        Parameters
        ----------
        dataset : numpy.ndarray
            Two-dimensional array; rows are kept in their original order.
        his : float
            Fraction of rows assigned to the train part (truncated to an int).

        Returns
        -------
        tuple of numpy.ndarray
            ``(train, test)``.
        """
        train_size = int(len(dataset) * his)
        return dataset[:train_size, :], dataset[train_size:, :]

    @staticmethod
    def split(data, n_steps, target_col=6, horizon=0):
        """Cut a 2-D array into sliding windows and matching target values.

        Parameters
        ----------
        data : numpy.ndarray
            Two-dimensional array of rows by features.
        n_steps : int
            Number of consecutive rows in each window (must be >= 1).
        target_col : int
            Column the target value is read from. The default 6 is presumably
            the precipitation column once text columns are dropped — TODO confirm.
        horizon : int
            How many rows after the end of the window the target is read from.
            With the default 0 the target comes from the window's own last row,
            so the target value is also present in ``X``; pass 1 to predict the
            row that follows the window.

        Returns
        -------
        tuple of numpy.ndarray
            ``(X, y)`` where ``X`` stacks the windows and ``y`` the targets.
            Both are empty when ``data`` is too short for a single window.

        Raises
        ------
        ValueError
            If ``n_steps`` is smaller than 1 or ``horizon`` is negative.
        """
        if n_steps < 1:
            raise ValueError("n_steps must be at least 1")
        if horizon < 0:
            raise ValueError("horizon must not be negative")
        # range() of a non-positive count is empty, so short inputs yield no windows.
        starts = range(len(data) - n_steps - horizon + 1)
        X = [data[start:start + n_steps, :] for start in starts]
        y = [data[start + n_steps - 1 + horizon, target_col] for start in starts]
        return np.array(X), np.array(y)


In [393]:
# Station codes handled in this notebook.
airports = ['CLL', 'DFW', 'IAH', 'AUS']

# Fill the NaN gaps of every station frame. Each name is rebound to the frame
# returned by fill_missing, so the filled data is what the following lines use.
CLL, DFW, IAH, AUS = (Prep.fill_missing(frame) for frame in (CLL, DFW, IAH, AUS))

# 1,2,3 days data, e.g. CLL
CLL_n = Prep.norm(CLL)
train, test = Prep.train_test(CLL_n)
his_1, target_1 = Prep.split(train, 1)
his_2, target_2 = Prep.split(train, 2)
his_3, target_3 = Prep.split(train, 3)