In [51]:
import pandas as pd
import numpy as np
from sklearn.metrics import mean_absolute_error,mean_squared_error
from sklearn.preprocessing import MinMaxScaler
from tcn import TCN
from tensorflow.keras.layers import Dense,Reshape

from scipy.signal import savgol_filter
import matplotlib.pyplot as plt
from tensorflow.keras import Sequential
from sklearn.preprocessing import StandardScaler,MinMaxScaler
import logging
logging.getLogger('tensorflow').disabled = True

In [2]:
# df = pd.read_csv('combined.csv').set_index(['State Name','Year'])
# state = 'Alabama'
# column = 'Asthma'

# df = df.iloc[:,-6:]

In [3]:
disease_df = pd.read_csv('disease.csv').set_index(['State Name','Year'])
air_df = pd.read_csv('air.csv').set_index(['State Name','Year'])

In [4]:
deaths_df = pd.read_csv('deathonly.csv', dtype = {'YEAR':np.int64})
deaths_df['State Name'] = deaths_df.URL.str.split("/").apply(lambda x: x[4].capitalize() )

deaths_df = deaths_df.rename(columns = {'YEAR':'Year'})

deaths_df = deaths_df[['Year','State Name','RATE']].set_index(['State Name','Year']).sort_index()
deaths_df = deaths_df.rename(columns = {'RATE':'Chronic respiratory diseases'})
deaths_df = deaths_df +15

In [5]:
df = pd.concat([deaths_df,disease_df[['Chronic respiratory diseases']]]).sort_index()

In [6]:
df = df.merge(air_df , on = ['State Name','Year'])

In [54]:
future_window = 6
lookback_window = 10

In [55]:
def create_windows(data, lookback_window,future_window):
    X, y = [], []
    for i in range(lookback_window, len(data)-future_window):
        X.append(data[i - lookback_window:i])
        y.append(data[i:i+future_window])
    X = np.array(X)
    y = np.array(y)
    return X, y

def build_forecast_model(input_shape,output_shape):
    model = Sequential(
    layers=[
        TCN(input_shape=(input_shape),
        nb_filters=128,
        kernel_size=3,
        return_sequences=False
        ), 
        Dense(output_shape)  # output.shape = (batch, output_timesteps, output_dim)
    ]
    )
    model.compile('adam', 'mae')
    return model


def forecast_variable(df):
    data = df.values
    scaler = StandardScaler()#MinMaxScaler(feature_range= (-1,1))
    train_df = df.values.reshape(-1,1)
    test_df = data[-future_window:].reshape(-1,1)
    train_data = scaler.fit_transform(train_df)
    X_test = scaler.transform(test_df)
    
    
    X_train,y_train = create_windows(train_data, lookback_window,future_window)
    
    
    input_shape = (lookback_window, 1)
    output_shape = future_window
    
    model = build_forecast_model(input_shape,output_shape)
    history = model.fit(X_train, y_train, epochs=100, verbose=0)
    forecast = model.predict(X_test) # forecated values
    y_pred = forecast[:,-1] #taking only the last value
    y_pred = scaler.inverse_transform(y_pred.reshape(-1,1))  
    years = list(df.index) + list(np.arange(max(df.index)+ 1,max(df.index) + future_window + 1))
    new_df = pd.Series(index = years,dtype = np.float64)
    new_df.index.name = 'Year'
    new_df.iloc[:-future_window] = df.values
    new_df.iloc[-future_window:] = y_pred.reshape(-1)
    return new_df

def evaluate(test_data,y_pred):
    mae = mean_absolute_error(test_data,y_pred)
    mse = mean_squared_error(test_data,y_pred)
    rmse = np.sqrt(mse)
    return mae, rmse

def forecast_state(df):
    state = df.index[0][0]
    df = df.loc[state]
    future_df = df.apply(lambda x: forecast_variable(x))
    return future_df

In [56]:
forecasted_df = df.groupby('State Name').apply(lambda x: forecast_state(x))

In [57]:
forecasted_df 

Unnamed: 0_level_0,Unnamed: 1_level_0,PM10 Total 0-10um STP,Sulfur dioxide
State Name,Year,Unnamed: 2_level_1,Unnamed: 3_level_1
Alabama,1982,29.924603,10.323130
Alabama,1983,35.640561,10.839509
Alabama,1984,35.405204,8.745030
Alabama,1985,43.588198,8.325578
Alabama,1986,45.213074,10.341658
...,...,...,...
Arkansas,2021,18.412060,3.491275
Arkansas,2022,17.081846,3.447814
Arkansas,2023,15.908184,3.405563
Arkansas,2024,15.975808,3.405894
