# Recreating Heat Index with a multiple linear regression

In [None]:
%matplotlib inline

In [None]:
import tensorflow as tf
import numpy as np
import tensorflow.keras as keras
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense
import matplotlib.pyplot as plt
import pandas as pd
import copy

Let's load in our ASOS station dataset.

In [None]:
input_data = pd.read_csv("./al_asos_jul_23_v2.csv")

In [None]:
input_data

In [None]:
# It is often good practice to drop missing data, so let's do just that.
# Called with no arguments, dropna() removes every row that has a missing
# value in ANY column of the frame.
input_data_nona = input_data.dropna()

In [None]:
input_data_nona

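583 rows!?! That's not enough. Remember to be careful as you're doing this: `dropna()` throws away every row that has a missing value in *any* column, so a single sparsely reported column can wipe out most of the dataset.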

In [None]:
# Remove the 'gust' column first, then discard rows that still contain
# missing values.
input_data_nona = (
    input_data
    .drop(columns='gust')
    .dropna()
)

In [None]:
input_data_nona

See any issues here?

In [None]:
# Work on a copy so the raw `input_data` frame stays untouched.
input_data_renamed_clouds = input_data.copy()
# Series.fillna() returns a NEW Series instead of modifying the original,
# so the result has to be assigned back to the column. Missing 'skyc1'
# entries are labelled 'CLR' so that a later dropna() keeps those rows.
input_data_renamed_clouds['skyc1'] = input_data_renamed_clouds['skyc1'].fillna('CLR')
input_data_renamed_clouds

In [None]:
# Drop the three listed columns, then remove any rows that still have
# missing values.
input_data_nona = (
    input_data_renamed_clouds
    .drop(columns=['gust', 'skyl1', 'mslp'])
    .dropna()
)

In [None]:
# Air temperature against the reported 'feel' column.
fig, ax = plt.subplots()
ax.scatter(input_data_nona['tmpf'], input_data_nona['feel'])
ax.set_xlabel("Temperature (F)")
ax.set_ylabel("Heat index (F)")

In [None]:
def nws_heat_index(T, RH):
    '''
    Compute the NWS heat index for a single observation.

    Implements the algorithm described at
    https://www.wpc.ncep.noaa.gov/html/heatindex_equation.shtml

    Parameters
    ----------
    T : float
        Air temperature in degrees Fahrenheit.
    RH : float
        Relative humidity in percent.

    Returns
    -------
    float
        Heat index in degrees Fahrenheit. The simple formula is returned
        when it evaluates below 80 F; otherwise the full regression plus
        any applicable adjustment is returned.
    '''
    # Simple formula, already averaged with the temperature (the leading 0.5).
    initial_hi = 0.5 * (T + 61.0 + ((T - 68.0) * 1.2) + (RH * 0.094))
    if initial_hi < 80:
        return initial_hi

    adjustment_factor = 0
    if T >= 80 and T <= 112 and RH < 13:
        # Low-humidity adjustment: the NWS procedure SUBTRACTS this term
        # from the regression value, hence the negative sign.
        adjustment_factor = -((13 - RH) / 4) * np.sqrt((17 - np.abs(T - 95.)) / 17)
    elif T >= 80 and T <= 87 and RH > 85:
        # High-humidity adjustment: this term is ADDED to the regression value.
        adjustment_factor = ((RH - 85) / 10) * ((87 - T) / 5)

    # Full regression equation.
    return (-42.379 + 2.04901523*T + 10.14333127*RH - .22475541*T*RH - .00683783*T*T -
            .05481717*RH*RH + .00122874*T*T*RH + .00085282*T*RH*RH - .00000199*T*T*RH*RH) + adjustment_factor

# Element-wise wrapper for array-like inputs. Declaring otypes lets the
# wrapper return an empty float array for empty input instead of raising.
nws_heat_index_vectorized = np.vectorize(nws_heat_index, otypes=[float])

In [None]:
our_calc_hi = nws_heat_index_vectorized(input_data_nona['tmpf'], input_data_nona['relh'])

In [None]:
# Heat index from the equation above against the reported 'feel' column.
fig, ax = plt.subplots()
ax.scatter(our_calc_hi, input_data_nona['feel'])
ax.set_xlabel("Found HI Equation (F)")
ax.set_ylabel("IEM Reported Feels Like")

In [None]:
# Same comparison, restricted to the 80-120 range on both axes.
fig, ax = plt.subplots()
ax.scatter(our_calc_hi, input_data_nona['feel'])
ax.set_xlabel("Found HI Equation (F)")
ax.set_ylabel("IEM Reported Feels Like")
ax.set_xlim(80, 120)
ax.set_ylim(80, 120)

Now let's see whether we can get to a better answer with multiple linear regression. Can we recover exactly the right answer?

In [None]:
# Start with the simplest possible model: a single Dense unit with no
# activation, taking one input feature.
layer_1 = Dense(units=1, input_shape=[1], activation=None)
model = Sequential()
model.add(layer_1)

In [None]:
model.compile(optimizer='sgd', loss='mean_squared_error')

In [None]:
# One epoch in mini-batches of 20 samples, with temperature as the only
# predictor and the reported 'feel' column as the target.
model.fit(input_data_nona['tmpf'].values, input_data_nona['feel'].values, epochs=1, batch_size=20)

In [None]:
# Rebuild the single-unit model from scratch, this time compiled with Adam
# at a small explicit learning rate instead of the 'sgd' string default.
layer_1 = Dense(units=1, input_shape=[1], activation=None)
model = Sequential()
model.add(layer_1)
model.compile(
    optimizer=keras.optimizers.legacy.Adam(learning_rate=0.0001),
    loss='mean_squared_error',
)

In [None]:
model.summary()

In [None]:
# Ten epochs in mini-batches of 20 samples: temperature -> reported 'feel'.
model.fit(input_data_nona['tmpf'].values, input_data_nona['feel'].values, epochs=10, batch_size=20)

In [None]:
# Same single-unit model once more; the only change is that the Adam
# optimizer now also clips gradients by norm (clipnorm=0.001).
layer_1 = Dense(units=1, input_shape=[1], activation=None)
model = Sequential()
model.add(layer_1)
model.compile(
    optimizer=keras.optimizers.legacy.Adam(learning_rate=0.0001, clipnorm=0.001),
    loss='mean_squared_error',
)

In [None]:
# Five epochs; batch_size is not passed, so Keras' default applies.
model.fit(input_data_nona['tmpf'].values, input_data_nona['feel'].values, epochs=5)

In [None]:
# Pass the underlying NumPy array, matching the .values input the model was
# fit on and every other fit/predict call in this notebook.
predictions = model.predict(input_data_nona['tmpf'].values)

In [None]:
# Predicted vs. reported values; the dashed grey diagonal is the 1:1 line.
fig, ax = plt.subplots()
ax.scatter(predictions, input_data_nona['feel'])
ax.set_xlabel("Predicted from model (F)")
ax.set_ylabel("IEM Reported Feels Like")
ax.set_xlim(50, 120)
ax.set_ylim(50, 120)
ax.plot([50, 120], [50, 120], color='grey', ls='--')

In [None]:
# Two input features feeding one unit with no activation: a multiple
# linear regression. The input shape is declared with an explicit Input.
model = Sequential([
    keras.Input(shape=(2,)),
    Dense(units=1, activation=None),
])

model.compile(
    optimizer=keras.optimizers.legacy.Adam(learning_rate=0.001, clipnorm=0.01),
    loss='mean_squared_error',
)

In [None]:
model.output_shape

In [None]:
model.input_shape

In [None]:
# Five epochs with two predictors (temperature and relative humidity) and
# the reported 'feel' column as the target.
model.fit(x=input_data_nona[['tmpf', 'relh']].values, 
          y=input_data_nona['feel'].values, epochs=5)

In [None]:
predictions = model.predict(input_data_nona[['tmpf', 'relh']].values)

In [None]:
# Predicted vs. reported values; the dashed grey diagonal is the 1:1 line.
fig, ax = plt.subplots()
ax.scatter(predictions, input_data_nona['feel'])
ax.set_xlabel("Predicted from model (F)")
ax.set_ylabel("IEM Reported Feels Like")
ax.set_xlim(50, 120)
ax.set_ylim(50, 120)
ax.plot([50, 120], [50, 120], color='grey', ls='--')

Okay, that's better, but can we make it even better?

In [None]:
# Two stacked Dense layers (2 units, then 1 output unit), both without an
# activation function, on top of a two-feature input.
model = Sequential([
    keras.Input(shape=(2,)),
    Dense(units=2, activation=None),
    Dense(units=1, activation=None),
])

model.compile(
    optimizer=keras.optimizers.legacy.Adam(learning_rate=0.001, clipnorm=0.01),
    loss='mean_squared_error',
)

In [None]:
# Five epochs: (temperature, relative humidity) -> reported 'feel'.
model.fit(x=input_data_nona[['tmpf', 'relh']].values, 
          y=input_data_nona['feel'].values, epochs=5)

In [None]:
predictions = model.predict(input_data_nona[['tmpf', 'relh']].values)

In [None]:
# Predicted vs. reported values; the dashed grey diagonal is the 1:1 line.
fig, ax = plt.subplots()
ax.scatter(predictions, input_data_nona['feel'])
ax.set_xlabel("Predicted from model (F)")
ax.set_ylabel("IEM Reported Feels Like")
ax.set_xlim(50, 120)
ax.set_ylim(50, 120)
ax.plot([50, 120], [50, 120], color='grey', ls='--')

In [None]:
# Four stacked Dense layers (5 -> 3 -> 6 -> 1 units) on a two-feature
# input. Every layer uses activation=None, so no nonlinearity is applied
# between them.
model = Sequential([
    keras.Input(shape=(2,)),
    Dense(units=5, activation=None),
    Dense(units=3, activation=None),
    Dense(units=6, activation=None),
    Dense(units=1, activation=None),
])

model.compile(
    optimizer=keras.optimizers.legacy.Adam(learning_rate=0.001, clipnorm=0.01),
    loss='mean_squared_error',
)

In [None]:
# Five epochs: (temperature, relative humidity) -> reported 'feel'.
model.fit(x=input_data_nona[['tmpf', 'relh']].values, 
          y=input_data_nona['feel'].values, epochs=5)

In [None]:
predictions = model.predict(input_data_nona[['tmpf', 'relh']].values)

In [None]:
# Predicted vs. reported values; the dashed grey diagonal is the 1:1 line.
fig, ax = plt.subplots()
ax.scatter(predictions, input_data_nona['feel'])
ax.set_xlabel("Predicted from model (F)")
ax.set_ylabel("IEM Reported Feels Like")
ax.set_xlim(50, 120)
ax.set_ylim(50, 120)
ax.plot([50, 120], [50, 120], color='grey', ls='--')

In [None]:
# Three hidden layers of 50 ReLU units each, followed by a single output
# unit with no activation. Compiled with Adam at a very small learning
# rate and mean squared error loss.
model = Sequential([
    keras.Input(shape=(2,)),
    Dense(units=50, activation='relu'),
    Dense(units=50, activation='relu'),
    Dense(units=50, activation='relu'),
    Dense(units=1, activation=None),
])

model.compile(
    optimizer=keras.optimizers.legacy.Adam(learning_rate=0.00001),
    loss='mean_squared_error',
)

In [None]:
# Ten epochs: (temperature, relative humidity) -> reported 'feel'.
model.fit(x=input_data_nona[['tmpf', 'relh']].values, 
          y=input_data_nona['feel'].values, epochs=10)

In [None]:
predictions = model.predict(input_data_nona[['tmpf', 'relh']].values)

In [None]:
# Predicted vs. reported values; the dashed grey diagonal is the 1:1 line.
fig, ax = plt.subplots()
ax.scatter(predictions, input_data_nona['feel'])
ax.set_xlabel("Predicted from model (F)")
ax.set_ylabel("IEM Reported Feels Like")
ax.set_xlim(50, 120)
ax.set_ylim(50, 120)
ax.plot([50, 120], [50, 120], color='grey', ls='--')

In [None]:
# Same 3 x 50 ReLU architecture with a single output unit, but compiled
# with the Huber loss instead of mean squared error.
model = Sequential([
    keras.Input(shape=(2,)),
    Dense(units=50, activation='relu'),
    Dense(units=50, activation='relu'),
    Dense(units=50, activation='relu'),
    Dense(units=1, activation=None),
])

model.compile(
    optimizer=keras.optimizers.legacy.Adam(learning_rate=0.00001),
    loss='huber',
)

In [None]:
# Five epochs: (temperature, relative humidity) -> reported 'feel'.
model.fit(x=input_data_nona[['tmpf', 'relh']].values, 
          y=input_data_nona['feel'].values, epochs=5)

In [None]:
predictions_2 = model.predict(input_data_nona[['tmpf', 'relh']].values)

In [None]:
# Predicted vs. reported values; the dashed grey diagonal is the 1:1 line.
fig, ax = plt.subplots()
ax.scatter(predictions_2, input_data_nona['feel'])
ax.set_xlabel("Predicted from model (F)")
ax.set_ylabel("IEM Reported Feels Like")
ax.set_xlim(50, 120)
ax.set_ylim(50, 120)
ax.plot([50, 120], [50, 120], color='grey', ls='--')

In [None]:
# A Normalization layer needs statistics to rescale with: either explicit
# mean/variance arguments or values learned through adapt(). adapt()
# computes the per-feature mean and variance from the training features so
# the layer actually standardizes its inputs.
feature_normalizer = keras.layers.Normalization(axis=-1)
feature_normalizer.adapt(input_data_nona[['tmpf', 'relh']].values)

# Two-feature input -> normalization -> one sigmoid hidden layer and two
# ReLU hidden layers (50 units each) -> single output unit, no activation.
model = Sequential()
model.add(keras.Input(shape=(2,)))
model.add(feature_normalizer)
model.add(Dense(units=50, activation='sigmoid'))
model.add(Dense(units=50, activation='relu'))
model.add(Dense(units=50, activation='relu'))

model.add(Dense(units=1, activation=None))

# Adam with its default settings, Huber loss.
model.compile(optimizer=keras.optimizers.legacy.Adam(), loss='huber')

In [None]:
# Five epochs. The inputs are converted to tensors explicitly here rather
# than passed as NumPy arrays via .values.
model.fit(x=tf.convert_to_tensor(input_data_nona[['tmpf', 'relh']]), 
          y=tf.convert_to_tensor(input_data_nona['feel']), epochs=5)

In [None]:
predictions_normed = model.predict(input_data_nona[['tmpf', 'relh']].values)

In [None]:
# Predicted vs. reported values; the dashed grey diagonal is the 1:1 line.
fig, ax = plt.subplots()
ax.scatter(predictions_normed, input_data_nona['feel'])
ax.set_xlabel("Predicted from model (F)")
ax.set_ylabel("IEM Reported Feels Like")
ax.set_xlim(50, 120)
ax.set_ylim(50, 120)
ax.plot([50, 120], [50, 120], color='grey', ls='--')

In [None]:
model.save("my_model.keras")

In [None]:
model.summary()