In [5]:
from tensorflow import keras
from keras.models import Sequential
from keras.layers import Dense, GRU
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error


In [10]:
# Load the data sets and combine them into a single usable dataframe called data series
data_series = pd.read_csv('./data/data_avg_temp.csv')

# rename columns to be more descriptive
data_series.columns = ['date', 'avg_temp', 'avg_temp_anomaly']
data_series.drop(columns=['avg_temp_anomaly'], inplace=True)


# append to the dataframe
data_cool_degree = pd.read_csv('./data/data_cool_degree_days.csv')
data_heat_degree = pd.read_csv('./data/data_heat_degree_days.csv')
data_max_temp = pd.read_csv('./data/data_max_temp.csv')
data_min_temp = pd.read_csv('./data/data_min_temp.csv')
data_palmer_z = pd.read_csv('./data/data_palmer_z.csv')
data_pdsi = pd.read_csv('./data/data_pdsi.csv')
data_phdi = pd.read_csv('./data/data_phdi.csv')
data_pmdi = pd.read_csv('./data/data_pmdi.csv')
data_precipitation = pd.read_csv('./data/data_precipitation.csv')
Death_New_Mexico = pd.read_csv('./data/Death_New_Mexico.csv')
#read in the data for deaths here , make sure empty columns are not there . Use the drop command to just remove the columns. 
Death_New_Mexico.drop(columns = ['Unnamed: 4', 'Unnamed: 5', 'Unnamed: 6', 'Unnamed: 7', 'Unnamed: 8', 'Unnamed: 9', 'Unnamed: 10', 'Unnamed: 11'], inplace=True)
Death_New_Mexico.drop([0, 1], inplace=True)
Death_New_Mexico['Deaths'] = Death_New_Mexico['Deaths'].str.replace(',', '')
print(Death_New_Mexico.head())




# Append columns to the data series
data_series['cool_degree_days'] = data_cool_degree['Value'] 
# data_series['cool_degree_days_anomaly'] = data_cool_degree['Anomaly']

data_series['heat_degree_days'] = data_heat_degree['Value']
# data_series['heat_degree_days_anomaly'] = data_heat_degree['Anomaly']

data_series['max_temp'] = data_max_temp['Value']
# data_series['max_temp_anomaly'] = data_max_temp['Anomaly']

data_series['min_temp'] = data_min_temp['Value']
# data_series['min_temp_anomaly'] = data_min_temp['Anomaly']

data_series['palmer_z'] = data_palmer_z['Value']
# data_series['palmer_z_anomaly'] = data_palmer_z['Anomaly']

data_series['pdsi'] = data_pdsi['Value']
# data_series['pdsi_anomaly'] = data_pdsi['Anomaly']

data_series['phdi'] = data_phdi['Value']
# data_series['phdi_anomaly'] = data_phdi['Anomaly']

data_series['pmdi'] = data_pmdi['Value']
# data_series['pmdi_anomaly'] = data_pmdi['Anomaly']

data_series['precipitation'] = data_precipitation['Value']
# data_series['precipitation_anomaly'] = data_precipitation['Anomaly']




     Year Population  Births Deaths
2  2017.0  2,102,521  23,708  18672
3  2016.0  2,103,586  24,503  18260
4  2015.0  2,099,856  25,730  17687
5  2014.0  2,098,381  25,985  17564
6  2013.0  2,095,156  26,242  16780


In [11]:
# Convert dataframe to numpy array
avg_temp = data_series['avg_temp'].to_numpy()
data_series.drop(columns=['avg_temp'], inplace=True)

## All our data as a numpy except the death_rate
x = data_series.to_numpy()

# Expand first dim
x = np.expand_dims(x, axis=0)
avg_temp = np.expand_dims(avg_temp, axis=0)

##Numpy array for the deathrate 
death_rate = Death_New_Mexico['Deaths'].to_numpy()
 
death_rate = np.array(death_rate, dtype=float)

death_rate = np.expand_dims(death_rate, axis=0)
years = Death_New_Mexico['Year'].to_numpy()
years = np.expand_dims(years, axis=0) 
print(death_rate)

[[18672. 18260. 17687. 17564. 16780. 16640. 16245. 15866. 15392. 15400.
  15400. 15231. 14866. 14197. 14493. 14114. 14016. 13384. 13433. 12858.
  12613. 12456. 12500. 12106. 11689. 11130. 11225. 10549. 10473. 10381.
  10324. 10007.  9637.  9504.  9138.  9186.  8668.  9032.  8617.  8331.
   8223.  8204.  8003.  8029.  8139.  7877.  7638.  7411.  7180.  7128.
   6897.  6971.  6801.  6902.  6837.  6507.  6344.  6503.]]


In [3]:
##death_rate Dense Network
y = death_rate
X_train, X_test, y_train, y_test = train_test_split(years, y, test_size=0.2)
                                                    
model = Sequential([ ##Dense nueral network, consisting of the two inputs year and month, 
        Dense(5, activation='relu', input_shape=(X_train.shape[1],)),  # input_shape=(2,)
        Dense(1)  # Output layer
    ]) ##defines structure of model  ##Use this to train death data to then predict years for the past. x_train will be year, and y_train will be death rate. x_test and y_test split

model.compile(optimizer='adam', loss='mse') ##
^

NameError: name 'death_rate' is not defined

In [5]:
# Train the GRU model 
model_gru = Sequential( 
    [
        GRU(20, activation='relu', input_shape=(x.shape[1], x.shape[2])), 
        Dense(1) 
    ]
)

model_gru.compile(optimizer='adam', loss='mse') 
model_gru.fit(x, avg_temp, epochs=300, batch_size=1, verbose=0) 


  super().__init__(**kwargs)


<keras.src.callbacks.history.History at 0x2984c1f40>

In [10]:
# Predict the temperature by regressing the other features and then predicting average temperature
parameters = ['cool_degree_days', 'heat_degree_days', 'max_temp', 
              'min_temp', 'palmer_z', 'pdsi',
              'phdi', 'pmdi', 'precipitation']
# parameters = ['cool_degree_days']
models = {}
scalers = {}

date = data_series['date'].to_numpy()
date_splice = np.zeros((len(date), 2))

param_models = []

for i, element in enumerate(date):
    # print(element)
    # Splice this string to get year and month
    date_splice[i,0] = int(str(element)[0:4]) ##year 
    date_splice[i,1] = int(str(element)[5:7]) ##month
## split data into 12 by year to get months 
for param in parameters: ##for every parameter, we train the model. We do this 9 times because we have 9 parameters 
    print(f"PARAM", param)
    y = data_series[param].to_numpy()
    X_train, X_test, y_train, y_test = train_test_split(date_splice, y, test_size=0.2, shuffle=True) ##splits data to training and testing... use this same line to generate x_test, then use it as your input to the model

    model = Sequential([ ##Dense nueral network, consisting of the two inputs year and month, 
        Dense(5, activation='relu', input_shape=(X_train.shape[1],)),  # input_shape=(2,)
        Dense(1)  # Output layer
    ]) ##defines structure of model  ##Use this to train death data to then predict years for the past. x_train will be year, and y_train will be death rate. x_test and y_test split

    model.compile(optimizer='adam', loss='mse') ##
    model.fit(X_train, y_train, epochs=300, batch_size=16, validation_data=(X_test, y_test),verbose=0) ##training loop 

    # Save model
    param_models.append(model) ##collect all the models into param_model


#Write a script that runs 1000 times (Monte Carlo), and average out the simulations over time, and plot them together. 

PARAM cool_degree_days
PARAM heat_degree_days
PARAM max_temp
PARAM min_temp
PARAM palmer_z
PARAM pdsi
PARAM phdi
PARAM pmdi
PARAM precipitation


In [None]:
# Prediction of the average temperature
# Using the trained models to predict the parameters and then use those predictions as input for the GRU model

# Predict the parameters for the next 60 year * 12 months (720 datapoints)

# Get dates for prediction (202401-202812)
years = 60
date_predict = np.zeros((years*12, 2)) ##for year and month prediction 
for i in range(years*12):
    date_predict[i, 0] = 2024 + i // 12 ##year 
    date_predict[i, 1] = 1 + i % 12 ##month


param_predicts = np.zeros((len(parameters),years*12))

#  Predict the parameters.
for k, model in enumerate(param_models):
    for i in range(years*12):
        answer = model.predict(date_predict[i].reshape(1, -1)) ##produces predicted parameter output
        param_predicts[k, i] = answer

yearmonth_vec = np.zeros((years*12,1))
for i in range(years*12):
    yearmonth_vec[i] = date_predict[i, 0] * 100 + date_predict[i, 1] ##combined year and month value

params_predicts = np.append(yearmonth_vec, param_predicts.T, axis=1, verbose=0)

print(params_predicts)



[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 58ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step


  param_predicts[k, i] = answer


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 43ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35

In [None]:
for i in range(years*12):
    print(f"Year: {int(params_predicts[i,0]//100)} Month: {int(params_predicts[i,0]%100)}")
    print(f"Predicted parameters:")
    for j, param in enumerate(parameters):
        print(f"{param}: {params_predicts[i,j+1]}")

In [None]:

# Use predicted params for GRU input to predict avg_temp
predictions = []

for i in range(years*12):
    new_timestep = params_predicts[i,:]
    new_timestep = np.expand_dims(new_timestep, axis=0)
    x = np.append(x[:,1:,:], np.expand_dims(new_timestep, axis=0), axis=1)

    # x = np.expand_dims(params_predicts[i, :], axis=0)
    avg_temp_predict = model_gru.predict(x) ##Here's the money 
    predictions.append(avg_temp_predict)


print(predictions)


In [None]:
# plot the predictions
predictions = np.array(predictions).reshape(-1)
plt.plot(yearmonth_vec[2:], predictions[2:], label='Predicted')


In [1]:
year = int(input("What year would you like to know the average temperature in New Mexico for?"))
month = input("What month in that year are you inquiring about?")

##standardize the inputs 
if ("jan" in month.lower() or '1' in month.lower()):
    month_i = 1
elif ("feb" in month.lower() or '2' in month.lower()):
    month_i = 2
elif ("mar" in month.lower() or '3' in month.lower()):
    month_i = 3
elif ("apr" in month.lower() or '4' in month.lower()):
    month_i = 4
elif ("may" in month.lower() or '5' in month.lower()):
    month_i = 5
elif ("jun" in month.lower() or '6' in month.lower()):
    month_i = 6
elif ("jul" in month.lower() or '7' in month.lower()):
    month_i = 7
elif ("aug" in month.lower() or '8' in month.lower()):
    month_i = 8
elif ("sep" in month.lower() or '9' in month.lower()):
    month_i = 9
elif ("oct" in month.lower() or '10' in month.lower()):
    month_i = 10
elif ("nov" in month.lower() or '11' in month.lower()):
    month_i = 11
elif ("dec" in month.lower() or '12' in month.lower()):
    month_i = 12
year_i = year-2024 
index = (year_i * 12) + month_i - 1
print(f"Prediction for the month {month} in the year {year} is: {predictions[index]}")


What year would you like to know the average temperature in New Mexico for? 2068
What month in that year are you inquiring about? March


NameError: name 'predictions' is not defined