In [None]:
import numpy as np
import pandas as pd
import hvplot.pandas
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
import os

In [None]:
# Load the encoded housing data and drop the `State_TX` column.
# `infer_datetime_format` is no longer passed: it is deprecated since
# pandas 2.0 (it emits a warning and is scheduled for removal) and had no
# effect on this call.
# NOTE(review): `parse_dates=True` only asks pandas to parse the *index*, and
# no `index_col` is given, so the "Date" column presumably stays as read from
# the file. Confirm whether `parse_dates=["Date"]` was intended.
df = (
    pd.read_csv('../../../../Data/housing_data_encoded.csv', parse_dates=True)
    .drop(columns="State_TX")
    .sort_index()
)
df

In [None]:
#Citation: https://stackoverflow.com/questions/53731432/return-pandas-multiindex-as-list-of-tuples

# Re-key the frame by a (Region_ID, Bedrooms, Date) MultiIndex built from the
# "Identifier", "Bedroom" and "Date" columns, then drop those columns and sort.
# NOTE(review): Series.replace(" ", "") only substitutes cells whose entire
# value is a single space; it does not strip spaces inside longer strings.
# If stripping was the goal, `.str.replace(" ", "")` is needed. Confirm intent.
key_columns = ["Identifier", "Bedroom", "Date"]
arrays = [df[column].replace(" ", "") for column in key_columns]

tuples = list(zip(*arrays))
index = pd.MultiIndex.from_tuples(tuples, names=['Region_ID', 'Bedrooms','Date'])

TX_nbhoods_df = (
    df.set_index(index)
    .drop(columns=key_columns)
    .sort_index()
)
TX_nbhoods_df

In [None]:
# Display the (Region_ID, Bedrooms, Date) MultiIndex to check how the frame is keyed.
TX_nbhoods_df.index

In [None]:
def window_data(df, window, feature_col_number, target_col_number):
    """Slice a frame into rolling feature windows and next-step targets.

    Sample ``k`` uses rows ``k .. k + window - 1`` of the feature column as
    its features and row ``k + window`` of the target column as its target.
    Every row that has a full window before it is used as a target, so a
    frame with ``len(df)`` rows yields ``len(df) - window`` samples. (The
    previous loop bound, ``len(df) - window - 1``, silently dropped the most
    recent sample.)

    Parameters
    ----------
    df : pandas.DataFrame
        Rows in chronological order.
    window : int
        Number of consecutive rows in each feature window.
    feature_col_number : int
        Positional index of the feature column.
    target_col_number : int
        Positional index of the target column.

    Returns
    -------
    tuple of numpy.ndarray
        ``X`` with shape ``(n_samples, window)`` and ``y`` with shape
        ``(n_samples, 1)``. When ``df`` has no more than ``window`` rows both
        arrays are returned with zero samples.
    """
    feature_values = df.iloc[:, feature_col_number].to_numpy()
    target_values = df.iloc[:, target_col_number].to_numpy()

    n_samples = len(df) - window
    if n_samples <= 0:
        # Keep the documented 2-D shapes even when nothing can be windowed.
        return (np.empty((0, window), dtype=feature_values.dtype),
                np.empty((0, 1), dtype=target_values.dtype))

    X = np.array([feature_values[start:start + window] for start in range(n_samples)])
    y = target_values[window:].reshape(-1, 1)
    return X, y

In [None]:
def _build_lstm_model(window_size, number_units, dropout_fraction):
    """Return a compiled stacked-LSTM regressor for univariate windows of `window_size` steps."""
    model = Sequential()

    model.add(LSTM(
        units=number_units,
        return_sequences=True,
        input_shape=(window_size, 1))
        )
    model.add(Dropout(dropout_fraction))

    model.add(LSTM(units=number_units, return_sequences=True))
    model.add(Dropout(dropout_fraction))

    model.add(LSTM(units=number_units))
    model.add(Dropout(dropout_fraction))

    model.add(Dense(1))

    model.compile(optimizer="adam", loss="mean_squared_error")
    return model


def build_by_region_beds(tuple_, window_size, number_units=5, dropout_fraction=0.2,
                         epochs=10, batch_size=1):
    """Train and evaluate an LSTM price model for one (region, bedrooms) series.

    The "Price" series selected from ``TX_nbhoods_df`` is windowed with
    ``window_data``, split 70/30 in chronological order, min-max scaled, and
    fed to a three-layer LSTM.

    Parameters
    ----------
    tuple_ : tuple
        Index key whose first two entries are (region id, bedrooms); any
        further entries are ignored.
    window_size : int
        Number of past prices used to predict the next one.
    number_units : int, default 5
        Units in each LSTM layer.
    dropout_fraction : float, default 0.2
        Dropout rate applied after each LSTM layer.
    epochs : int, default 10
        Training epochs.
    batch_size : int, default 1
        Training batch size.

    Returns
    -------
    list
        ``[final_df, loss, X_old]`` where ``final_df`` holds the "Real" and
        "Predicted" prices of the test period indexed by date, ``loss`` is
        the value returned by ``model.evaluate`` on the scaled test split,
        and ``X_old`` is the full price history with its column named "Real".

    Raises
    ------
    ValueError
        If the series is too short to produce at least one training and one
        test sample for this window size.
    """
    region_bed = tuple_[:2]
    region_bed_df = TX_nbhoods_df.loc[region_bed]
    # Non-mutating sort: avoids modifying a slice of the shared frame in place.
    prices_df = region_bed_df.loc[:, ["Price"]].sort_index()

    feature_column = 0
    target_column = 0
    X, y = window_data(prices_df, window_size, feature_column, target_column)

    split = int(0.7 * len(X))
    if split == 0:
        raise ValueError(
            f"Series {region_bed} has {len(prices_df)} rows, too few to build "
            f"train and test samples with window_size={window_size}"
        )
    X_train, X_test = X[:split], X[split:]
    y_train, y_test = y[:split], y[split:]

    # The rename result was previously discarded; apply it so the history
    # column carries the same "Real" label as the test-period frame.
    X_old = prices_df.rename(columns={"Price": "Real"})

    # Fit the scalers on the training split only so that the range of the
    # held-out prices does not leak into training. Features and targets get
    # separate scalers instead of refitting a single shared one.
    x_scaler = MinMaxScaler().fit(X_train)
    y_scaler = MinMaxScaler().fit(y_train)
    X_train = x_scaler.transform(X_train)
    X_test = x_scaler.transform(X_test)
    y_train_scaled = y_scaler.transform(y_train)
    y_test_scaled = y_scaler.transform(y_test)

    # Keras LSTM layers expect (samples, time steps, features).
    X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
    X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))

    model = _build_lstm_model(X_train.shape[1], number_units, dropout_fraction)

    # shuffle=False keeps the chronological order of the samples.
    model.fit(X_train, y_train_scaled, epochs=epochs, shuffle=False,
              batch_size=batch_size, verbose=1)

    loss = model.evaluate(X_test, y_test_scaled)
    predicted = model.predict(X_test)
    predicted_prices = y_scaler.inverse_transform(predicted)

    # window_data takes target k from row k + window_size, so the test
    # targets start at row window_size + split. Indexing from that offset
    # (rather than from the tail of the index) keeps every price on its own date.
    test_start = window_size + split
    test_dates = prices_df.index[test_start:test_start + len(y_test)]

    final_df = pd.DataFrame({
        "Real": y_test.ravel(),
        "Predicted": predicted_prices.ravel()},
        index=test_dates)

    return [final_df, loss, X_old]


In [None]:
# Train one model per (region, bedrooms) series and look-back window, then
# export the predictions, the loss and two plots under ../Results/.
WINDOW_SIZES = [1, 3, 6, 12]
OUTPUT_FILES = ("Results.csv", "Loss.csv", "Plot.png", "Plot2.png")

# build_by_region_beds only uses the first two index levels, so iterate over
# the unique (region, bedrooms) pairs. Iterating over the full index trained
# the same series once per date, and iterating a set gave an arbitrary order.
region_bed_pairs = TX_nbhoods_df.index.droplevel("Date").unique()

for region_bed in region_bed_pairs:
    for window_size in WINDOW_SIZES:
        path = f'../Results/Region/{region_bed[0]}/Bedrooms/{region_bed[1]}/Window-Size/{str(window_size)}/'

        # Skip before training, not after: existing results are never
        # overwritten, so fitting a model for them is wasted work. Checking
        # the files (not just the directory) lets a half-written export from
        # an interrupted run be regenerated.
        if all(os.path.isfile(os.path.join(path, name)) for name in OUTPUT_FILES):
            continue

        output, loss_val, old_prices = build_by_region_beds(region_bed, window_size)

        loss_index = pd.MultiIndex.from_tuples([tuple(region_bed[:2])], names=["Region","Bedrooms"])
        loss = pd.DataFrame({"Loss": [loss_val]}, index=loss_index)

        # NOTE(review): the figures behind these axes are never closed, so
        # they accumulate for the duration of the cell. Closing them needs
        # matplotlib.pyplot, which this notebook does not import.
        Predicted_vs_Real = output.plot()
        Train_vs_Test = pd.concat([old_prices, output["Predicted"]],axis=1,join='outer')
        Train_vs_Test_plt = Train_vs_Test.plot()

        os.makedirs(path, exist_ok=True)
        output.to_csv(os.path.join(path, "Results.csv"))
        loss.to_csv(os.path.join(path, "Loss.csv"))

        # Citation: https://stackoverflow.com/questions/18992086/save-a-pandas-series-histogram-plot-to-file
        Predicted_vs_Real.figure.savefig(os.path.join(path, "Plot.png"))
        Train_vs_Test_plt.figure.savefig(os.path.join(path, "Plot2.png"))