In [1]:
from pathlib import Path
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn import decomposition
from sklearn.manifold import TSNE

In [2]:
# Read the merged CSV and drop the id1/id2/time/x/y columns before aggregating.
data = pd.read_csv(
    '~/Desktop/columbia/capstone/fire-regimes/data/merged_output.csv'
).drop(columns=['id1', 'id2', 'time', 'x', 'y'])

# Average rows that share the same (source_file, level) pair.
df_dedup = data.groupby(['source_file', 'level'], as_index=False).mean()

# Long -> wide: one row per source_file, one column per (variable, level) pair,
# with the column labels flattened to "<level>-<variable>".
df_pivoted = df_dedup.set_index(['source_file', 'level']).unstack('level')
df_pivoted.columns = [f'{level}-{var}' for var, level in df_pivoted.columns]

# Discard every row that has at least one missing value.
X = df_pivoted.dropna(axis=0, how='any')

# Standardise each column; `scaler` keeps the fitted statistics, `X` becomes a NumPy array.
scaler = StandardScaler()
X = scaler.fit_transform(X)

### Wrap the data in a torch DataLoader

In [3]:
# Convert the scaled array to a float32 tensor.
train_data = torch.from_numpy(X).to(torch.float32)

# Iterate over the tensor in shuffled mini-batches of 64 rows.
train_loader = torch.utils.data.DataLoader(
    train_data,
    batch_size=64,
    shuffle=True,
)

### An AutoEncoder class with a configurable latent dimension

In [15]:
class AutoEncoder(torch.nn.Module):
    """Fully connected autoencoder with a configurable bottleneck width.

    The encoder maps ``input_dim`` features through hidden layers of 128 and
    64 units (ReLU activations) down to ``latent_dim`` values; the decoder
    mirrors that path back up to ``input_dim``. Neither the latent code nor
    the reconstruction passes through a final activation.

    Args:
        latent_dim: Number of units in the bottleneck layer.
        input_dim: Number of features per input row. Defaults to 174, the
            width that used to be hard-coded, so existing
            ``AutoEncoder(latent_dim=...)`` calls build the same network.
    """

    def __init__(self, latent_dim: int, input_dim: int = 174):
        super().__init__()

        # Kept for introspection (e.g. when logging or rebuilding the model).
        self.latent_dim = latent_dim
        self.input_dim = input_dim

        self.encoder = torch.nn.Sequential(
            torch.nn.Linear(input_dim, 128),
            torch.nn.ReLU(),
            torch.nn.Linear(128, 64),
            torch.nn.ReLU(),
            torch.nn.Linear(64, latent_dim),
        )

        self.decoder = torch.nn.Sequential(
            torch.nn.Linear(latent_dim, 64),
            torch.nn.ReLU(),
            torch.nn.Linear(64, 128),
            torch.nn.ReLU(),
            torch.nn.Linear(128, input_dim),
        )

    def forward(self, x):
        """Encode ``x`` to the latent space and return its reconstruction."""
        encoded = self.encoder(x)
        decoded = self.decoder(encoded)
        return decoded

In [None]:
# Reconstruction criterion: mean squared error between the input and the model output.
loss_function = torch.nn.MSELoss()

# Autoencoder with a 12-unit bottleneck.
model = AutoEncoder(latent_dim=12)

# Adam over every model parameter, with a very small weight decay.
optimizer = torch.optim.Adam(
    params=model.parameters(),
    lr=0.01,
    weight_decay=1e-8,
)

In [28]:
def train_model(model, n_epochs, loader=None, criterion=None, optim=None):
    """Train ``model`` to reconstruct its own input and record the per-batch loss.

    Args:
        model: Module called as ``model(batch)``; its output is compared
            against the same ``batch``.
        n_epochs: Number of full passes over ``loader``.
        loader: Iterable of input batches. Defaults to the notebook-level
            ``train_loader``.
        criterion: Callable ``criterion(reconstructed, batch)`` returning a
            scalar loss tensor. Defaults to the notebook-level
            ``loss_function``.
        optim: Optimizer to step. It must have been built over ``model``'s
            parameters, otherwise ``model`` is never updated. Defaults to the
            notebook-level ``optimizer``.

    Returns:
        ``(model, losses)`` where ``losses`` holds one detached scalar tensor
        per optimisation step, in the order the steps were taken.
    """
    # Fall back to the notebook globals so existing two-argument calls keep working.
    loader = train_loader if loader is None else loader
    criterion = loss_function if criterion is None else criterion
    optim = optimizer if optim is None else optim

    losses = []

    for _ in range(n_epochs):
        for profile in loader:
            # Forward pass: the target of the reconstruction is the input itself.
            reconstructed = model(profile)
            loss = criterion(reconstructed, profile)

            optim.zero_grad()
            loss.backward()
            optim.step()

            # Store a detached copy so the list does not keep autograd history
            # alive for every step; `.item()` still works on the result.
            losses.append(loss.detach())

    return (model, losses)

In [19]:
def rolling_average(data, window_size):
    """Return the mean of every full window of ``window_size`` consecutive values.

    Args:
        data: 1-D sequence of numbers.
        window_size: Number of consecutive values averaged per output point;
            must be at least 1.

    Returns:
        1-D NumPy array with ``len(data) - window_size + 1`` entries, or an
        empty array when ``data`` has fewer than ``window_size`` values (no
        full window exists).

    Raises:
        ValueError: If ``window_size`` is smaller than 1.
    """
    if window_size < 1:
        raise ValueError(f"window_size must be at least 1, got {window_size}")

    values = np.asarray(data)

    # np.convolve swaps its operands when the kernel is longer than the data,
    # which would yield numbers that are not window means; report "no full
    # window" explicitly instead.
    if values.size < window_size:
        return np.empty(0, dtype=float)

    kernel = np.ones(window_size) / window_size
    return np.convolve(values, kernel, mode='valid')

def plot_loss(losses, window_size):
    """Plot the rolling mean of a sequence of training losses.

    Args:
        losses: Sequence of per-step losses. Each entry may be a
            single-element tensor or a plain Python number; it is converted
            with ``float()``.
        window_size: Window length passed to ``rolling_average``.
    """
    # float() accepts both scalar tensors and ordinary numbers.
    loss_floats = [float(loss) for loss in losses]
    smoothed_losses = rolling_average(loss_floats, window_size)

    fig, ax = plt.subplots()
    ax.plot(smoothed_losses)
    ax.set_xlabel('Training step')
    ax.set_ylabel(f'Loss (rolling mean over {window_size} steps)')
    ax.set_title('Training loss')
    plt.show()

In [30]:
# Run a single training epoch and keep the per-step losses for plotting.
model, losses = train_model(model, n_epochs=1)

In [None]:
# Smooth the loss curve over 20 consecutive steps before plotting it.
plot_loss(losses, window_size=20)

In [13]:
# Inference only: turn autograd off so no graph is built for the whole dataset.
with torch.no_grad():
    r = model(train_data).numpy()          # reconstructions of every row
    l = model.encoder(train_data).numpy()  # latent codes of every row

# The inputs themselves as a NumPy array.
train = train_data.numpy()

In [None]:
# Project the latent codes to two dimensions with t-SNE (fixed seed).
tsne = TSNE(n_components=2, random_state=1)
X_tsne = tsne.fit_transform(l)

# Scatter the first embedding coordinate against the second.
plt.scatter(*X_tsne.T)

In [14]:
# Write the latent codes to latent.npy in the current working directory.
np.save(file='latent.npy', arr=l)