# In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
from sklearn.cluster import KMeans
from sklearn.preprocessing import MinMaxScaler
from torch.utils.data import DataLoader
from tqdm import tqdm
from datasets import PolynomialDataset
from models import VAE

# Build the project dataset. The two string arguments and the scaler passed as
# `transform` are interpreted by PolynomialDataset (imported from `datasets`).
# NOTE(review): whether `dataset.features` already holds the scaled values
# depends on PolynomialDataset — confirm before relying on it.
dataset = PolynomialDataset("Click_details", "Train_details", transform=MinMaxScaler())

# `.iloc` is an indexer object and has no `.values` attribute, so the previous
# `dataset.features.iloc.values` raised AttributeError. Convert the whole
# feature table to a NumPy array instead.
features = dataset.features.to_numpy()
dataset_feature = torch.tensor(features, dtype=torch.float32)

# Shuffled mini-batches of 32 rows. Because of the shuffling, iteration order
# does NOT match the row order of `dataset.features`.
dataloader = DataLoader(dataset_feature, batch_size=32, shuffle=True)

# Model dimensions: the input width is the size of the second axis of
# `features`; the latent space has 5 dimensions.
latent_dim = 5
input_dim = features.shape[1]

# Model, reconstruction criterion (mean squared error) and Adam optimiser
# over all model parameters.
vae = VAE(input_dim, latent_dim)
criterion = nn.MSELoss()
optimizer = optim.Adam(vae.parameters(), lr=1e-4)

# Train the VAE for a fixed number of epochs and print the mean per-batch
# loss after each epoch.
num_epochs = 50
kl_weight = 0.1  # scales the KL term before it is added to the reconstruction loss

vae.train()
for epoch in range(num_epochs):
    total_loss = 0
    progress = tqdm(dataloader, desc=f"Epoch {epoch + 1}/{num_epochs}")
    for batch in progress:
        optimizer.zero_grad()

        # The model returns four values; the fourth is not needed for training.
        decoded, mu, logvar, _ = vae(batch)

        # Reconstruction error between the decoder output and the input batch.
        loss_recon = criterion(decoded, batch)

        # KL term, averaged over every element of the (mu, logvar) tensors.
        kl_elements = 1 + logvar - mu.pow(2) - logvar.exp()
        loss_kl = -0.5 * kl_elements.mean()

        loss = loss_recon + kl_weight * loss_kl

        loss.backward()
        optimizer.step()

        # Accumulate as a Python float so no autograd graph is kept alive.
        total_loss += loss.item()

    print(f"Epoch [{epoch + 1}/{num_epochs}], Loss: {total_loss / len(dataloader):.4f}")

# Encode every row of the dataset into the latent space.
vae.eval()

# The training `dataloader` shuffles its rows, so iterating it here would
# return latent vectors in a random order that no longer lines up with the
# rows of `dataset.features`. Use a separate, non-shuffled loader over the
# same tensor so that row i of `latent_vectors` corresponds to row i of the
# feature table.
eval_loader = DataLoader(
    dataset_feature,
    batch_size=dataloader.batch_size,
    shuffle=False,
)

latent_vectors = []
with torch.no_grad():  # inference only; no gradients needed
    for batch in tqdm(eval_loader, desc="Extracting latent vectors"):
        # NOTE(review): the fourth output is used as the embedding. If the VAE
        # samples it stochastically even in eval mode, `mu` (the second
        # output) may be a more stable choice — confirm against models.VAE.
        _, _, _, z = vae(batch)
        # `.cpu()` is a no-op for CPU tensors and keeps `.numpy()` valid if
        # the model output ever lives on another device.
        latent_vectors.append(z.cpu().numpy())

# Stack the per-batch arrays into a single 2-D array, one row per sample.
latent_vectors = np.vstack(latent_vectors)

# Cluster the latent vectors with K-Means (3 clusters, fixed seed) and attach
# the resulting cluster id to a copy of the feature table.
num_clusters = 3
kmeans = KMeans(n_clusters=num_clusters, random_state=42).fit(latent_vectors)
labels = kmeans.labels_

# The labels are assigned positionally, so `latent_vectors` must be in the
# same row order as `dataset.features` for the "Cluster" column to be valid.
df_clusters = dataset.features.copy()
df_clusters["Cluster"] = labels

print(df_clusters.head())


# Notebook output: KeyboardInterrupt

