In [9]:
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import numpy as np
from losses import SelfSupConLoss


# Datasets
class PageBlocksDataset(Dataset):
    """Map-style dataset that serves entries of a feature matrix as tensors.

    Only features are stored; no labels are kept alongside them.
    """

    def __init__(self, features):
        """Copy ``features`` into one ``torch.float32`` tensor.

        Args:
            features: Data accepted by ``torch.tensor`` (e.g. nested lists
                or a NumPy array).
        """
        as_float32 = torch.tensor(features, dtype=torch.float32)
        self.features = as_float32

    def __len__(self):
        """Return the length of the stored tensor along its first dimension."""
        stored = self.features
        return len(stored)

    def __getitem__(self, idx):
        """Return the entry of the stored tensor selected by ``idx``."""
        entry = self.features[idx]
        return entry


# Neural Network for SimCLR
class SimCLRNet(nn.Module):
    """SimCLR-style network: a ``NetworkPhi`` encoder plus a projection head.

    Args:
        input_size: Number of input features per sample.
        hidden_size: Width of the projection head's hidden layer.
        output_size: Size of the last dimension returned by ``forward``.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.backbone = NetworkPhi(input_size)
        # The head consumes whatever width the backbone's final layer emits.
        backbone_dim = self.backbone.fc6.out_features
        # ``forward`` relies on ``fc1``/``fc2``; they must exist as submodules,
        # otherwise the first forward pass raises AttributeError.
        self.fc1 = nn.Linear(backbone_dim, hidden_size)
        self.fc2 = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Encode ``x`` with the backbone, then project it.

        Returns:
            The raw (un-normalised) output of ``fc2``.
        """
        x = self.backbone(x)
        x = torch.relu(self.fc1(x))
        return self.fc2(x)

# Model: https://github.com/HC-Feynman/vpu/blob/main/model/model_vec.py 
class NetworkPhi(nn.Module):
    """Six fully connected layers, each followed by a ReLU.

    Args:
        input_size: Number of input features per sample.
        hidden_size: Output width of every linear layer (default 300).
    """

    def __init__(self, input_size, hidden_size=300):
        super().__init__()
        # Attribute names fc1..fc6 are kept so state_dict keys stay stable.
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, hidden_size)
        self.fc3 = nn.Linear(hidden_size, hidden_size)
        self.fc4 = nn.Linear(hidden_size, hidden_size)
        self.fc5 = nn.Linear(hidden_size, hidden_size)
        self.fc6 = nn.Linear(hidden_size, hidden_size)

    def forward(self, X):
        """Apply fc1..fc6 in order with a ReLU after each layer.

        Because the last operation is a ReLU, every output value is >= 0.
        """
        out = X
        layers = (self.fc1, self.fc2, self.fc3, self.fc4, self.fc5, self.fc6)
        for layer in layers:
            out = self.relu(layer(out))
        return out


Epoch [1/10], Average Loss: 2.6454
Epoch [2/10], Average Loss: 2.5415
Epoch [3/10], Average Loss: 2.5216
Epoch [4/10], Average Loss: 2.5167
Epoch [5/10], Average Loss: 2.5095
Epoch [6/10], Average Loss: 2.5042
Epoch [7/10], Average Loss: 2.4929
Epoch [8/10], Average Loss: 2.4918
Epoch [9/10], Average Loss: 2.4902
Epoch [10/10], Average Loss: 2.4859


In [None]:
# Download the Page Blocks table: every column except the last becomes a
# feature in X, and the last column is kept separately as y.
page_blocks_url = 'https://raw.githubusercontent.com/HC-Feynman/vpu/main/data/pageblocks.txt'
page_blocks_df = pd.read_csv(page_blocks_url, delimiter=',', header=None)
# page_blocks_df.head()  # uncomment to inspect the first rows
X = page_blocks_df.iloc[:, :-1].values.astype(float)
y = page_blocks_df.iloc[:, -1].values

# Split BEFORE scaling so the held-out rows never influence the scaler's
# mean/variance. The row assignment is the same as splitting scaled data,
# because the split depends only on the sample count and random_state.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)
# Still defined for any later cell that uses the full scaled matrix; it now
# uses the statistics learned from the training rows only.
X_normalized = scaler.transform(X)

train_dataset = PageBlocksDataset(X_train)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

input_size = X_train.shape[1]
hidden_size = 128
output_size = 64

model = SimCLRNet(input_size, hidden_size, output_size)

# Feature Augmentation Function
def augment_features(x, noise_std=0.1):
    """Return ``x`` plus zero-mean Gaussian noise of the same shape.

    Args:
        x: Tensor to perturb; it is not modified in place.
        noise_std: Factor multiplied onto standard-normal samples, i.e. the
            standard deviation of the added noise. Defaults to 0.1.

    Returns:
        A new tensor ``x + noise`` with the same shape, dtype and device as
        ``x`` (the noise is drawn with ``torch.randn_like``).
    """
    noise = torch.randn_like(x) * noise_std
    return x + noise


# Training Loop
# Place the model on its device first, then build the optimizer over its
# parameters (the order recommended by the torch.optim documentation).
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
contrastive_loss = SelfSupConLoss(temperature=0.5, reduction='mean')
optimizer = optim.Adam(model.parameters(), lr=1e-3)

# Single source of truth for both the loop bound and the progress message.
num_epochs = 10

for epoch in range(num_epochs):
    total_loss = 0
    for data in train_loader:
        data = data.to(device)
        # Second view of the same batch: the inputs plus Gaussian noise.
        augmented_data = augment_features(data)

        z_i = model(data)
        z_j = model(augmented_data)

        loss = contrastive_loss(z_i, z_j)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    # Mean of the per-batch losses over the epoch.
    avg_loss = total_loss / len(train_loader)
    print(f'Epoch [{epoch+1}/{num_epochs}], Average Loss: {avg_loss:.4f}')
