In [1]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt

In [2]:
from sklearn.datasets import make_classification

# Settings for the synthetic binary-classification problem, gathered in one
# place so they can be read (and tweaked) without touching the call itself.
synthetic_settings = dict(
    n_samples=2000,
    n_features=20,           # total number of feature columns
    n_informative=15,        # columns that carry class signal
    n_redundant=3,           # linear combinations of the informative columns
    n_repeated=0,            # exact duplicates of other columns
    n_classes=2,             # number of distinct labels in y
    n_clusters_per_class=2,
    flip_y=0.01,             # fraction of labels assigned at random
    random_state=42,         # fixed seed so the dataset is reproducible
)

# Draw the feature matrix and label vector.
X_orig, y_orig = make_classification(**synthetic_settings)

print(f"Original data shape: {X_orig.shape}")
# Original data shape: (2000, 20)

Original data shape: (2000, 20)


In [3]:
MOTOR = "Nabla"
PATH = f"../../dataset/{MOTOR}/"
TRAIN_FILE = "_all_scaled_train.csv"
TEST_FILE = "_all_scaled_test.csv"


def load_motor_split(path, file_suffix):
    """Assemble one data split (train or test) from its per-quantity CSV files.

    Five files named ``{path}{name}{file_suffix}`` are read, with ``name`` in
    ``idiq``, ``speed``, ``xgeom``, ``hysteresis`` and ``joule``.

    Parameters
    ----------
    path : str
        Directory prefix, including the trailing separator.
    file_suffix : str
        Part of the file name that follows the quantity name, e.g.
        ``"_all_scaled_train.csv"``.

    Returns
    -------
    pandas.DataFrame
        Columns in this order: every ``idiq`` column except ``"Unnamed: 0"``,
        ``speed``, every ``xgeom`` column except ``"Unnamed: 0"``,
        ``hysteresis``, ``joule``.

    Raises
    ------
    FileNotFoundError
        If one of the CSV files does not exist.
    KeyError
        If an expected column (``"Unnamed: 0"``, ``"N"`` or ``"total"``) is
        missing from the file it is read from.
    """

    def read_part(name):
        # All files of a split share the same prefix and suffix.
        return pd.read_csv(f"{path}{name}{file_suffix}")

    # "Unnamed: 0" is presumably the row index written out with the CSV
    # (TODO confirm); it is dropped so it does not become a feature.
    frame = read_part("idiq").drop(columns="Unnamed: 0")
    # Column assignment aligns on the row index, so the idiq rows define
    # which rows the split contains.
    frame["speed"] = read_part("speed")["N"]
    frame = pd.concat(
        [frame, read_part("xgeom").drop(columns="Unnamed: 0")], axis=1
    )
    frame["hysteresis"] = read_part("hysteresis")["total"]
    frame["joule"] = read_part("joule")["total"]
    return frame


# Both splits go through the same assembly so their columns always match.
train_data = load_motor_split(PATH, TRAIN_FILE)
test_data = load_motor_split(PATH, TEST_FILE)


In [4]:
class MotorDataset(Dataset):
    """Map-style dataset pairing feature rows with target rows.

    Parameters
    ----------
    X, y : pandas.DataFrame, pandas.Series, numpy.ndarray, torch.Tensor or
        any array-like accepted by ``numpy.asarray``
        Features and targets. Both are copied into ``float32`` tensors and
        must have the same number of rows.

    Raises
    ------
    ValueError
        If ``X`` and ``y`` do not have the same number of rows.
    """

    def __init__(self, X, y):
        self.X = self._to_float_tensor(X)
        self.y = self._to_float_tensor(y)
        # Fail at construction time rather than with an IndexError in the
        # middle of an epoch.
        if len(self.X) != len(self.y):
            raise ValueError(
                f"X and y must have the same number of rows, "
                f"got {len(self.X)} and {len(self.y)}"
            )

    @staticmethod
    def _to_float_tensor(data):
        """Return an independent float32 tensor holding the values of ``data``."""
        if isinstance(data, torch.Tensor):
            return data.detach().clone().to(torch.float32)
        # pandas objects expose to_numpy(); everything else goes through numpy.
        values = data.to_numpy() if hasattr(data, "to_numpy") else np.asarray(data)
        return torch.tensor(values, dtype=torch.float32)

    def __len__(self):
        """Number of samples (rows of ``X``)."""
        return len(self.X)

    def __getitem__(self, index):
        """Return the ``(features, target)`` pair stored at ``index``."""
        return self.X[index], self.y[index]

In [5]:
class Autoencoder(nn.Module):
    """Fully connected autoencoder: input_dim -> 128 -> 64 -> latent_dim and back.

    Parameters
    ----------
    input_dim : int
        Number of features in each input row; also the size of the output.
    latent_dim : int
        Size of the encoded representation.
    output_activation : torch.nn.Module, optional
        Final layer of the decoder. Defaults to ``nn.Sigmoid()``, which
        confines every reconstructed value to (0, 1) and is therefore only
        suitable when the inputs are scaled to that range. Pass
        ``nn.Identity()`` for inputs that can be negative or exceed 1
        (for example standardised features).
    """

    def __init__(self, input_dim, latent_dim, output_activation=None):
        super().__init__()
        if output_activation is None:
            # Build the default here so instances never share a module object.
            output_activation = nn.Sigmoid()

        # Encoder: compress the input down to the latent code.
        self.encoder = nn.Sequential(
            nn.Linear(input_dim, 128),
            nn.ReLU(),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Linear(64, latent_dim),
            nn.ReLU(),  # latent code is non-negative because of this ReLU
        )
        # Decoder: mirror of the encoder, expanding the code back to input_dim.
        self.decoder = nn.Sequential(
            nn.Linear(latent_dim, 64),
            nn.ReLU(),
            nn.Linear(64, 128),
            nn.ReLU(),
            nn.Linear(128, input_dim),
            output_activation,
        )

    def forward(self, x):
        """Encode ``x`` and return its reconstruction (same shape as ``x``)."""
        return self.decoder(self.encoder(x))


In [6]:
# Columns that are predicted quantities rather than model inputs.
target = ['hysteresis', 'joule']

# Each dataset yields (features, targets) pairs; features are every column
# that is not listed in `target`.
train_dataset = MotorDataset(train_data.drop(columns = target), train_data[target])
test_dataset = MotorDataset(test_data.drop(columns = target), test_data[target])

BATCH_SIZE = 256

# Training batches are reshuffled every epoch. The evaluation loader keeps a
# fixed order: shuffling there is unnecessary and would consume global RNG
# state on every validation pass.
train_loader = DataLoader(train_dataset, batch_size = BATCH_SIZE, shuffle = True)
test_loader = DataLoader(test_dataset, batch_size = BATCH_SIZE, shuffle = False)

In [7]:
# Prefer the GPU when one is visible to PyTorch, otherwise stay on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# The network reconstructs the feature columns only, so its width is the
# number of columns left once the target columns are removed.
feature_columns = train_data.columns.drop(target)
input_dim = len(feature_columns)
latent_dim = 20

# Build the model and place its parameters on the selected device.
autoencoder_model = Autoencoder(input_dim, latent_dim)
autoencoder_model.to(device)

print(autoencoder_model)

Autoencoder(
  (encoder): Sequential(
    (0): Linear(in_features=11, out_features=128, bias=True)
    (1): ReLU()
    (2): Linear(in_features=128, out_features=64, bias=True)
    (3): ReLU()
    (4): Linear(in_features=64, out_features=20, bias=True)
    (5): ReLU()
  )
  (decoder): Sequential(
    (0): Linear(in_features=20, out_features=64, bias=True)
    (1): ReLU()
    (2): Linear(in_features=64, out_features=128, bias=True)
    (3): ReLU()
    (4): Linear(in_features=128, out_features=11, bias=True)
    (5): Sigmoid()
  )
)


In [8]:
# Reconstruction objective: mean squared error between input and output.
criterion = nn.MSELoss()
optimizer = optim.Adam(autoencoder_model.parameters(), lr=0.001)

epochs = 10
train_losses = []   # per-epoch mean training loss
val_losses = []     # per-epoch mean loss on the test loader

for epoch in range(epochs):
    # ---- optimisation pass over the training batches ----
    autoencoder_model.train()
    running_train_loss = 0.0

    # The second element of each batch holds the hysteresis/joule targets;
    # the autoencoder only reconstructs its own input, so it is discarded.
    for batch, _ in train_loader:
        batch = batch.to(device)
        optimizer.zero_grad()
        batch_loss = criterion(autoencoder_model(batch), batch)
        batch_loss.backward()
        optimizer.step()
        # criterion returns a batch mean; weight it by the batch size so the
        # epoch figure is a true per-sample average even if the last batch
        # is smaller.
        running_train_loss += batch_loss.item() * len(batch)

    epoch_train_loss = running_train_loss / len(train_loader.dataset)
    train_losses.append(epoch_train_loss)

    # ---- evaluation pass, no gradients tracked ----
    autoencoder_model.eval()
    running_val_loss = 0.0
    with torch.no_grad():
        for batch, _ in test_loader:
            batch = batch.to(device)
            batch_loss = criterion(autoencoder_model(batch), batch)
            running_val_loss += batch_loss.item() * len(batch)

    epoch_val_loss = running_val_loss / len(test_loader.dataset)
    val_losses.append(epoch_val_loss)

    print(f'Epoch [{epoch+1}/{epochs}], Train Loss: {epoch_train_loss:.4f}, Val Loss: {epoch_val_loss:.4f}')

Epoch [1/10], Train Loss: 1.0145, Val Loss: 0.8321
Epoch [2/10], Train Loss: 0.7128, Val Loss: 0.6379
Epoch [3/10], Train Loss: 0.6256, Val Loss: 0.6186
Epoch [4/10], Train Loss: 0.6035, Val Loss: 0.5883
Epoch [5/10], Train Loss: 0.5837, Val Loss: 0.5851
Epoch [6/10], Train Loss: 0.5771, Val Loss: 0.5718
Epoch [7/10], Train Loss: 0.5716, Val Loss: 0.5695
Epoch [8/10], Train Loss: 0.5690, Val Loss: 0.5642
Epoch [9/10], Train Loss: 0.5646, Val Loss: 0.5603
Epoch [10/10], Train Loss: 0.5584, Val Loss: 0.5616
