In [1]:
import os
import sys
sys.path.append("..")

In [2]:
import mlflow

import mlflow
import lightning.pytorch as pl
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.utils.data as data
from torchvision import datasets, transforms

# Turn on MLflow autologging for PyTorch and select the "mnist" experiment
# before any training objects are created.
mlflow.pytorch.autolog()
mlflow.set_experiment(experiment_name="mnist")

# Report whether PyTorch can see a CUDA device in this environment.
cuda_available = torch.cuda.is_available()
print(cuda_available)

# Encoder and decoder are ordinary nn.Modules; any architecture can be swapped in here.
# The encoder reduces a flattened 28 * 28 input to a 3-value code.
encoder = nn.Sequential(
    nn.Linear(28 * 28, 64),
    nn.ReLU(),
    nn.Linear(64, 3),
)
# The decoder mirrors the encoder and expands the code back to 28 * 28 values.
decoder = nn.Sequential(
    nn.Linear(3, 64),
    nn.ReLU(),
    nn.Linear(64, 28 * 28),
)

# define the LightningModule
class LitAutoEncoder(pl.LightningModule):
    """Autoencoder that is trained to reconstruct its flattened input.

    Every step (train / validation / test) flattens the batch, runs it through
    ``encoder`` then ``decoder``, and scores the result with mean squared error
    against the flattened input.

    Args:
        encoder: Module applied to the flattened batch to produce the code.
        decoder: Module applied to the code; its output is compared with the
            flattened input, so the two must have matching shapes.
    """

    def __init__(self, encoder, decoder):
        super().__init__()
        self.encoder = encoder
        self.decoder = decoder

    def _reconstruction_loss(self, batch):
        """Return the MSE between the flattened inputs of ``batch`` and their reconstruction."""
        x, _ = batch  # second element of the batch is not used
        x = x.view(x.size(0), -1)  # keep the batch dimension, flatten the rest
        x_hat = self.decoder(self.encoder(x))
        return F.mse_loss(x_hat, x)

    def training_step(self, batch, batch_idx):
        """Log the reconstruction loss as ``train_loss`` and return it."""
        loss = self._reconstruction_loss(batch)
        self.log("train_loss", loss)
        return loss

    def test_step(self, batch, batch_idx):
        """Log the reconstruction loss as ``test_loss`` and return it."""
        test_loss = self._reconstruction_loss(batch)
        self.log("test_loss", test_loss)
        return test_loss

    def validation_step(self, batch, batch_idx):
        """Log the reconstruction loss as ``val_loss`` and return it.

        Returns the loss (rather than ``None``) so that all three step methods
        share the same contract.
        """
        val_loss = self._reconstruction_loss(batch)
        self.log("val_loss", val_loss)
        return val_loss

    def configure_optimizers(self):
        """Return an Adam optimizer over all module parameters with lr=1e-3."""
        return optim.Adam(self.parameters(), lr=1e-3)


# Wrap the encoder/decoder pair in the Lightning module that will be trained.
autoencoder = LitAutoEncoder(encoder=encoder, decoder=decoder)

# Fetch the MNIST train and test splits (downloaded into ./MNIST when missing),
# converting each sample with ToTensor.
transform = transforms.ToTensor()
mnist_root = "MNIST"
full_train_set = datasets.MNIST(root=mnist_root, train=True, download=True, transform=transform)
test_set = datasets.MNIST(root=mnist_root, train=False, download=True, transform=transform)

# Hold out 20% of the training samples for validation.
train_set_size = int(len(full_train_set) * 0.8)
valid_set_size = len(full_train_set) - train_set_size

# A generator with a fixed seed makes the train/validation split repeatable.
seed = torch.Generator().manual_seed(42)
train_set, valid_set = data.random_split(
    full_train_set,
    lengths=[train_set_size, valid_set_size],
    generator=seed,
)

# Only the training loader shuffles; validation and test keep dataset order.
batch_size = 128
train_loader = data.DataLoader(train_set, batch_size=batch_size, shuffle=True)
val_loader = data.DataLoader(valid_set, batch_size=batch_size)
test_loader = data.DataLoader(test_set, batch_size=batch_size)

# Train the autoencoder, then evaluate it on the held-out test set.
# LearningRateMonitor records the optimizer's learning rate at every step.
lr_monitor = pl.callbacks.LearningRateMonitor(logging_interval='step')
# accelerator="auto" selects a GPU when one is available and falls back to CPU
# otherwise, so the notebook also runs on machines without CUDA.
trainer = pl.Trainer(accelerator="auto", max_epochs=100, callbacks=[lr_monitor])
trainer.fit(model=autoencoder, train_dataloaders=train_loader, val_dataloaders=val_loader)
# Trainer.test takes `dataloaders`; the old `test_dataloaders` keyword is not
# accepted by lightning.pytorch 2.x and raises a TypeError.
trainer.test(dataloaders=test_loader)


* 'schema_extra' has been renamed to 'json_schema_extra'
2023/10/25 08:56:22 INFO mlflow.tracking.fluent: Experiment with name 'mnist' does not exist. Creating a new experiment.


True


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
v:\Code\cnc-monitoring-system\.conda\lib\site-packages\lightning\pytorch\trainer\connectors\logger_connector\logger_connector.py:67: Starting from v1.9.0, `tensorboardX` has been removed as a dependency of the `lightning.pytorch` package, due to potential conflicts with other packages in the ML ecosystem. For this reason, `logger=True` will use `CSVLogger` as the default logger, unless the `tensorboard` or `tensorboardX` packages are found. Please `pip install lightning[extra]` or one of them to enable TensorBoard support by default
2023/10/25 08:56:23 INFO mlflow.utils.autologging_utils: Created MLflow autologging run with ID 'd98bafe60d86445381f76fc46a77af37', which will track hyperparameters, performance metrics, model artifacts, and lineage information for the current pytorch workflow
You are using a CUDA device ('NVIDIA GeForce RTX

Sanity Checking DataLoader 0:   0%|          | 0/2 [00:00<?, ?it/s]

v:\Code\cnc-monitoring-system\.conda\lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:441: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


                                                                           

v:\Code\cnc-monitoring-system\.conda\lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:441: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


Epoch 49:  82%|████████▏ | 614/750 [00:08<00:01, 69.74it/s, v_num=0]

v:\Code\cnc-monitoring-system\.conda\lib\site-packages\lightning\pytorch\trainer\call.py:54: Detected KeyboardInterrupt, attempting graceful shutdown...


In [None]:
# import torch

# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# print(device)

In [None]:
# from src.common import data_loader_utils

# machines = ["M01","M02","M03"]
# process_names = ["OP07"]
# labels = ["good"]
# path_to_dataset = "../data/"

# X_data = []
# for process_name in process_names:
#     for machine in machines:
#         for label in labels:
#             data_path = os.path.join(path_to_dataset, machine, process_name, label)
#             data_list, _ = data_loader_utils.load_tool_research_data(data_path, label=label)
#             X_data.extend(data_list)

# print(len(X_data))