# Imports

In [1]:
# Imports for data loading
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Imports for model learning
import torch
import torchvision
from torch.utils.data import DataLoader
from torchvision import datasets, transforms, models

# Other imports
import datetime

# Cells for Colab

In [2]:
# import zipfile
# from google.colab import drive

# drive.mount('/content/drive')
# zip_path = '/content/drive/My Drive/dataset.zip'
# extract_path = '/content'

# with zipfile.ZipFile(zip_path, 'r') as zip_ref:
#     zip_ref.extractall(extract_path)

# print(os.listdir(extract_path))

# Data Loading

In [None]:
# Dataset root: the folder that contains the `train` and `test` sub-folders.
# Set the ML_DATASET_DIR environment variable to switch machines without
# editing the notebook; when it is unset, Dima's local path is used.
#
# Known roots:
#   Dima:   C:\Users\aquas\Jupyter\ML_Project_git\dataset
#   Tykhon: D:\Tykhon\Downloads\data_folder
#   Colab:  /content
DATASET_DIR = os.environ.get("ML_DATASET_DIR", r'C:\Users\aquas\Jupyter\ML_Project_git\dataset')
TRAIN_PATH = os.path.join(DATASET_DIR, 'train')
TEST_PATH = os.path.join(DATASET_DIR, 'test')

# Untransformed datasets: used here only to report sizes and class names.
train_set = datasets.ImageFolder(root=TRAIN_PATH)
test_set = datasets.ImageFolder(root=TEST_PATH)

# Print dataset configuration
print(f"\nTraining set size: {len(train_set)} images")
print(f"Test set size: {len(test_set)} images")

# Class names as reported by the training set
class_names = train_set.classes
print(f"\nClasses [{len(class_names)}]: {class_names}")


In [None]:
# Preprocessing applied to every image of both splits.
# NOTE(review): the mean/std values look dataset-specific - presumably computed
# on this dataset's training images; confirm before reusing elsewhere.
_norm_mean = [0.2954581669589034, 0.29541042352786206, 0.29537371119537875]
_norm_std = [0.31727746077062646, 0.31729778651393764, 0.31726590300772206]

_preprocessing_steps = [
    transforms.Resize((380, 380)),
    transforms.CenterCrop(380),                     # crop size equals the resize target
    transforms.Grayscale(num_output_channels=3),    # grayscale, but still 3 channels
    transforms.ToTensor(),
    transforms.Normalize(mean=_norm_mean, std=_norm_std),
]
custom_transform = transforms.Compose(_preprocessing_steps)

# Same folders as before, now yielding transformed tensors instead of raw images
train_set_transformed = datasets.ImageFolder(transform=custom_transform, root=TRAIN_PATH)
test_set_transformed = datasets.ImageFolder(transform=custom_transform, root=TEST_PATH)

# Show a few random pictures
def show_pictures(dataset: datasets.ImageFolder, pictures_num: int=5) -> None:
    """Display `pictures_num` randomly chosen samples from `dataset` in one row.

    Args:
        dataset: Dataset indexed by integer that yields `(image, label)` pairs.
            The image must support `.permute(1, 2, 0).numpy()`, i.e. a 3-D
            tensor (assumed channels-first, as the permute implies).
        pictures_num: Number of samples (subplot columns) to draw.

    Each subplot is titled with the sample's integer label. Indices are drawn
    with replacement, so the same image can appear more than once.
    """
    # squeeze=False keeps `axes` an array even when pictures_num == 1;
    # otherwise plt.subplots returns a bare Axes that cannot be iterated.
    fig, axes = plt.subplots(nrows=1, ncols=pictures_num, figsize=(15, 45), squeeze=False)

    for ax in axes.flat:
        # Sample over the dataset's real length instead of a hard-coded size,
        # so smaller datasets do not raise IndexError and larger ones are
        # sampled in full.
        image, label = dataset[np.random.randint(0, len(dataset))]
        # Move channels last, the layout imshow expects for colour images.
        image = image.permute(1, 2, 0).numpy()
        ax.imshow(image)
        ax.set_title(f"Label: {label}")
        ax.axis("off")
    plt.show()

# Display random samples from the transformed training set
show_pictures(train_set_transformed)

# Model Loading

In [None]:
# Pick the compute device: "cuda" when CUDA is available, otherwise "cpu"
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
device

In [5]:
# # Load model
# model = torchvision.models.efficientnet_b4(weights=torchvision.models.EfficientNet_B4_Weights.DEFAULT).to(device)

# # Set parameter training to False
# for param in model.features.parameters():
#     param.requires_grad = False

# # Define classifier
# model.classifier = torch.nn.Sequential(
#     #torch.nn.Dropout(p=0.1, inplace=True), LOra!!! binary cross entropy -for multi labeling!!! mini-batching
#     torch.nn.Linear(in_features=model.classifier[1].in_features,
#                     out_features=4,
#                     bias=True)).to(device)

# # Define loss function and optimizer
# loss_fn = torch.nn.CrossEntropyLoss()
# optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [6]:
# Load DenseNet-169 with its default pretrained weights
model = torchvision.models.densenet169(weights=torchvision.models.DenseNet169_Weights.DEFAULT).to(device)

# Keep the feature extractor trainable (full fine-tuning, nothing is frozen).
# Change True to False here to train only the classifier head.
for param in model.features.parameters():
    param.requires_grad = True

# Replace the classifier head with one output per dataset class.
# The single Linear layer stays wrapped in Sequential so the state_dict keys
# remain `classifier.0.*`, matching weight files saved by earlier runs.
model.classifier = torch.nn.Sequential(
    torch.nn.Linear(in_features=model.classifier.in_features,
                    out_features=len(class_names),
                    bias=True)).to(device)

# Define loss function and optimizer
loss_fn = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)


# Settings

In [7]:
# Loading custom weights (CHANGE VERSION EVERY NEW RUN!)
# VERSION == 0 skips loading and keeps the weights set up by the model cell.
# Any other value resumes from model_weights_v{VERSION}.pt; outputs of the
# current run are written under VERSION + 1.
VERSION = 0
if VERSION != 0:
    WEIGHTS_PATH = f"model_weights_v{VERSION}.pt"
    # The file holds a plain state_dict, so the restricted loader is enough;
    # weights_only=False would run the full unpickler on the file.
    # map_location lets weights saved on a GPU machine load on a CPU-only one.
    model.load_state_dict(torch.load(WEIGHTS_PATH, map_location=device, weights_only=True))

In [8]:
# Mini-batch size shared by both loaders
batch_size = 16

# Training data is shuffled; the test loader keeps dataset order
train_dataloader = DataLoader(dataset=train_set_transformed, shuffle=True, batch_size=batch_size)
test_dataloader = DataLoader(dataset=test_set_transformed, shuffle=False, batch_size=batch_size)

In [9]:
# Set number of epochs (iterations of the training loop's outer `for`)
epochs_num = 10

# Functions

In [10]:
# Draw loss plot
def show_loss_plot(axes: plt.Axes, train_loss_array: list, test_loss_array: list) -> None:
    """Draw the train and test loss curves on `axes`.

    Args:
        axes: Matplotlib axes to draw on. Everything is drawn on this object;
            the pyplot "current axes" is never touched, so the function works
            no matter which figure was created last.
        train_loss_array: Train loss values, plotted in list order.
        test_loss_array: Test loss values, plotted in list order.

    The title shows the module-level VERSION plus one. The figure is neither
    displayed nor written to disk here; the caller decides that.
    """
    axes.plot(train_loss_array, label="Train Loss", linestyle='-', color='blue', marker='o', markersize=6, linewidth=2)
    axes.plot(test_loss_array, label="Test Loss", linestyle='--', color='orange', marker='s', markersize=6, linewidth=2)

    axes.set_xlabel("Epochs")
    axes.set_ylabel("Loss")
    axes.set_title(f"Loss Graph: v{VERSION+1}", weight='bold')
    axes.legend(loc="upper right", fontsize=12, frameon=True, shadow=True, fancybox=True, borderpad=1)
    # Reference line at zero loss
    axes.axhline(y=0, color='gray', linestyle='-', linewidth=1, alpha=0.7)

In [11]:
# Draw accuracy plot
def show_acc_plot(axes: plt.Axes, train_acc_array: list, test_acc_array: list) -> None:
    """Draw the train and test accuracy curves on `axes`.

    Args:
        axes: Matplotlib axes to draw on.
        train_acc_array: Train accuracy values, plotted in list order.
        test_acc_array: Test accuracy values, plotted in list order.

    The title shows the module-level VERSION plus one. The figure is neither
    displayed nor written to disk here; the caller decides that.
    """
    curve_style = dict(markersize=6, linewidth=2)
    axes.plot(train_acc_array, label="Train Accuracy", linestyle='-', color='blue', marker='o', **curve_style)
    axes.plot(test_acc_array, label="Test Accuracy", linestyle='--', color='orange', marker='s', **curve_style)

    axes.set_xlabel("Epochs")
    axes.set_ylabel("Accuracy")
    axes.set_title(f"Accuracy Graph: v{VERSION+1}", weight='bold')
    axes.legend(loc="upper right", fontsize=12, frameon=True, shadow=True, fancybox=True, borderpad=1)
    # Reference line at zero accuracy
    axes.axhline(y=0, color='gray', linestyle='-', linewidth=1, alpha=0.7)

# Training

In [None]:
# Statistic for visualisation: one entry is appended per finished epoch
train_loss_array = []
test_loss_array = []
train_acc_array = []
test_acc_array = []
best_acc = 0  # best test accuracy seen so far; gates the "best weights" save

# Training loop: train one epoch, evaluate on the test loader, log, plot and
# save the weights whenever the test accuracy improves.
for epoch in range(epochs_num):
    print(f"Epoch: {epoch}\n---------")

    train_loss, train_acc = 0, 0

    # Train
    model.train()
    for batch, (X, y) in enumerate(train_dataloader):
        X, y = X.to(device), y.to(device)

        y_pred = model(X)
        loss = loss_fn(y_pred, y)
        train_loss += loss.item()

        # Experimental: count correct predictions for the train accuracy.
        train_pred_labels = y_pred.argmax(dim=1)
        train_acc += (train_pred_labels == y).sum().item()
        # If these numbers look wrong, delete from the comment above down to here.

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if (batch % 10 == 0):
            # Use the configured batch size: len(X) shrinks on a short final
            # batch and would under-report the samples already processed.
            print(f"Looked at {batch * train_dataloader.batch_size}/{len(train_dataloader.dataset)} samples [{datetime.datetime.now().strftime('%H:%M:%S')}]")

    # Loss is averaged per batch, accuracy per sample
    train_loss /= len(train_dataloader)
    train_acc /= len(train_dataloader.dataset)
    train_loss_array.append(train_loss)
    train_acc_array.append(train_acc)
    test_loss, test_acc = 0, 0

    # Evaluate
    model.eval()
    print(f"Evaluation... [{datetime.datetime.now().strftime('%H:%M:%S')}]")
    with torch.inference_mode():
        for X, y in test_dataloader:
            X, y = X.to(device), y.to(device)
            test_y_pred = model(X)
            test_loss += loss_fn(test_y_pred, y).item()
            test_pred_labels = test_y_pred.argmax(dim=1)
            test_acc += (test_pred_labels == y).sum().item()

    test_loss /= len(test_dataloader)
    test_acc /= len(test_dataloader.dataset)
    test_loss_array.append(test_loss)
    test_acc_array.append(test_acc)

    # Log results of every epoch (console and log file share one summary line)
    epoch_summary = f"Train loss: {train_loss:.5f} | Train acc: {((train_acc)*100):.2f}% | Test loss: {test_loss:.5f} | Test acc: {((test_acc)*100):.2f}% [{datetime.datetime.now().strftime('%H:%M:%S')}]"
    print(f"\n{epoch_summary}\n")
    filename = f"log_v{VERSION+1}.txt"
    with open(filename, "a") as file:
        file.write(f"epoch {epoch}:\n{epoch_summary}\n")

    # Show and update loss/accuracy graphs
    fig1, axes1 = plt.subplots(1)
    show_loss_plot(axes1, train_loss_array, test_loss_array)
    fig2, axes2 = plt.subplots(1)
    show_acc_plot(axes2, train_acc_array, test_acc_array)

    # range(epochs_num) stops at epochs_num - 1, so that value marks the last
    # epoch; comparing against epochs_num itself never matched and the graphs
    # were never written to disk.
    if epoch == epochs_num - 1:
        fig1.savefig(f"loss_graph_v{VERSION+1}.png")
        fig2.savefig(f"acc_graph_v{VERSION+1}.png")

    # Display this epoch's graphs now, then release the figures so they do not
    # pile up (two new figures are created every epoch).
    plt.show()
    plt.close(fig1)
    plt.close(fig2)

    # Save model with best accuracy
    if test_acc > best_acc:
        best_acc = test_acc
        SAVE_PATH = f"model_best_weights_v{VERSION+1}.pt"
        torch.save(model.state_dict(), SAVE_PATH)

        # Log save
        print(f"\nWeights saved to {SAVE_PATH}")
        with open(filename, "a") as file:
            file.write(f"\nWeights saved to {SAVE_PATH}\n\n")


In [134]:
# Weights extra save: write the model's current state_dict to
# model_weights_v{VERSION+1}.pt. This is a different file from the
# model_best_weights_v{VERSION+1}.pt checkpoint written inside the training
# loop, and its name follows the pattern the weight-loading cell reads once
# VERSION is increased by one.
torch.save(model.state_dict(), f"model_weights_v{VERSION+1}.pt")

In [None]:
# Model save
#torch.save(model, f"model_v{VERSION+1}.pt")