### Breakdown of extra classes
- LeNet: Main code that implements a standard CNN (LeNet)
- Data: Holds DiffractionDataset, a subclass of the PyTorch `Dataset` class; it wraps our data tensors so they can be fed into models and used for distributed training

In [None]:
import torch
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import pandas
from collections import OrderedDict
import numpy as np
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, mean_squared_error

import os
from datetime import datetime
import math

#Our Own Created Classes
from LeNet import LeNet
from Data import DiffractionDataset

# Step 1: Load and Analyze Data

In [None]:
# The path below targets Colab's /content/ directory; adjust it when running elsewhere.
# NOTE(review): this training cell loads a file named "...TestTensor.pt", and the
# test cell later loads "100k_COD_TestTensor.pt" as well -- confirm this should not
# point at a separate training tensor, otherwise the model is evaluated on data it
# was trained on.
TrainingData = torch.load("/content/100k_COD_TestTensor.pt") #dict with diffraction patterns ('X') and their labels ('Y')

### Load and Analyze Training Data

In [None]:
# Pull the model inputs ('X') and their labels ('Y') out of the loaded dict.
data, labels = TrainingData['X'], TrainingData['Y']

# The labels are stored as numbers; this lookup maps those numbers back to
# the actual Bravais lattices.
mapping = torch.load("/content/mapping.pt")


In [None]:
labels.shape #number of entries available for train/val

The next cell shows the distribution of every Bravais lattice, and reveals the sad reality of scientific data -- a label imbalance

In [None]:
# Count how many samples carry each distinct label value.
unique_values, counts = torch.unique(labels, return_counts=True)

# One bar per label value; the bar height is the number of samples with that label.
bar_positions = unique_values.numpy()
bar_heights = counts.numpy()
plt.bar(bar_positions, bar_heights)
plt.ylabel("Count")
plt.xlabel("Bravais Lattice Class")
plt.title("Distribution of labels")

### Create Validation Set
- The purpose of a validation set is to analyze our model's performance on data that it hasn't trained on
- We use the test set as a "final" metric

In [None]:
# Convert the tensors to NumPy arrays before handing them to train_test_split.
data_np = data.numpy()
labels_np = labels.numpy()

# Hold out 5% of the samples for validation; the fixed random_state makes the
# split reproducible between runs.
nptrainx, npvalx, nptrainy, npvaly = train_test_split(
    data_np,
    labels_np,
    test_size=0.05,
    random_state=42,
)

# Turn each split back into tensors.
train_data, train_labels = torch.from_numpy(nptrainx), torch.from_numpy(nptrainy)
validation_data, validation_labels = torch.from_numpy(npvalx), torch.from_numpy(npvaly)

print("Train Shape:", train_labels.shape)
print("Val Shape:", validation_labels.shape)

In [None]:
# Inspect the shape of the training inputs (shown as this cell's output).
train_data.shape

# Step 2: Train the Models
Define hyperparameters, put the model together, train it, and then repeat until the model's performance looks good


In [None]:
# Optimisation settings
learning_rate = 5e-4  # step size handed to the Adam optimizer
batch_size = 50       # samples per DataLoader batch
epochs = 15           # full passes over the training loader

# Model dimensions
input_size = 3041     # input size passed to LeNet
num_classes = 14      # number of output classes (one per Bravais lattice)

In [None]:
# Build the network, the classification loss, and the optimizer that updates
# the network's parameters.
model = LeNet(input_size, num_classes)
crossentropy_loss_function = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(
    model.parameters(),
    lr=learning_rate,
    eps=1e-5,
)

In [None]:
# Keyword arguments shared by both dataset wrappers and by both loaders.
dataset_options = dict(unsupervised=False, categorical='Bravais Lattice')
loader_options = dict(batch_size=batch_size, num_workers=1, pin_memory=True)

# Training split: the loader reshuffles it every epoch.
train_dataset = DiffractionDataset(num_classes, 1e-3, train_data, labels=train_labels, **dataset_options)
train_loader = torch.utils.data.DataLoader(train_dataset, shuffle=True, **loader_options)

# Validation split: kept in a fixed order.
validation_dataset = DiffractionDataset(num_classes, 1e-3, validation_data, labels=validation_labels, **dataset_options)
val_loader = torch.utils.data.DataLoader(validation_dataset, shuffle=False, **loader_options)


In [None]:
# Train for `epochs` passes over train_loader. After every pass, evaluate on
# val_loader and print one summary line with the mean loss and accuracy of
# both splits.
start = datetime.now()
for epoch in range(epochs):
    model.train()
    total_loss, total_correct, total_samples = 0.0, 0, 0

    # Training Loop
    # The batch variables have their own names so that the loop does not
    # overwrite the notebook-level `labels` tensor loaded earlier.
    for batch_idx, (batch_inputs, batch_labels) in enumerate(train_loader, start=1):
        optimizer.zero_grad()
        logits = model(batch_inputs)  # compute predictions
        loss = crossentropy_loss_function(logits, batch_labels)  # compute loss
        loss.backward()  # compute gradients
        optimizer.step()  # update model

        # Compare predictions to labels once and reuse the mask for both the
        # per-batch accuracy and the running epoch totals.
        correct_mask = logits.argmax(dim=-1) == batch_labels
        acc = correct_mask.float().mean().item()
        num_in_batch = batch_labels.size(0)

        # CrossEntropyLoss averages over the batch by default, so weight it by
        # the batch length to obtain a true per-sample mean for the epoch.
        total_loss += loss.item() * num_in_batch
        total_correct += correct_mask.sum().item()
        total_samples += num_in_batch
        print("[Epoch %d/%d] [%d] [Batch loss: %.2f  Batch Acc: %d%%]"% (epoch+1, epochs, batch_idx, loss.item(), 100 * acc))

    train_loss = total_loss / total_samples
    train_acc = total_correct / total_samples * 100
    print(f"Epoch {epoch+1} Training Accuracy: {train_acc:.2f}%")

    # Validation pass: no gradient tracking and no parameter updates.
    model.eval()
    val_loss_sum, val_correct, val_samples = 0.0, 0, 0
    with torch.no_grad():
        for batch_inputs, batch_labels in val_loader:
            logits = model(batch_inputs)
            num_in_batch = batch_labels.size(0)
            val_loss_sum += crossentropy_loss_function(logits, batch_labels).item() * num_in_batch
            val_correct += (logits.argmax(dim=-1) == batch_labels).sum().item()
            val_samples += num_in_batch

    val_loss = val_loss_sum / val_samples
    val_acc = val_correct / val_samples * 100
    # Label each number for what it is: the previous summary printed the two
    # accuracies under the headings "Training Loss" / "Validation Loss".
    print("[Epoch %d/%d] [Training Loss: %.4f  Training Acc: %.2f%%] [Validation Loss: %.4f  Validation Acc: %.2f%%]"
          % (epoch+1, epochs, train_loss, train_acc, val_loss, val_acc))
totalTime = datetime.now() - start
print(f"Total Train Time: {totalTime}")

In [None]:
#if you want to save the model (sometimes weird in Colab), replace 'PATH' with the destination file path
#torch.save({f'Model | {epochs} {batch_size} {learning_rate} | {datetime.now()}': model.state_dict(), f'Optimizer | {epochs} {batch_size} {learning_rate} | {datetime.now()}': optimizer.state_dict()}, 'PATH')

# Step 3: Analyze Model Results

Now it's time to grab our test tensor and look at how our model did. While we could go back and re-train our model to get better results on the test set, it's common practice in ML not to do this.

In [None]:
# Load the test tensors: a dict holding the inputs under 'X' and the labels under 'Y'.
# NOTE(review): the training cell loads a file with this same name from /content/ --
# confirm the two cells are meant to read different data, otherwise this evaluation
# runs on samples the model has already seen.
test = torch.load("100k_COD_TestTensor.pt")
test_inputs, test_targets = test['X'], test['Y']

# Wrap the test tensors exactly like the train/validation splits; keep a fixed order.
test_dataset = DiffractionDataset(num_classes, 1e-3, test_inputs, labels=test_targets, unsupervised=False, categorical='Bravais Lattice')
test_loader = torch.utils.data.DataLoader(
    test_dataset,
    batch_size=batch_size,
    shuffle=False,
    num_workers=1,
    pin_memory=True,
)

# Switch to evaluation mode. As the last expression of the cell this also
# prints the model (Encoder -> Convolutional Layers, Classifier -> Normal ANN).
model.eval()

In [None]:
# Take the first test sample; unsqueeze(0) adds a leading dimension of size 1
# so the single sample is passed to the model as a batch of one.
first_pattern = test_dataset[0][0]
input_example = first_pattern.unsqueeze(0)
label_example = test_dataset[0][1]

# Forward pass without gradient tracking.
with torch.no_grad():
    output_example = model(input_example)

# The index of the largest logit is the predicted class.
example_pred = output_example.argmax(dim=1).item()

In [None]:
# Show the tensor that was fed to the model for the single-example prediction.
print("Example of Input(Diffraction Pattern)")
print(input_example)

Logits are the direct output from the model for classification. Each index in the output array corresponds to a class, and the number at that index represents the model's "confidence" that the input belongs to that class

In [None]:
# Show the raw model output (one value per class) for the example input.
print("Example of Logits")
print(output_example)

In [None]:
# Print the predicted class index, then translate it to its Bravais lattice
# via the 'Bravais Lattice' entry of the mapping loaded earlier.
print(f"Numeric Class: {example_pred}")
print(f"Bravais Lattice: {mapping['Bravais Lattice'][example_pred]}")

In [None]:
print(label_example) #Ground-truth label of the example; hopefully it's the same as the prediction

### Analyze using test-set

In [None]:
# Run the model over the whole test loader, then report accuracy, RMSE and a
# confusion matrix of true vs. predicted classes.
listofpreds = []
listoflabels = []

with torch.no_grad():
    # Batch variables have their own names so the notebook-level `labels`
    # tensor is not overwritten.
    for batch_inputs, batch_labels in test_loader:
        outputs = model(batch_inputs)
        predictions = torch.argmax(outputs, dim=1)
        # Store plain Python numbers rather than 0-d tensors so NumPy and
        # scikit-learn receive ordinary numeric lists.
        listofpreds.extend(predictions.tolist())
        listoflabels.extend(batch_labels.tolist())

accuracy = np.mean(np.array(listofpreds) == np.array(listoflabels))
print(f"Test Accuracy: {accuracy * 100}%")

# NOTE(review): RMSE treats the class ids as ordered numbers; for categorical
# classes its value depends on how the ids happen to be numbered -- confirm
# this metric is really wanted.
rmse = np.sqrt(mean_squared_error(listoflabels, listofpreds))
print(f"Test Root Mean Squared Error(RMSE): {rmse}")

conf_matrix = confusion_matrix(listoflabels, listofpreds)
# ConfusionMatrixDisplay.plot() opens a new figure unless it is given an Axes,
# so create the sized figure first and draw into it. Otherwise the figsize is
# ignored and an extra empty figure is shown.
fig, ax = plt.subplots(figsize=(10, 8))
disp = ConfusionMatrixDisplay(conf_matrix)
disp.plot(ax=ax, cmap=plt.cm.Reds, values_format='d')
ax.set_title("Confusion Matrix for bravais lattice")
plt.show()