In [1]:
from datasets import load_dataset
import torch
import torch.nn as nn
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, TensorDataset
from torchvision.models import SqueezeNet
from torchvision import datasets
from PIL import Image
import io
import pandas as pd

In [29]:
file_paths = [
    'C:\\Users\\elain\Documents\\Stern\\BAC\\Advanced Team\\0000.parquet',
    'C:\\Users\\elain\Documents\\Stern\\BAC\\Advanced Team\\0001.parquet',
    'C:\\Users\\elain\Documents\\Stern\\BAC\\Advanced Team\\0002.parquet',
    'C:\\Users\\elain\Documents\\Stern\\BAC\\Advanced Team\\0003.parquet',
    'C:\\Users\\elain\Documents\\Stern\\BAC\\Advanced Team\\0004.parquet',
    'C:\\Users\\elain\Documents\\Stern\\BAC\\Advanced Team\\0005.parquet',
    'C:\\Users\\elain\Documents\\Stern\\BAC\\Advanced Team\\0006.parquet',
    'C:\\Users\\elain\Documents\\Stern\\BAC\\Advanced Team\\0007.parquet',
    'C:\\Users\\elain\Documents\\Stern\\BAC\\Advanced Team\\0008.parquet',
    'C:\\Users\\elain\Documents\\Stern\\BAC\\Advanced Team\\0009.parquet'
]

# Read all datasets into a list of DataFrames
datasets = [pd.read_parquet(file) for file in file_paths]

# Concatenate all DataFrames into one
dataset = pd.concat(datasets, ignore_index=True)

In [30]:
byte_images = []
for sample in dataset['image']:
    image = sample['bytes']
    byte_images.append(image)

In [31]:
transform = transforms.Compose([
    transforms.Resize((224, 224)),  # Adjust the size accordingly
    transforms.ToTensor(),
])

In [32]:
tensor_images = []
for byte_image in byte_images:
    pil_image = Image.open(io.BytesIO(byte_image))
    tensor_image = transform(pil_image)
    tensor_images.append(tensor_image)

In [33]:
processed_images = torch.stack(tensor_images)

In [34]:
labels = torch.tensor(dataset['style'])

In [35]:
# Define the SqueezeNet model
model = SqueezeNet(num_classes=27)  # Assuming you have 27 classes

In [36]:
# Define the loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [37]:
processed_images.shape

torch.Size([11320, 3, 224, 224])

In [38]:
labels.shape

torch.Size([11320])

In [40]:
# Create a DataLoader
dataset = TensorDataset(processed_images, labels)
dataloader = DataLoader(dataset, batch_size=32, shuffle=True)

In [41]:
# Training loop
epochs = 25
for epoch in range(epochs):
    for batch_images, batch_labels in dataloader:
        optimizer.zero_grad()
        outputs = model(batch_images)
        loss = criterion(outputs, batch_labels)
        loss.backward()
        optimizer.step()

    print(f'Epoch {epoch + 1}/{epochs}, Loss: {loss.item()}')

# Save the trained model
torch.save(model.state_dict(), 'C:\\Users\\elain\\Documents\\Stern\\BAC\\Advanced Team\\squeezenet_model1.pth')

Epoch 1/25, Loss: 1.8265233039855957
Epoch 2/25, Loss: 2.4173967838287354
Epoch 3/25, Loss: 2.165327548980713
Epoch 4/25, Loss: 1.7908334732055664
Epoch 5/25, Loss: 1.5851420164108276
Epoch 6/25, Loss: 2.312936305999756
Epoch 7/25, Loss: 1.7462571859359741
Epoch 8/25, Loss: 1.9578033685684204
Epoch 9/25, Loss: 1.6886777877807617
Epoch 10/25, Loss: 1.9299274682998657
Epoch 11/25, Loss: 1.8650957345962524
Epoch 12/25, Loss: 2.0737521648406982
Epoch 13/25, Loss: 1.5097427368164062
Epoch 14/25, Loss: 1.6966453790664673
Epoch 15/25, Loss: 1.864454746246338
Epoch 16/25, Loss: 1.4054079055786133
Epoch 17/25, Loss: 2.4108726978302
Epoch 18/25, Loss: 1.0860494375228882
Epoch 19/25, Loss: 1.278841257095337
Epoch 20/25, Loss: 1.4197627305984497
Epoch 21/25, Loss: 1.028377652168274
Epoch 22/25, Loss: 1.4506558179855347
Epoch 23/25, Loss: 1.3416675329208374
Epoch 24/25, Loss: 0.9758595824241638
Epoch 25/25, Loss: 1.112046241760254


In [42]:
# Create an instance of the SqueezeNet model
model = SqueezeNet(num_classes=27)

# Load the trained weights
model.load_state_dict(torch.load('C:\\Users\\elain\\Documents\\Stern\\BAC\\Advanced Team\\squeezenet_model1.pth'))
model.eval()  # Set the model to evaluation mode

SqueezeNet(
  (features): Sequential(
    (0): Conv2d(3, 96, kernel_size=(7, 7), stride=(2, 2))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=True)
    (3): Fire(
      (squeeze): Conv2d(96, 16, kernel_size=(1, 1), stride=(1, 1))
      (squeeze_activation): ReLU(inplace=True)
      (expand1x1): Conv2d(16, 64, kernel_size=(1, 1), stride=(1, 1))
      (expand1x1_activation): ReLU(inplace=True)
      (expand3x3): Conv2d(16, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (expand3x3_activation): ReLU(inplace=True)
    )
    (4): Fire(
      (squeeze): Conv2d(128, 16, kernel_size=(1, 1), stride=(1, 1))
      (squeeze_activation): ReLU(inplace=True)
      (expand1x1): Conv2d(16, 64, kernel_size=(1, 1), stride=(1, 1))
      (expand1x1_activation): ReLU(inplace=True)
      (expand3x3): Conv2d(16, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (expand3x3_activation): ReLU(inplace=True)
    )
    (5): Fire(
   

In [43]:
### TEST DATA ###
test_dataset = pd.read_parquet('C:\\Users\\elain\Documents\\Stern\\BAC\\Advanced Team\\train-00010-of-00072.parquet')

In [44]:
test_byte_images = []
for sample in test_dataset['image']:
    image = sample['bytes']
    test_byte_images.append(image)

In [45]:
test_tensor_images = []
for byte_image in test_byte_images:
    pil_image = Image.open(io.BytesIO(byte_image))
    tensor_image = transform(pil_image)
    test_tensor_images.append(tensor_image)

In [46]:
test_processed_images = torch.stack(test_tensor_images)

In [47]:
test_labels = torch.tensor(test_dataset['style'])

In [48]:
evaluation_data = test_processed_images
evaluation_labels = test_labels

In [49]:
with torch.no_grad():
    batch_size = 32  # Adjust the batch size as needed
    outputs_list = []

    for start in range(0, len(evaluation_data), batch_size):
        end = min(start + batch_size, len(evaluation_data))
        batch_data = evaluation_data[start:end]
        
        outputs = model(batch_data)
        outputs_list.append(outputs)

    outputs = torch.cat(outputs_list, dim=0)
    predictions = torch.argmax(outputs, dim=1)

In [50]:
ground_truth = evaluation_labels
correct_predictions = (predictions == ground_truth).sum().item()
total_samples = len(evaluation_labels)
accuracy = correct_predictions / total_samples
print(f'Accuracy: {accuracy * 100:.2f}%')


Accuracy: 53.98%


In [51]:
ground_truth

tensor([23, 23, 12,  ..., 12, 21, 12])

In [52]:
predictions

tensor([23, 23, 12,  ..., 12, 21, 12])

In [53]:
for p in predictions:
    print(p)

tensor(23)
tensor(23)
tensor(12)
tensor(23)
tensor(12)
tensor(4)
tensor(21)
tensor(23)
tensor(20)
tensor(21)
tensor(4)
tensor(12)
tensor(24)
tensor(12)
tensor(12)
tensor(12)
tensor(12)
tensor(12)
tensor(7)
tensor(3)
tensor(3)
tensor(12)
tensor(12)
tensor(21)
tensor(17)
tensor(24)
tensor(21)
tensor(12)
tensor(21)
tensor(21)
tensor(12)
tensor(12)
tensor(24)
tensor(21)
tensor(12)
tensor(21)
tensor(21)
tensor(17)
tensor(24)
tensor(12)
tensor(3)
tensor(23)
tensor(12)
tensor(12)
tensor(15)
tensor(21)
tensor(12)
tensor(12)
tensor(4)
tensor(15)
tensor(24)
tensor(21)
tensor(24)
tensor(3)
tensor(12)
tensor(4)
tensor(21)
tensor(12)
tensor(20)
tensor(12)
tensor(12)
tensor(17)
tensor(12)
tensor(12)
tensor(12)
tensor(12)
tensor(21)
tensor(21)
tensor(12)
tensor(17)
tensor(23)
tensor(12)
tensor(21)
tensor(23)
tensor(7)
tensor(12)
tensor(21)
tensor(7)
tensor(12)
tensor(12)
tensor(4)
tensor(12)
tensor(12)
tensor(12)
tensor(3)
tensor(21)
tensor(12)
tensor(12)
tensor(12)
tensor(15)
tensor(12)
tensor(12)
t