In [2]:
from datasets import load_dataset
import torch
import torch.nn as nn
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, TensorDataset
from torchvision.models import SqueezeNet
from torchvision import datasets
from PIL import Image
import io
import pandas as pd


In [95]:
dataset = pd.read_parquet('C:\\Users\\elain\Documents\\Stern\\BAC\\Advanced Team\\0000.parquet')

In [96]:
byte_images = []
for sample in dataset['image']:
    image = sample['bytes']
    byte_images.append(image)

In [97]:
transform = transforms.Compose([
    transforms.Resize((224, 224)),  # Adjust the size accordingly
    transforms.ToTensor(),
])

In [98]:
tensor_images = []
for byte_image in byte_images:
    pil_image = Image.open(io.BytesIO(byte_image))
    tensor_image = transform(pil_image)
    tensor_images.append(tensor_image)

In [99]:
processed_images = torch.stack(tensor_images)

In [100]:
labels = torch.tensor(dataset['style'])

In [101]:
# Define the SqueezeNet model
model = SqueezeNet(num_classes=27)  # Assuming you have 27 classes

In [102]:
# Define the loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [103]:
# Create a DataLoader
dataset = TensorDataset(processed_images, labels)
dataloader = DataLoader(dataset, batch_size=32, shuffle=True)

In [108]:
# Training loop
epochs = 25
for epoch in range(epochs):
    for batch_images, batch_labels in dataloader:
        optimizer.zero_grad()
        outputs = model(batch_images)
        loss = criterion(outputs, batch_labels)
        loss.backward()
        optimizer.step()

    print(f'Epoch {epoch + 1}/{epochs}, Loss: {loss.item()}')

# Save the trained model
torch.save(model.state_dict(), 'C:\\Users\\elain\\Documents\\Stern\\BAC\\Advanced Team\\squeezenet_model.pth')

Epoch 1/25, Loss: 2.3780694007873535
Epoch 2/25, Loss: 3.2153403759002686
Epoch 3/25, Loss: 2.0062568187713623
Epoch 4/25, Loss: 1.9174505472183228
Epoch 5/25, Loss: 1.6955028772354126
Epoch 6/25, Loss: 2.4997990131378174
Epoch 7/25, Loss: 2.2080767154693604
Epoch 8/25, Loss: 2.5019681453704834
Epoch 9/25, Loss: 1.8774456977844238
Epoch 10/25, Loss: 2.168994665145874
Epoch 11/25, Loss: 2.1647799015045166
Epoch 12/25, Loss: 1.6907739639282227
Epoch 13/25, Loss: 2.3852994441986084
Epoch 14/25, Loss: 2.6035573482513428
Epoch 15/25, Loss: 2.594700813293457
Epoch 16/25, Loss: 1.9873648881912231
Epoch 17/25, Loss: 2.2288734912872314
Epoch 18/25, Loss: 1.885496735572815
Epoch 19/25, Loss: 2.407972574234009
Epoch 20/25, Loss: 1.6160016059875488
Epoch 21/25, Loss: 1.9925752878189087
Epoch 22/25, Loss: 2.573483943939209
Epoch 23/25, Loss: 2.29242205619812
Epoch 24/25, Loss: 2.3178160190582275
Epoch 25/25, Loss: 1.9291902780532837


In [3]:
### TEST DATA ###
dataset = pd.read_parquet('C:\\Users\\elain\Documents\\Stern\\BAC\\Advanced Team\\0001.parquet')

In [4]:
byte_images = []
for sample in dataset['image']:
    image = sample['bytes']
    byte_images.append(image)

In [5]:
transform = transforms.Compose([
    transforms.Resize((224, 224)),  # Adjust the size accordingly
    transforms.ToTensor(),
])

In [6]:
tensor_images = []
for byte_image in byte_images:
    pil_image = Image.open(io.BytesIO(byte_image))
    tensor_image = transform(pil_image)
    tensor_images.append(tensor_image)

In [7]:
processed_images = torch.stack(tensor_images)

In [8]:
labels = torch.tensor(dataset['style'])

In [9]:
# Create an instance of the SqueezeNet model
model = SqueezeNet(num_classes=27)

# Load the trained weights
model.load_state_dict(torch.load('C:\\Users\\elain\\Documents\\Stern\\BAC\\Advanced Team\\squeezenet_model.pth'))
model.eval()  # Set the model to evaluation mode

SqueezeNet(
  (features): Sequential(
    (0): Conv2d(3, 96, kernel_size=(7, 7), stride=(2, 2))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=True)
    (3): Fire(
      (squeeze): Conv2d(96, 16, kernel_size=(1, 1), stride=(1, 1))
      (squeeze_activation): ReLU(inplace=True)
      (expand1x1): Conv2d(16, 64, kernel_size=(1, 1), stride=(1, 1))
      (expand1x1_activation): ReLU(inplace=True)
      (expand3x3): Conv2d(16, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (expand3x3_activation): ReLU(inplace=True)
    )
    (4): Fire(
      (squeeze): Conv2d(128, 16, kernel_size=(1, 1), stride=(1, 1))
      (squeeze_activation): ReLU(inplace=True)
      (expand1x1): Conv2d(16, 64, kernel_size=(1, 1), stride=(1, 1))
      (expand1x1_activation): ReLU(inplace=True)
      (expand3x3): Conv2d(16, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (expand3x3_activation): ReLU(inplace=True)
    )
    (5): Fire(
   

In [10]:
evaluation_data = processed_images
evaluation_labels = labels

In [11]:
with torch.no_grad():
    batch_size = 32  # Adjust the batch size as needed
    outputs_list = []

    for start in range(0, len(evaluation_data), batch_size):
        end = min(start + batch_size, len(evaluation_data))
        batch_data = evaluation_data[start:end]
        
        outputs = model(batch_data)
        outputs_list.append(outputs)

    outputs = torch.cat(outputs_list, dim=0)
    predictions = torch.argmax(outputs, dim=1)

In [12]:
ground_truth = evaluation_labels
correct_predictions = (predictions == ground_truth).sum().item()
total_samples = len(evaluation_labels)
accuracy = correct_predictions / total_samples
print(f'Accuracy: {accuracy * 100:.2f}%')

Accuracy: 28.27%


In [13]:
ground_truth

tensor([ 3, 20, 20,  ..., 23, 21, 21])

In [14]:
predictions

tensor([12, 21, 12,  ..., 23, 21, 12])