## Imports

In [None]:
!pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118

In [None]:
!pip install torch torchvision

In [None]:
!pip install timm

In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import scipy.io as sio
from characterDefinitions import getHandwritingCharacterDefinitions
from torchvision.models import resnet50
import numpy as np

from torch.utils.data import DataLoader
from torchvision import datasets, transforms
import timm

  from .autonotebook import tqdm as notebook_tqdm


## Load the Dataset

In [3]:
topDirs = ['Datasets']
dataDirs = ['t5.2019.05.08','t5.2019.11.25','t5.2019.12.09','t5.2019.12.11','t5.2019.12.18',
            't5.2019.12.20','t5.2020.01.06','t5.2020.01.08','t5.2020.01.13','t5.2020.01.15']
charDef = getHandwritingCharacterDefinitions()

In [4]:
all_tensors = []
all_labels = []
for directory in dataDirs:
    
    mat = f'./Datasets/{directory}/singleLetters.mat'
    data = sio.loadmat(mat)
    ctr = 0
    for letter in charDef['charList']:
        t = torch.Tensor(data[f'neuralActivityCube_{letter}'])
        qty = t.shape[0]
        labels = torch.Tensor([ctr]*qty)
        ctr += 1
#         if t.shape[0] == 27:
        all_tensors.append(t)
        all_labels.append(labels)

tensor_data = torch.cat(all_tensors, dim=0)
tensor_data = np.repeat(tensor_data[..., np.newaxis], 3, -1).transpose(-1,-2).transpose(-2,-3)

# tensor_data = tensor_data.transpose(-1,0).transpose(-1,-2)
tensor_labels = torch.cat(all_labels).long()


In [None]:
tensor_data.shape

In [None]:
x_pad = torch.zeros(3627, 3, 224-201, 192)
y_pad = torch.zeros(3627, 3, 224, 224 - 192)

In [None]:
tensor_data = torch.cat([tensor_data, x_pad], dim=2)

In [None]:
tensor_data = torch.cat([tensor_data, y_pad], dim=3)

In [5]:
# tensor_data=rgb_data
tensor_data.shape

torch.Size([3627, 3, 201, 192])

In [5]:
from torch.utils.data import random_split

transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
])

dataset = TensorDataset(tensor_data, tensor_labels)
train_data, test_data = random_split(dataset, [3000, 627])
batch_size = 32
train_dataloader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
test_dataloader = DataLoader(test_data, batch_size=batch_size, shuffle=True)

In [None]:
class MyCNN(nn.Module):
    def __init__(self):
        super(MyCNN, self).__init__()
        # Define the layers of your CNN
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=16, kernel_size=3, stride=1, padding=1)
        self.relu = nn.ReLU()
        self.fc = nn.Linear(16 * 201 * 192, 31)
    
    def forward(self, x):
        x = self.conv1(x)
        x = self.relu(x)
        x = x.view(x.size(0), -1)
        x = self.fc(x)
        return x



In [None]:
class MyCNN(nn.Module):
    def __init__(self, num_classes):
        super(MyCNN, self).__init__()
        self.resnet = resnet50(pretrained=True)
        num_features = self.resnet.fc.in_features
        self.resnet.fc = nn.Linear(num_features, num_classes)

    def forward(self, x):
        x = self.resnet(x)
        return x

In [6]:
class ViT(nn.Module):
    def __init__(self, num_classes=10,  pretrained=False):
        # super(ViT, self).__init__()
        # self.model = timm.create_model('vit_base_patch16_224', pretrained=pretrained)
        # # Modify the head for your number of classes
        # num_patches = (img_size[0] // 16) * (img_size[1] // 16)
        # self.model.patch_embed = nn.Conv2d(3, self.model.embed_dim, kernel_size=16, stride=16, padding=0)
        # self.model.cls_token = nn.Parameter(torch.rand(1, 1, self.model.embed_dim))

        # # Modify the head for your number of classes
        # self.model.head = nn.Linear(self.model.head.in_features, num_classes)

        super(ViT, self).__init__()
        self.model = timm.create_model('vit_base_patch16_224', pretrained=pretrained)
        # Modify the head for your number of classes
        self.model.head = nn.Linear(self.model.head.in_features, num_classes)

        
    def forward(self, x):
        return self.model(x)

In [7]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
# from torchvision.datasets import YourDataset  # Replace YourDataset with the actual dataset you are using

# Define the CNN layer
class ResizeCNN(nn.Module):
    def __init__(self):
        super(ResizeCNN, self).__init__()
        
        # Define layers for upsampling
        self.upsample = nn.Sequential(
            nn.ConvTranspose2d(3, 64, kernel_size=4, stride=2, padding=1),
            nn.ReLU(inplace=True),
            nn.ConvTranspose2d(64, 128, kernel_size=4, stride=2, padding=1),
            nn.ReLU(inplace=True),
            nn.ConvTranspose2d(128, 256, kernel_size=4, stride=2, padding=1),
            nn.ReLU(inplace=True),
            nn.ConvTranspose2d(256, 3, kernel_size=4, stride=2, padding=1),
            nn.ReLU(inplace=True)
        )

    def forward(self, x):
        return self.upsample(x)


# Define the complete model
class YourModel(nn.Module):
    def __init__(self, num_classes):
        super(YourModel, self).__init__()
        self.cnn_layer = ResizeCNN()
        self.resize = nn.AdaptiveAvgPool2d((224, 224))
        self.vit_layer = ViT(num_classes=num_classes, pretrained=True)
        # self.fc = nn.Linear(vit_output_size, num_classes)  # Adjust vit_output_size to the actual output size of ViT

    def forward(self, x):
        x = self.cnn_layer(x)
        print(x.shape)
        x = self.resize(x)
        print(x.shape)
        x = self.vit_layer(x)
        print(x.shape)
        x = x.view(x.size(0), -1)  # Flatten the output for the fully connected layer
        # x = self.fc(x)
        return x




In [8]:
if torch.cuda.is_available():
    device = torch.device(0)
else:
    device = torch.device('cpu')
print(device)

cuda:0


In [9]:
model = YourModel(num_classes=31)

In [10]:
# Step 4: Model Compilation
# model = MyCNN(31)
criterion = nn.CrossEntropyLoss().to(device)
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [11]:
num_epochs = 100
batch_size = 32

In [12]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)
model.to(device)


cuda:0


YourModel(
  (cnn_layer): ResizeCNN(
    (upsample): Sequential(
      (0): ConvTranspose2d(3, 64, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))
      (1): ReLU(inplace=True)
      (2): ConvTranspose2d(64, 128, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))
      (3): ReLU(inplace=True)
      (4): ConvTranspose2d(128, 256, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))
      (5): ReLU(inplace=True)
      (6): ConvTranspose2d(256, 3, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))
      (7): ReLU(inplace=True)
    )
  )
  (resize): AdaptiveAvgPool2d(output_size=(224, 224))
  (vit_layer): ViT(
    (model): VisionTransformer(
      (patch_embed): PatchEmbed(
        (proj): Conv2d(3, 768, kernel_size=(16, 16), stride=(16, 16))
        (norm): Identity()
      )
      (pos_drop): Dropout(p=0.0, inplace=False)
      (patch_drop): Identity()
      (norm_pre): Identity()
      (blocks): Sequential(
        (0): Block(
          (norm1): LayerNorm((768,), eps=1e-06, elementwise

In [19]:
model(torch.tensor(np.random.random((32,3,201,192)),dtype=float).to(device))

RuntimeError: Input type (torch.cuda.DoubleTensor) and weight type (torch.cuda.FloatTensor) should be the same

In [1]:
import torch.nn.functional as F

for epoch in range(num_epochs):
    model.train()
    print(f'epoch {epoch}')
    for batch in train_dataloader:
        inputs, labels = batch
        inputs = inputs.to(device)
        labels= labels.to(device)

        # inputs = F.interpolate(inputs, size=(224,224), mode='area')
        optimizer.zero_grad()
        outputs = model(inputs).to(device)  # Add a channel dimension to the input
        loss = criterion(outputs, labels).to(device)
        loss.backward()
        optimizer.step()
    
    # Step 6: Model Evaluation
    model.eval()
    with torch.no_grad():
        cumulative_accuracy = torch.tensor([]).to(device)
        for batch in test_dataloader:
            inputs, labels = batch
            inputs = inputs.to(device)
            labels = labels.to(device)
            # inputs = F.interpolate(inputs, size=(224,224), mode='area')

            val_outputs = model(inputs).to(device)
            val_loss = criterion(val_outputs, labels).to(device)
            val_predictions = torch.argmax(val_outputs, dim=1).to(device)
            val_accuracy = (val_predictions == labels).float().to(device)
            cumulative_accuracy = torch.cat([cumulative_accuracy,val_accuracy], dim=0).to(device)
    
    print(f"Epoch {epoch+1}/{num_epochs}, Validation Loss: {val_loss.item():.4f}, Validation Accuracy: {cumulative_accuracy.mean().item():.4f}")

NameError: name 'num_epochs' is not defined

In [2]:
import torch
import torch.nn as nn
import torch.optim as optim

# Define the Transformer model
class TransformerModel(nn.Module):
    def __init__(self, input_dim, output_dim, hidden_dim=4, nhead=2, num_layers=2):
        super(TransformerModel, self).__init__()
        self.transformer = nn.Transformer(
            d_model=input_dim,
            nhead=nhead,
            num_encoder_layers=num_layers,
            num_decoder_layers=num_layers,
            dim_feedforward=hidden_dim,
        )
        self.fc = nn.Linear(input_dim, output_dim)

    def forward(self, x):
        x = self.transformer(x)
        x = x.mean(dim=1)  # Global average pooling
        x = self.fc(x)
        return x

# Create the Transformer model
input_dim = 192 * 201  # Assuming your tensors are flattened to a 1D array
output_dim = 31  # Number of classes (letters a through z)
model = TransformerModel(input_dim, output_dim)

model = model.to(device)

In [None]:
# Define loss function and optimizer
criterion = nn.CrossEntropyLoss().to(device)
optimizer = optim.Adam(model.parameters(), lr=0.001).to(device)

# Convert your data to PyTorch tensors and loaders
# Assuming you have a dataset named 'dataset' and a DataLoader named 'dataloader'

# Training loop
num_epochs = 10
for epoch in range(num_epochs):
    model.train()
    for inputs, labels in train_dataloader:
        optimizer.zero_grad()
        inputs = inputs.to(device)
        labels = labels.to(device)
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {loss.item()}")

# Testing loop
model.eval()
total_correct = 0
total_samples = 0
with torch.no_grad():
    for inputs, labels in test_dataloader:  # Assuming you have a separate test dataset
        inputs = inputs.to(device)
        labels = labels.to(device)
        outputs = model(inputs)
        _, predicted = torch.max(outputs, 1)
        total_samples += labels.size(0)
        total_correct += (predicted == labels).sum().item()

accuracy = total_correct / total_samples
print(f"Accuracy on test set: {accuracy}")

In [None]:
from tqdm import tqdm 
for epoch in range(10):
    model.train()
    running_loss = 0.0

    for inputs, labels in tqdm(train_dataloader, desc=f'Epoch {epoch + 1}/{num_epochs}', unit='batch'):
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()

        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    epoch_loss = running_loss / len(train_dataloader)
    print(f'Training Loss: {epoch_loss:.4f}')

# Save the trained model
torch.save(model.state_dict(), 'vit_model.pth')