# In [None]:
# Smoke-test cell: prints a fixed greeting (presumably to confirm the
# notebook kernel is running -- nothing else in the file depends on it).
print("Hello")

# In [None]:
import cv2
import torch

import torch.nn.init as init
import torch.nn as nn
import torch.nn.functional as F
from torchvision import transforms
from PIL import Image

class CNN(nn.Module):
    """Small convolutional classifier that maps an image batch to 35 logits.

    The network stacks six 3x3 convolutions (padding 1, so they keep the
    spatial size) with three 2x2 max-pools in between, followed by two fully
    connected layers. ``fc1`` expects ``8 * 16 * 16`` features per sample,
    which corresponds to 3-channel ``128 x 128`` inputs
    (128 -> 64 -> 32 -> 16 after the three pools).

    Attribute names (``conv1`` ... ``fc2``) are the ``state_dict`` keys, so
    they must stay as they are for existing checkpoints to load.
    """

    def __init__(self):
        super().__init__()
        # Shapes in the comments assume an input of [N, 3, 128, 128].
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=5, kernel_size=3, padding=1)
        # -> [N, 5, 128, 128]
        self.conv2 = nn.Conv2d(in_channels=5, out_channels=5, kernel_size=3, padding=1)
        # -> [N, 5, 128, 128]
        self.bn2 = nn.BatchNorm2d(5)
        self.conv3 = nn.Conv2d(in_channels=5, out_channels=6, kernel_size=3, padding=1)
        # -> [N, 6, 128, 128], pooled to [N, 6, 64, 64] in forward()
        self.conv4 = nn.Conv2d(in_channels=6, out_channels=6, kernel_size=3, padding=1)
        # -> [N, 6, 64, 64]
        self.bn4 = nn.BatchNorm2d(6)
        self.conv5 = nn.Conv2d(in_channels=6, out_channels=8, kernel_size=3, padding=1)
        # -> [N, 8, 64, 64], pooled to [N, 8, 32, 32] in forward()
        self.conv6 = nn.Conv2d(in_channels=8, out_channels=8, kernel_size=3, padding=1)
        # -> [N, 8, 32, 32], pooled to [N, 8, 16, 16] in forward()
        self.bn6 = nn.BatchNorm2d(8)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.fc1 = nn.Linear(in_features=8 * 16 * 16, out_features=64)
        self.fc2 = nn.Linear(in_features=64, out_features=35)
        self.dropout = nn.Dropout(p=0.5)

    def forward(self, x):
        """Return raw class logits of shape ``[N, 35]`` for a batch ``x``.

        No softmax is applied; callers that need probabilities must apply it
        themselves.

        Raises:
            RuntimeError: if the spatial size of ``x`` does not yield
                ``8 * 16 * 16`` features per sample (i.e. the input is not
                ``128 x 128``).
        """
        x = F.relu(self.conv1(x))
        x = F.relu(self.bn2(self.conv2(x)))
        x = self.pool(F.relu(self.conv3(x)))
        x = F.relu(self.bn4(self.conv4(x)))
        x = self.pool(F.relu(self.conv5(x)))
        x = self.pool(F.relu(self.bn6(self.conv6(x))))
        # Flatten per sample and keep the batch dimension fixed. Reshaping
        # with view(-1, 8 * 16 * 16) would let a wrongly sized input silently
        # turn into extra "samples"; this way fc1 raises on a size mismatch.
        x = torch.flatten(x, start_dim=1)
        x = F.relu(self.dropout(self.fc1(x)))
        return self.fc2(x)

    def _initialize_weights(self):
        """Re-initialise all conv, batch-norm and linear layers in place.

        Conv and linear weights get Kaiming-normal init (fan-out, ReLU);
        batch-norm weights are set to 1; batch-norm and linear biases to 0.
        Conv biases are left untouched. ``__init__`` does not call this
        method, so it only takes effect when invoked explicitly.
        """
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
            elif isinstance(m, nn.BatchNorm2d):
                init.constant_(m.weight, 1)
                init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                init.constant_(m.bias, 0)


# Prefer the GPU when CUDA is available; otherwise run on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Build the network and move it to the selected device.
model = CNN()
model.to(device)

# Restore the trained weights. The checkpoint is deserialised onto the CPU
# and the state dict stored under 'modelfinal1' is loaded into the model.
checkpoint = torch.load('checkpointfinal2.pth', map_location=torch.device('cpu'))
trained_weights = checkpoint['modelfinal1']
model.load_state_dict(trained_weights)

# Inference only: put dropout and batch-norm layers into evaluation mode.
model.eval()

# Class index -> label mapping: indices 0-8 are the digits '1'-'9' (there is
# no '0' class) and indices 9-34 are the letters 'A'-'Z'.
label_dict = dict(enumerate("123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"))


# Preprocessing applied to every region of interest. It does not depend on
# the frame, so it is built once instead of on every loop iteration.
transform = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

# Live inference loop: grab webcam frames, classify the region of interest
# and overlay the predicted label until ESC is pressed or a read fails.
cam = cv2.VideoCapture(0)
try:
    while True:
        ret, frame = cam.read()
        if not ret:
            print("Failed to read frame from camera")
            break

        # Mirror the image so the preview behaves like a mirror for the user.
        frame = cv2.flip(frame, 1)

        # Outline the capture region. The outline lies outside the slice
        # taken below, so the green border never ends up in the model input.
        cv2.rectangle(frame, (319, 9), (620 + 1, 309), (0, 255, 0), 1)
        # NOTE(review): the outline spans rows 9..309 but the ROI only takes
        # rows 10..299 -- confirm whether 300 was meant to be 310.
        roi = frame[10:300, 320:620]
        if roi.size == 0:
            # The frame is smaller than the hard-coded capture region.
            print("Camera frame is too small for the capture region")
            break

        # OpenCV delivers BGR frames while PIL interprets arrays as RGB, so
        # convert before handing the pixels to PIL. This assumes the model
        # was trained on RGB images; drop the conversion if training used
        # raw OpenCV (BGR) arrays.
        rgb_roi = cv2.cvtColor(roi, cv2.COLOR_BGR2RGB)
        pil_image = Image.fromarray(rgb_roi)

        # Add the batch dimension the model expects and move to its device.
        test_image_tensor = transform(pil_image).unsqueeze(0).to(device)

        # Inference only, so skip gradient tracking.
        with torch.no_grad():
            outputs = model(test_image_tensor)
        predicted = outputs.argmax(dim=1)
        predicted_label = label_dict[predicted.item()]

        # Overlay the prediction and show the annotated frame.
        cv2.putText(frame, predicted_label, (10, 50), cv2.FONT_HERSHEY_COMPLEX, 1, (0, 255, 0), 1)
        cv2.imshow("Frame", frame)

        # Wait up to 50 ms for a key press; 27 is the ESC key.
        k = cv2.waitKey(50) & 0xFF
        if k == 27:
            break
finally:
    # Release the camera and close the preview window even if the loop
    # body raised.
    cam.release()
    cv2.destroyAllWindows()