In [19]:
# 1. Dependencies and Imports
import os
import random
from collections import defaultdict

torch_import = True
import torch
from torch import nn, optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, random_split

from PIL import Image
import cv2
import numpy as np

# Lookup table: dataset folder (class) name -> character used when building expressions.
# '+' and '-' map to themselves; 'div' and 'times' become Python operators.
symbol_map = {
    '+': '+',
    '-': '-',
    'div': '/',
    'times': '*',
}

def class_to_symbol(c):
    """Translate a class name into the character it stands for.

    Names present in ``symbol_map`` are replaced by their mapped character;
    every other name (digits, '=', ...) is returned as-is.
    """
    try:
        return symbol_map[c]
    except KeyError:
        return c

In [20]:
# 2. Dataset Loading using ImageFolder
# Directory containing subfolders for each symbol class (digits and ops)
data_dir = './extracted_images'

# Seed for the train/test partition so the split is identical on every run
SPLIT_SEED = 42

# Data transforms: single-channel, 28x28, pixel values rescaled from [0, 1] to [-1, 1]
transform = transforms.Compose([
    transforms.Grayscale(),
    transforms.Resize((28, 28)),
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

# Load dataset; class names are the subfolder names
dataset = datasets.ImageFolder(data_dir, transform=transform)
classes = dataset.classes  # e.g. ['+', '-', '0', ..., '9', '=', 'div', 'times']
num_classes = len(classes)
print("Detected classes:", classes)

# Split into train/test (80/20). A seeded generator keeps the partition
# reproducible across kernel restarts instead of reshuffling every run.
total = len(dataset)
train_size = int(0.8 * total)
test_size = total - train_size
split_generator = torch.Generator().manual_seed(SPLIT_SEED)
train_ds, test_ds = random_split(dataset, [train_size, test_size], generator=split_generator)

train_loader = DataLoader(train_ds, batch_size=64, shuffle=True)
test_loader  = DataLoader(test_ds, batch_size=1000, shuffle=False)

# Build mapping from class names to image file paths for demos
# (dataset.samples holds (path, class_index) pairs)
class_to_paths = defaultdict(list)
for path, idx in dataset.samples:
    cls = classes[idx]
    class_to_paths[cls].append(path)

Detected classes: ['+', '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '=', 'div', 'times']


In [21]:
# 3. Model Definition
class SimpleCNN(nn.Module):
    """Small convolutional classifier for single-channel 28x28 symbol images.

    Architecture: two 3x3 convolutions (32 then 64 channels) with ReLU, a 2x2
    max-pool, dropout, a 128-unit hidden layer, dropout, and a final linear
    layer with ``num_classes`` outputs.

    ``forward`` returns log-probabilities (``log_softmax`` over dim 1), so the
    matching loss is ``nll_loss``.

    Args:
        num_classes: Number of output classes.
    """

    def __init__(self, num_classes):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 32, 3, 1)
        self.conv2 = nn.Conv2d(32, 64, 3, 1)
        # Channel-wise dropout on the 4-D convolutional feature map.
        self.dropout1 = nn.Dropout2d(0.25)
        # Plain dropout: this layer runs on the 2-D output of fc1. Passing a
        # 2-D tensor to Dropout2d is deprecated in PyTorch (warning now, error
        # announced); nn.Dropout is the documented behaviour-preserving swap.
        self.dropout2 = nn.Dropout(0.5)
        # 9216 = 64 channels * 12 * 12 (28 -> 26 -> 24 through the two
        # unpadded 3x3 convs, then 12 after 2x2 pooling), i.e. the layer is
        # sized for 28x28 inputs.
        self.fc1 = nn.Linear(9216, 128)
        self.fc2 = nn.Linear(128, num_classes)

    def forward(self, x):
        """Return per-class log-probabilities for a batch of images ``x``."""
        x = nn.functional.relu(self.conv1(x))
        x = nn.functional.relu(self.conv2(x))
        x = nn.functional.max_pool2d(x, 2)
        x = self.dropout1(x)
        # Keep the batch dimension, flatten everything else for the linear layers.
        x = torch.flatten(x, 1)
        x = nn.functional.relu(self.fc1(x))
        x = self.dropout2(x)
        x = self.fc2(x)
        return nn.functional.log_softmax(x, dim=1)

# Model / optimizer setup: run on the GPU when CUDA is available, else on the CPU
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

model = SimpleCNN(num_classes)
model = model.to(device)

# Adam over all model parameters with a learning rate of 1e-3
optimizer = optim.Adam(model.parameters(), lr=1e-3)

In [22]:
# 4. Training and Testing Functions
def train(model, device, loader, optimizer, epoch):
    """Run one training pass over ``loader``.

    Puts ``model`` in training mode, then for every batch moves the data to
    ``device``, computes the negative log-likelihood loss on the model output,
    back-propagates and steps ``optimizer``. Progress is printed for every
    100th batch (starting with the first); ``epoch`` is only used in that
    message. Returns nothing.
    """
    model.train()
    for step, batch in enumerate(loader):
        inputs, labels = batch
        inputs = inputs.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        log_probs = model(inputs)
        loss = nn.functional.nll_loss(log_probs, labels)
        loss.backward()
        optimizer.step()

        if step % 100 != 0:
            continue
        # Samples seen so far is approximated as batch index * current batch size.
        print(f"Epoch {epoch} [{step*len(inputs)}/{len(loader.dataset)}] Loss={loss.item():.4f}")


def test(model, device, loader):
    model.eval()
    test_loss, correct = 0, 0
    with torch.no_grad():
        for data, target in loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += nn.functional.nll_loss(output, target, reduction='sum').item()
            pred = output.argmax(dim=1, keepdim=True)
            correct += pred.eq(target.view_as(pred)).sum().item()
    test_loss /= len(loader.dataset)
    accuracy = 100. * correct / len(loader.dataset)
    print(f"Test: Avg loss={test_loss:.4f}, Accuracy={accuracy:.2f}%")

In [23]:
# 5. Random Equation Generation and Solving
# Symbols that can sit between two operands in a generated equation.
_ARITHMETIC_OPS = ('+', '-', '*', '/')


def _apply_operator(lhs, op, rhs):
    """Compute ``lhs op rhs`` for symbol strings; return None when not computable.

    Only the shape "<decimal digits> <one of + - * /> <decimal digits>" is
    evaluated. Anything else (an operator detected in an operand slot, '=',
    a zero divisor, ...) yields None instead of raising.
    """
    if not (lhs.isdecimal() and rhs.isdecimal()):
        return None
    a, b = int(lhs), int(rhs)
    if op == '+':
        return a + b
    if op == '-':
        return a - b
    if op == '*':
        return a * b
    if op == '/':
        # True division, like Python's `/`; undefined for a zero divisor.
        return a / b if b != 0 else None
    return None


def generate_and_solve_random_equations(model, device, class_to_paths, classes, transform, num_eq=5):
    """Build random "digit op digit" equations from dataset images and solve them.

    For each of ``num_eq`` equations a digit class, a second digit class and an
    operator class are drawn at random, one image path is drawn for each from
    ``class_to_paths``, and the three images are classified with ``model``.
    The ground-truth expression and the detected expression are printed side
    by side with their results (``None`` when an expression cannot be
    computed, e.g. division by zero or a misdetected symbol).

    Args:
        model: Classifier returning per-class scores for a (1, 1, H, W) batch.
            It is switched to eval mode for the duration of the call and its
            previous training flag is restored afterwards.
        device: Device the input tensors are moved to.
        class_to_paths: Mapping class name -> list of image file paths.
        classes: Class names, indexed by the model's output index.
        transform: Callable turning a PIL image into a tensor.
        num_eq: Number of equations to generate.

    Note:
        Only classes whose symbol is one of + - * / are used as operators;
        other non-digit classes such as '=' cannot form a solvable
        "a op b" expression.
    """
    digits = [c for c in classes if c.isdigit()]
    ops = [c for c in classes if class_to_symbol(c) in _ARITHMETIC_OPS]

    def predict(path):
        """Classify the image at ``path`` and return the predicted symbol."""
        # Context manager closes the underlying file once the pixels are loaded.
        with Image.open(path) as raw:
            gray = raw.convert('L')
        batch = transform(gray).unsqueeze(0).to(device)
        with torch.no_grad():
            out = model(batch)
        return class_to_symbol(classes[out.argmax(dim=1).item()])

    was_training = model.training
    model.eval()  # dropout must be off, otherwise predictions are randomised
    try:
        for _ in range(num_eq):
            a_cls = random.choice(digits)
            b_cls = random.choice(digits)
            op_cls = random.choice(ops)
            paths = [random.choice(class_to_paths[c]) for c in (a_cls, op_cls, b_cls)]

            da, dop, db = (predict(p) for p in paths)

            a_sym = class_to_symbol(a_cls)
            op_sym = class_to_symbol(op_cls)
            b_sym = class_to_symbol(b_cls)
            expr_true = f"{a_sym}{op_sym}{b_sym}"
            expr_det = f"{da}{dop}{db}"
            res_true = _apply_operator(a_sym, op_sym, b_sym)
            res_det = _apply_operator(da, dop, db)
            print(f"True: {expr_true}={res_true} | Detected: {expr_det}={res_det}")
    finally:
        model.train(was_training)

In [24]:
# 6. Webcam OCR Function
# Characters allowed in an expression that gets evaluated.
_EXPRESSION_CHARS = frozenset("0123456789+-*/")


def _evaluate_expression(expr):
    """Evaluate a recognised arithmetic string; return None when it is not computable.

    The string is only evaluated when it consists solely of digits and the
    operators + - * /. ``**`` (two adjacent '*' detections) is rejected because
    exponentiation is not a recognised symbol and can be arbitrarily expensive.
    """
    if not expr or not set(expr) <= _EXPRESSION_CHARS or '**' in expr:
        return None
    try:
        # NOTE(review): eval is kept for operator precedence, but it only ever
        # sees the whitelisted characters above and has no builtins available.
        return eval(expr, {"__builtins__": {}}, {})
    except (SyntaxError, ArithmeticError, ValueError):
        # Malformed expression (e.g. "1+", leading zeros) or division by zero.
        return None


def run_webcam(model, device, transform, classes):
    """Recognise and evaluate a handwritten expression from the default webcam.

    Per frame: convert to grayscale, apply an inverted Otsu threshold, find
    external contours with area > 100, and process their bounding boxes from
    left to right. Each box is resized to 28x28, normalised with
    ``(x - 0.5) / 0.5`` and classified by ``model``. Boxes and predicted
    symbols are drawn on the frame, followed by the concatenated expression
    (with ``=result`` when it can be evaluated). Press 'q' in the window to
    stop; the loop also ends when no frame can be read.

    Args:
        model: Classifier returning per-class scores for a (1, 1, 28, 28) batch.
            It is switched to eval mode for the duration of the call and its
            previous training flag is restored afterwards.
        device: Device the input tensors are moved to.
        transform: Unused; kept for signature compatibility.
        classes: Class names, indexed by the model's output index.
    """
    was_training = model.training
    model.eval()  # dropout must be off, otherwise predictions are randomised
    cap = cv2.VideoCapture(0)
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            # Inverted threshold: pixels below the Otsu level become 255 so that
            # (presumably dark) ink shows up as foreground for contour detection.
            _, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
            contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
            boxes = sorted(
                (cv2.boundingRect(cnt) for cnt in contours if cv2.contourArea(cnt) > 100),
                key=lambda b: b[0],  # left-to-right reading order
            )
            expr = ''
            for x, y, w, h in boxes:
                roi = cv2.resize(thresh[y:y+h, x:x+w], (28, 28))
                # Undo the threshold inversion and scale to [0, 1] ...
                roi = (255 - roi).astype(np.float32) / 255
                # ... then map to [-1, 1] and add batch and channel dimensions.
                tensor = torch.from_numpy((roi - 0.5) / 0.5).unsqueeze(0).unsqueeze(0).to(device)
                with torch.no_grad():
                    out = model(tensor)
                pred_cls = classes[out.argmax(dim=1).item()]
                ch = class_to_symbol(pred_cls)
                expr += ch
                cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 1)
                cv2.putText(frame, ch, (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
            res = _evaluate_expression(expr)
            disp = expr if res is None else f"{expr}={res}"
            cv2.putText(frame, disp, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2)
            cv2.imshow('Math OCR', frame)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Always free the camera and close the window, even when the loop is
        # interrupted or a frame raises.
        cap.release()
        cv2.destroyAllWindows()
        model.train(was_training)

In [None]:
# 7. Execute Workflow
# Six epochs; each training pass is followed by an evaluation on the test split.
NUM_EPOCHS = 6
for epoch in range(1, NUM_EPOCHS + 1):
    train(model, device, train_loader, optimizer, epoch)
    test(model, device, test_loader)

Epoch 1 [0/136471] Loss=2.7081
Epoch 1 [6400/136471] Loss=0.6946
Epoch 1 [12800/136471] Loss=0.5728
Epoch 1 [19200/136471] Loss=0.1383
Epoch 1 [25600/136471] Loss=0.1427
Epoch 1 [32000/136471] Loss=0.3257
Epoch 1 [38400/136471] Loss=0.3172
Epoch 1 [44800/136471] Loss=0.1324
Epoch 1 [51200/136471] Loss=0.3750
Epoch 1 [57600/136471] Loss=0.1909
Epoch 1 [64000/136471] Loss=0.2554
Epoch 1 [70400/136471] Loss=0.0453
Epoch 1 [76800/136471] Loss=0.4872
Epoch 1 [83200/136471] Loss=0.0396
Epoch 1 [89600/136471] Loss=0.3049
Epoch 1 [96000/136471] Loss=0.0623
Epoch 1 [102400/136471] Loss=0.1631
Epoch 1 [108800/136471] Loss=0.1720
Epoch 1 [115200/136471] Loss=0.2666
Epoch 1 [121600/136471] Loss=0.0684
Epoch 1 [128000/136471] Loss=0.1364
Epoch 1 [134400/136471] Loss=0.0749
Test: Avg loss=0.0461, Accuracy=98.86%
Epoch 2 [0/136471] Loss=0.0627
Epoch 2 [6400/136471] Loss=0.0757
Epoch 2 [12800/136471] Loss=0.0451
Epoch 2 [19200/136471] Loss=0.0294
Epoch 2 [25600/136471] Loss=0.0679
Epoch 2 [32000/13647

In [26]:
# Demo 1: random equations assembled from dataset images
print("\nRandom Equations:")
generate_and_solve_random_equations(
    model=model,
    device=device,
    class_to_paths=class_to_paths,
    classes=classes,
    transform=transform,
)

# Demo 2: live webcam recognition, started once the user confirms
input("Press Enter to start webcam OCR (q to quit)...")
run_webcam(model=model, device=device, transform=transform, classes=classes)


Random Equations:
True: 3/4=0.75 | Detected: 3/4=0.75
True: 7+6=13 | Detected: 7+6=13
True: 1/8=0.125 | Detected: 1/8=0.125
True: 1+3=4 | Detected: 1+3=4
True: 9-4=5 | Detected: 9-4=5
