In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from preprocess import preprocess_image  # Import from preprocess.py
from model import OCRModel  # Import from separate.py
from dataload import bangla_load, english_load

# Load Bangla and English data loaders
bangla_train_loader, bangla_val_loader, bangla_test_loader, bangla_num_classes = bangla_load
english_train_loader, english_val_loader, english_test_loader, english_num_classes = english_load

# Load pre-trained models with weights_only=True
bangla_model = OCRModel(bangla_num_classes)
english_model = OCRModel(english_num_classes)
bangla_model.load_state_dict(torch.load("saved_models/bangla_model.pth", weights_only=True)['model_state_dict'])
english_model.load_state_dict(torch.load("saved_models/english_model.pth", weights_only=True)['model_state_dict'])

# Combined model
class CombinedModel(nn.Module):
    def __init__(self, bangla_model, english_model, combined_classes):
        super(CombinedModel, self).__init__()
        self.bangla_feature_extractor = nn.Sequential(*list(bangla_model.children())[:-2])  # Up to LSTM
        self.bangla_lstm = list(bangla_model.children())[-2]  # LSTM layer
        self.english_feature_extractor = nn.Sequential(*list(english_model.children())[:-2])  # Up to LSTM
        self.english_lstm = list(english_model.children())[-2]  # LSTM layer
        
        # Joint layers
        self.fc1 = nn.Linear(512, 32)
        self.fc2 = nn.Linear(32, combined_classes)

    def forward(self, bangla_x, english_x):
        # Bangla feature extraction
        bangla_features = self.bangla_feature_extractor(bangla_x)  # 4D output
        bangla_features = bangla_features.permute(0, 2, 3, 1).reshape(bangla_features.size(0), -1, 1024)
        bangla_features, _ = self.bangla_lstm(bangla_features)  # LSTM processing
        
        # English feature extraction
        english_features = self.english_feature_extractor(english_x)  # 4D output
        english_features = english_features.permute(0, 2, 3, 1).reshape(english_features.size(0), -1, 1024)
        english_features, _ = self.english_lstm(english_features)  # LSTM processing

        # Concatenate features
        combined_features = torch.cat((bangla_features[:, -1, :], english_features[:, -1, :]), dim=1)

        # Fully connected layers
        x = torch.relu(self.fc1(combined_features))
        x = self.fc2(x)
        return x

class CombinedDataset(Dataset):
    def __init__(self, bangla_loader, english_loader):
        self.bangla_data = list(bangla_loader.dataset)
        self.english_data = list(english_loader.dataset)

        # Match lengths
        min_length = min(len(self.bangla_data), len(self.english_data))
        self.bangla_data = self.bangla_data[:min_length]
        self.english_data = self.english_data[:min_length]

        # Unique combined labels
        self.combined_labels = set(
            hash((f'b{self.bangla_data[idx][1]}', f'e{self.english_data[idx][1]}'))
            for idx in range(len(self.bangla_data))
        )

    def __len__(self):
        return len(self.bangla_data)

    def __getitem__(self, idx):
        bangla_image, bangla_label = self.bangla_data[idx]
        english_image, english_label = self.english_data[idx]

        # Unique label
        combined_label = hash((f'b{bangla_label}', f'e{english_label}')) % len(self.combined_labels)
        return bangla_image, english_image, combined_label

    @property
    def num_classes(self):
        return len(self.combined_labels)

# Create combined dataset and dataloader
combined_dataset = CombinedDataset(bangla_train_loader, english_train_loader)
combined_dataloader = DataLoader(combined_dataset, batch_size=32, shuffle=True)

# Initialize combined model
combined_classes = combined_dataset.num_classes
combined_model = CombinedModel(bangla_model, english_model, combined_classes)

# Unfreeze all parameters in the combined model
for param in combined_model.parameters():
    param.requires_grad = True

# Define the optimizer to update all layers
optimizer = optim.Adam(combined_model.parameters(), lr=0.0001)

# Loss function
criterion = nn.CrossEntropyLoss()

# Training Function
def train_combined_model(model, train_loader, val_loader, criterion, optimizer, epochs=10):
    history = {
        "train_loss": [],
        "train_accuracy": [],
        "val_loss": [],
        "val_accuracy": []
    }
    
    for epoch in range(epochs):
        # Training phase
        model.train()
        running_loss = 0.0
        correct_train = 0
        total_train = 0

        for bangla_images, english_images, labels in train_loader:
            bangla_images, english_images, labels = bangla_images.cuda(), english_images.cuda(), labels.cuda()

            optimizer.zero_grad()
            outputs = model(bangla_images, english_images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            _, preds = torch.max(outputs, 1)
            correct_train += (preds == labels).sum().item()
            total_train += labels.size(0)

        train_loss = running_loss / len(train_loader)
        train_accuracy = 100 * correct_train / total_train

        # Validation phase
        model.eval()
        val_loss = 0.0
        correct_val = 0
        total_val = 0

        with torch.no_grad():
            for bangla_images, english_images, labels in val_loader:
                bangla_images, english_images, labels = bangla_images.cuda(), english_images.cuda(), labels.cuda()
                outputs = model(bangla_images, english_images)
                loss = criterion(outputs, labels)

                val_loss += loss.item()
                _, preds = torch.max(outputs, 1)
                correct_val += (preds == labels).sum().item()
                total_val += labels.size(0)

        val_loss /= len(val_loader)
        val_accuracy = 100 * correct_val / total_val

        # Update history
        history["train_loss"].append(train_loss)
        history["train_accuracy"].append(train_accuracy)
        history["val_loss"].append(val_loss)
        history["val_accuracy"].append(val_accuracy)

        print(f"Epoch {epoch + 1}/{epochs}: "
              f"Train Loss: {train_loss:.4f}, Train Accuracy: {train_accuracy:.2f}%, "
              f"Val Loss: {val_loss:.4f}, Val Accuracy: {val_accuracy:.2f}%")
    
    return history

# Move model to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
combined_model = combined_model.to(device)


Directory: Dataset/Bangla/Dataset/Train
Checked 12000 images in total.
Removed 0 unreadable images.
Removed 0 non-image files.
Total number of images: 12000
Total number of labels: 50

Directory: Dataset/Bangla/Dataset/Test
Checked 3000 images in total.
Removed 0 unreadable images.
Removed 0 non-image files.
Total number of images: 3000
Total number of labels: 50

Directory: Dataset/English/data/training_data
Checked 20628 images in total.
Removed 0 unreadable images.
Removed 0 non-image files.
Total number of images: 20628
Total number of labels: 36

Directory: Dataset/English/data/testing_data
Checked 1008 images in total.
Removed 0 unreadable images.
Removed 0 non-image files.
Total number of images: 1008
Total number of labels: 36


In [3]:
print(combined_classes)

1798


In [2]:
combined_model.load_state_dict(torch.load("bilingual_ocr_combined_model_fully_trained.pth"))

  combined_model.load_state_dict(torch.load("bilingual_ocr_combined_model_fully_trained.pth"))


RuntimeError: Error(s) in loading state_dict for CombinedModel:
	size mismatch for fc2.weight: copying a param with shape torch.Size([1797, 32]) from checkpoint, the shape in current model is torch.Size([1798, 32]).
	size mismatch for fc2.bias: copying a param with shape torch.Size([1797]) from checkpoint, the shape in current model is torch.Size([1798]).

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from preprocess import preprocess_image  # Import from preprocess.py
from model import OCRModel  # Import from separate.py
from dataload import bangla_load, english_load

# Load Bangla and English data loaders
bangla_train_loader, bangla_val_loader, bangla_test_loader, bangla_num_classes = bangla_load
english_train_loader, english_val_loader, english_test_loader, english_num_classes = english_load

# Load pre-trained models with weights_only=True
bangla_model = OCRModel(bangla_num_classes)
english_model = OCRModel(english_num_classes)
bangla_model.load_state_dict(torch.load("saved_models/bangla_model.pth", weights_only=True)['model_state_dict'])
english_model.load_state_dict(torch.load("saved_models/english_model.pth", weights_only=True)['model_state_dict'])


Directory: Dataset/Bangla/Dataset/Train
Checked 12000 images in total.
Removed 0 unreadable images.
Removed 0 non-image files.
Total number of images: 12000
Total number of labels: 50

Directory: Dataset/Bangla/Dataset/Test
Checked 3000 images in total.
Removed 0 unreadable images.
Removed 0 non-image files.
Total number of images: 3000
Total number of labels: 50

Directory: Dataset/English/data/training_data
Checked 20628 images in total.
Removed 0 unreadable images.
Removed 0 non-image files.
Total number of images: 20628
Total number of labels: 36

Directory: Dataset/English/data/testing_data
Checked 1008 images in total.
Removed 0 unreadable images.
Removed 0 non-image files.
Total number of images: 1008
Total number of labels: 36


<All keys matched successfully>

In [2]:
# Combined model
class CombinedModel(nn.Module):
    def __init__(self, bangla_model, english_model, combined_classes):
        super(CombinedModel, self).__init__()
        self.bangla_feature_extractor = nn.Sequential(*list(bangla_model.children())[:-2])  # Up to LSTM
        self.bangla_lstm = list(bangla_model.children())[-2]  # LSTM layer
        self.english_feature_extractor = nn.Sequential(*list(english_model.children())[:-2])  # Up to LSTM
        self.english_lstm = list(english_model.children())[-2]  # LSTM layer
        
        # Joint layers
        self.fc1 = nn.Linear(512, 32)
        self.fc2 = nn.Linear(32, combined_classes)

    def forward(self, bangla_x, english_x):
        # Bangla feature extraction
        bangla_features = self.bangla_feature_extractor(bangla_x)  # 4D output
        bangla_features = bangla_features.permute(0, 2, 3, 1).reshape(bangla_features.size(0), -1, 1024)
        bangla_features, _ = self.bangla_lstm(bangla_features)  # LSTM processing
        
        # English feature extraction
        english_features = self.english_feature_extractor(english_x)  # 4D output
        english_features = english_features.permute(0, 2, 3, 1).reshape(english_features.size(0), -1, 1024)
        english_features, _ = self.english_lstm(english_features)  # LSTM processing

        # Concatenate features
        combined_features = torch.cat((bangla_features[:, -1, :], english_features[:, -1, :]), dim=1)

        # Fully connected layers
        x = torch.relu(self.fc1(combined_features))
        x = self.fc2(x)
        return x

class CombinedDataset(Dataset):
    def __init__(self, bangla_loader, english_loader):
        self.bangla_data = list(bangla_loader.dataset)
        self.english_data = list(english_loader.dataset)

        # Match lengths
        min_length = min(len(self.bangla_data), len(self.english_data))
        self.bangla_data = self.bangla_data[:min_length]
        self.english_data = self.english_data[:min_length]

        # Unique combined labels
        self.combined_labels = set(
            hash((f'b{self.bangla_data[idx][1]}', f'e{self.english_data[idx][1]}'))
            for idx in range(len(self.bangla_data))
        )

    def __len__(self):
        return len(self.bangla_data)

    def __getitem__(self, idx):
        bangla_image, bangla_label = self.bangla_data[idx]
        english_image, english_label = self.english_data[idx]

        # Unique label
        combined_label = hash((f'b{bangla_label}', f'e{english_label}')) % len(self.combined_labels)
        return bangla_image, english_image, combined_label

    @property
    def num_classes(self):
        return len(self.combined_labels)

In [3]:
# Create combined dataset and dataloader
combined_dataset = CombinedDataset(bangla_train_loader, english_train_loader)
combined_dataloader = DataLoader(combined_dataset, batch_size=32, shuffle=True)

# Initialize combined model
combined_classes = combined_dataset.num_classes
combined_model = CombinedModel(bangla_model, english_model, combined_classes)
combined_model.load_state_dict(torch.load("bilingual_ocr_combined_model_fully_trained.pth"))

  combined_model.load_state_dict(torch.load("bilingual_ocr_combined_model_fully_trained.pth"))


RuntimeError: Error(s) in loading state_dict for CombinedModel:
	size mismatch for fc2.weight: copying a param with shape torch.Size([1797, 32]) from checkpoint, the shape in current model is torch.Size([1798, 32]).
	size mismatch for fc2.bias: copying a param with shape torch.Size([1797]) from checkpoint, the shape in current model is torch.Size([1798]).

In [None]:
# Create combined test dataset and dataloader
combined_test_dataset = CombinedDataset(bangla_test_loader, english_test_loader)
combined_test_dataloader = DataLoader(combined_test_dataset, batch_size=32, shuffle=True)

import os
import torch
import matplotlib.pyplot as plt
import random
import seaborn as sns
from sklearn.metrics import confusion_matrix, classification_report, roc_curve, auc
import numpy as np
from sklearn.preprocessing import label_binarize


class Evaluator:
    def __init__(self, model, test_loader, num_classes, output_dir="evaluation_outputs"):
        """
        Initializes the evaluator.

        Args:
            model (torch.nn.Module): Trained model to evaluate.
            test_loader (DataLoader): DataLoader for the test dataset.
            num_classes (int): Number of classes in the dataset.
            output_dir (str): Directory to save evaluation plots and metrics.
        """
        self.model = model
        self.test_loader = test_loader
        self.num_classes = num_classes
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.output_dir = output_dir

        # Create output directory if it doesn't exist
        os.makedirs(output_dir, exist_ok=True)

    def evaluate_and_save(self):
        """
        Evaluates the model on test data and saves visualizations and metrics to disk.
        """
        self.model.eval()
        all_images = []
        all_labels = []
        all_predictions = []
        all_probabilities = []

        with torch.no_grad():
            for images, labels in self.test_loader:
                images, labels = images.to(self.device), labels.to(self.device)
                outputs = self.model(images)
                probabilities = torch.softmax(outputs, dim=1)
                _, predictions = torch.max(outputs, 1)

                # Collect all images, labels, and predictions
                all_images.extend(images.cpu())  # Collect all test images
                all_labels.extend(labels.cpu().numpy())
                all_predictions.extend(predictions.cpu().numpy())
                all_probabilities.extend(probabilities.cpu().numpy())

        # Convert collected data to NumPy arrays
        all_labels = np.array(all_labels)
        all_predictions = np.array(all_predictions)
        all_probabilities = np.array(all_probabilities)

        # Save sample predictions
        self._save_sample_predictions(all_images, all_labels, all_predictions)

        # Save confusion matrix
        self._save_confusion_matrix(all_labels, all_predictions)

        # Save classification report
        self._save_classification_report(all_labels, all_predictions)

        # Save ROC curve
        self._save_roc_curve(all_labels, all_probabilities)

    def _save_sample_predictions(self, images, labels, predictions):
        """
        Randomly selects test images until there are images of 5 unique labels 
        and saves them along with their predictions and true labels to a file.
        """
        max_samples = 5  # Limit the number of unique labels
        unique_labels = {}
        selected_indices = []
        total_samples = len(labels)

        # Randomly shuffle indices
        indices = list(range(total_samples))
        random.shuffle(indices)

        # Select images with unique labels
        for idx in indices:
            label = labels[idx]
            if label not in unique_labels:
                unique_labels[label] = True
                selected_indices.append(idx)
                if len(unique_labels) == max_samples:
                    break

        # Plot and save the selected samples
        plt.figure(figsize=(15, 5))
        for i, idx in enumerate(selected_indices):
            plt.subplot(1, len(selected_indices), i + 1)
            plt.imshow(images[idx].squeeze(), cmap='gray')
            plt.title(f"True: {labels[idx]}\nPred: {predictions[idx]}")
            plt.axis('off')

        plt.tight_layout()
        file_path = os.path.join(self.output_dir, "sample_predictions.png")
        plt.savefig(file_path)
        plt.close()




    def _save_confusion_matrix(self, labels, predictions):
        """
        Saves the confusion matrix to a file.
        """
        cm = confusion_matrix(labels, predictions)
        plt.figure(figsize=(10, 8))
        sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=True, yticklabels=True)
        plt.title("Confusion Matrix")
        plt.xlabel("Predicted")
        plt.ylabel("Actual")
        file_path = os.path.join(self.output_dir, "confusion_matrix.png")
        plt.savefig(file_path)
        plt.close()

    def _save_classification_report(self, labels, predictions):
        """
        Saves the classification report to a text file.
        """
        report = classification_report(labels, predictions)
        file_path = os.path.join(self.output_dir, "classification_report.txt")
        with open(file_path, "w") as f:
            f.write(report)

    def _save_roc_curve(self, labels, probabilities):
        # Binarize labels for multi-class ROC calculation
        labels_binarized = label_binarize(labels, classes=range(self.num_classes))
        fpr, tpr, _ = roc_curve(labels_binarized.ravel(), probabilities.ravel())
        roc_auc = auc(fpr, tpr)

        # Plot the ROC curve
        plt.figure(figsize=(10, 8))
        plt.plot(fpr, tpr, label=f"Combined Classes (AUC = {roc_auc:.2f})")
        plt.plot([0, 1], [0, 1], 'k--')  # Diagonal line
        plt.title("ROC Curve (All Classes)")
        plt.xlabel("False Positive Rate")
        plt.ylabel("True Positive Rate")
        plt.legend(loc="lower right")
        file_path = os.path.join(self.output_dir, "roc_curve.png")
        plt.savefig(file_path)
        plt.close()

print(bangla_test_loader)
evaluator = Evaluator(combined_model, combined_test_dataloader, combined_classes, output_dir="evaluation_outputs")
evaluator.evaluate_and_save()