# Architectures used for experiments: 

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.models as models
from efficientnet_pytorch import EfficientNet
from torchvision.transforms import transforms

## EfficientNet

In [None]:
class EfficientNet_model(nn.Module):
    """Frozen EfficientNet-B0 backbone followed by a Bi-LSTM classification head.

    The backbone's feature map is unrolled into a sequence of spatial
    positions, run through a bidirectional LSTM, and the final state of each
    direction is classified by two fully connected layers.

    Args:
        num_classes: Number of output units of the last fully connected layer.
        hidden_size: Hidden size of each LSTM direction.
        num_layers: Number of stacked LSTM layers.
        dropout: Dropout probability used inside the LSTM (between stacked
            layers) and by both dropout layers of the head.

    NOTE(review): ``forward`` returns softmax probabilities. Losses such as
    ``nn.CrossEntropyLoss`` expect raw logits -- confirm the training loop
    accounts for this.
    """

    def __init__(self, num_classes, hidden_size=256, num_layers=2, dropout=0.25):
        super().__init__()
        # Load pre-trained model
        self.efficientnet = EfficientNet.from_pretrained('efficientnet-b0')
        # Freeze parameters of the pre-trained model so only the head is trained
        for param in self.efficientnet.parameters():
            param.requires_grad = False
        # Temporal feature extraction through Bi-LSTM; input_size must equal
        # the channel count of the backbone feature map.
        self.lstm = nn.LSTM(
            input_size=1280,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
            dropout=dropout,
            bidirectional=True,
        )
        self.relu = nn.ReLU()
        # Dropout to prevent overfitting
        self.dropout1 = nn.Dropout(dropout)
        # Fully connected layer; the Bi-LSTM yields 2 * hidden_size features
        self.fc1 = nn.Linear(hidden_size * 2, 256)
        # Dropout to prevent overfitting
        self.dropout2 = nn.Dropout(dropout)
        # The activation class is spelled ``ReLU``; the previous spelling does
        # not exist in torch.nn and made the constructor raise AttributeError.
        self.relu2 = nn.ReLU()
        # Last fully connected layer with number of classes as units to make predictions
        self.fc2 = nn.Linear(256, num_classes)
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        """Return per-class probabilities of shape ``(batch, num_classes)``.

        Args:
            x: Input batch for the backbone (presumably ``(batch, 3, H, W)``
                images -- confirm against the data pipeline).
        """
        x = self.efficientnet.extract_features(x)
        # Collapse the spatial grid into a sequence:
        # (batch, C, H, W) -> (batch, C, H*W) -> (batch, H*W, C)
        x = x.flatten(start_dim=2).permute(0, 2, 1)
        # Pass the sequence of spatial positions through the bidirectional LSTM
        x, _ = self.lstm(x)
        x = self.relu(x)
        # The forward direction's final state sits at the last step (first half
        # of the features); the backward direction's final state sits at the
        # first step (second half of the features).
        hidden = self.lstm.hidden_size
        forward_last = x[:, -1, :hidden]
        backward_last = x[:, 0, hidden:]
        x = self.dropout1(torch.cat((forward_last, backward_last), dim=1))
        x = self.fc1(x)
        x = self.dropout2(x)
        x = self.relu2(x)
        x = self.fc2(x)
        x = self.softmax(x)
        return x


## ResNet18

In [None]:
class ResNet_model(nn.Module):
    """Frozen ResNet-18 followed by a projection, a Bi-LSTM and a classifier.

    The 1000-unit output of the pre-trained ResNet-18 is projected to
    ``hidden_size`` features, passed through a bidirectional LSTM as a
    one-step sequence per sample, and classified by a final linear layer.

    Args:
        num_classes: Number of output units of the last fully connected layer.
        hidden_size: Width of the projection layer and hidden size of each
            LSTM direction.
        num_layers: Number of stacked LSTM layers.
        dropout: Dropout probability of both dropout layers.

    NOTE(review): ``forward`` returns softmax probabilities. Losses such as
    ``nn.CrossEntropyLoss`` expect raw logits -- confirm the training loop
    accounts for this.
    """

    def __init__(self, num_classes, hidden_size=512, num_layers=2, dropout=0.5):
        # Zero-argument super() avoids naming the class (the old call referred
        # to a class name that does not exist in this file).
        super().__init__()
        # Load pre-trained ResNet-18 model
        self.resnet = models.resnet18(pretrained=True)
        # Freeze all layers
        for param in self.resnet.parameters():
            param.requires_grad = False
        # ResNet-18 ends in a 1000-unit layer; project it to hidden_size so it
        # always matches the LSTM input size, whatever hidden_size is chosen.
        self.fc1 = nn.Linear(1000, hidden_size)
        self.relu = nn.ReLU()
        # Dropout to reduce overfitting
        self.dropout1 = nn.Dropout(dropout)
        # Bi-LSTM layer for temporal feature extraction
        self.lstm = nn.LSTM(input_size=hidden_size,
                            hidden_size=hidden_size,
                            num_layers=num_layers,
                            bidirectional=True,
                            batch_first=True)
        # Dropout to fight overfitting
        self.dropout2 = nn.Dropout(dropout)
        self.relu2 = nn.ReLU()
        # Last fully connected layer with number of classes; a bidirectional
        # LSTM emits 2 * hidden_size features per step.
        self.fc2 = nn.Linear(hidden_size * 2, num_classes)
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        """Return per-class probabilities of shape ``(batch, num_classes)``.

        Args:
            x: Input batch for the backbone (presumably ``(batch, 3, H, W)``
                images -- confirm against the data pipeline).
        """
        x = self.resnet(x)
        x = self.fc1(x)
        x = self.relu(x)
        x = self.dropout1(x)  # Apply dropout after the first fully connected layer
        # Add an explicit sequence axis: (batch, hidden) -> (batch, 1, hidden).
        # A 2-D tensor would be read by nn.LSTM as ONE unbatched sequence, so
        # the samples of a batch would influence each other's predictions.
        x = x.unsqueeze(1)
        x, _ = self.lstm(x)
        # Only one time step exists, so it carries both directions' final states
        x = x[:, -1, :]
        x = self.dropout2(x)  # Apply dropout after the LSTM layer
        x = self.relu2(x)
        x = self.fc2(x)
        x = self.softmax(x)
        return x


## MobileNet_v2

In [None]:
class MobileNet_model(nn.Module):
    """Frozen MobileNetV2 feature extractor followed by a Bi-LSTM classifier head.

    The backbone's feature map is average-pooled to one vector per sample,
    repeated along a sequence axis, run through a bidirectional LSTM, and the
    final state of each direction is classified by two fully connected layers.

    Args:
        num_classes: Number of output units of the last fully connected layer.
        hidden_size: Hidden size of each LSTM direction.
        num_layers: Number of stacked LSTM layers.
        dropout: Dropout probability used inside the LSTM (between stacked
            layers) and by both dropout layers of the head.

    NOTE(review): ``forward`` returns softmax probabilities. Losses such as
    ``nn.CrossEntropyLoss`` expect raw logits -- confirm the training loop
    accounts for this.
    """

    def __init__(self, num_classes, hidden_size=256, num_layers=2, dropout=0.25):
        super().__init__()
        # Load pre-trained model. The file imports ``torchvision.models`` under
        # the alias ``models``; the bare name ``torchvision`` is never bound,
        # so the alias must be used here.
        self.mobilenet = models.mobilenet_v2(pretrained=True).features
        # Freeze weights of pre-trained model
        for param in self.mobilenet.parameters():
            param.requires_grad = False
        # Pool each feature map to 1x1 so every sample becomes one feature vector
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        # Bi-LSTM for temporal feature extraction
        self.lstm = nn.LSTM(
            input_size=1280,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
            dropout=dropout,
            bidirectional=True,
        )
        # Dropout to reduce overfitting
        self.dropout1 = nn.Dropout(dropout)
        # Fully connected layer; the Bi-LSTM yields 2 * hidden_size features
        self.fc1 = nn.Linear(hidden_size * 2, 512)
        # Additional dropout to reduce overfitting
        self.dropout2 = nn.Dropout(dropout)
        # Last fully connected layer to produce final predictions
        self.fc2 = nn.Linear(512, num_classes)
        # Softmax function
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        """Return per-class probabilities of shape ``(batch, num_classes)``.

        Args:
            x: Input batch for the backbone (presumably ``(batch, 3, H, W)``
                images -- confirm against the data pipeline).
        """
        x = self.mobilenet(x)
        x = self.avgpool(x)
        # (batch, C, 1, 1) -> (batch, C)
        x = torch.flatten(x, 1)
        # (batch, C) -> (batch, 1, C)
        x = x.unsqueeze(1)
        # Repeat the single feature vector for num_layers * 2 time steps.
        # NOTE(review): every step receives the same input, so the sequence
        # adds no new information -- confirm this length is intended.
        x = x.expand(-1, self.lstm.num_layers * 2, -1)
        x, _ = self.lstm(x)
        # The forward direction's final state sits at the last step (first half
        # of the features); the backward direction's final state sits at the
        # first step (second half of the features).
        hidden = self.lstm.hidden_size
        forward_last = x[:, -1, :hidden]
        backward_last = x[:, 0, hidden:]
        x = self.dropout1(torch.cat((forward_last, backward_last), dim=1))
        x = self.fc1(x)
        x = self.dropout2(x)
        x = self.fc2(x)
        x = self.softmax(x)
        return x
