In [1]:
import glob
import torch
from torch import nn
import pandas as pd
import numpy as np

In [2]:
class TestMultiViewDataset(torch.utils.data.Dataset):

    def __init__(self, frontal_dir, left_dir, right_dir, output_feature_len=400):
        self.frontal_dir_npy_files = glob.glob(frontal_dir + '/*')
        self.left_dir_npy_files = glob.glob(left_dir + '/*')
        self.right_dir_npy_files = glob.glob(right_dir + '/*')

        self.output_feature_len = output_feature_len

    def __len__(self):
        return len(self.frontal_dir_npy_files)


    def get_center_crop_feature(self, array):
        n = array.shape[0]
        
        # Check if the array has fewer than self.output_feature_len elements
        if n <= self.output_feature_len:
            raise Exception('Less than output feature len')  # Return the entire array if it has self.output_feature_len or fewer elements
        
        # Calculate the starting index
        start_index = (n - self.output_feature_len) // 2
        
        # Extract and return the central self.output_feature_len elements
        return array[start_index:start_index + self.output_feature_len]


    def resize_sequence(self, features):
        if features.shape[0] < self.output_feature_len:
            diff = self.output_feature_len - features.shape[0]
            pad = np.zeros((diff, features.shape[1], features.shape[2]))  # Ensure pad has the same dimensions as features
            features = np.concatenate((features, pad), axis=0)
        else:
            mid_point = features.shape[0] // 2
            window_size = self.output_feature_len // 2
            features = features[int(mid_point - window_size): int(mid_point + window_size), :, :]
        return features

    
    def __getitem__(self, idx):
        frontal_feature_filename = self.frontal_dir_npy_files[idx]
        left_feature_filename = self.left_dir_npy_files[idx]
        right_feature_filename = self.right_dir_npy_files[idx]

        frontal_feature = torch.from_numpy(self.resize_sequence(np.load(frontal_feature_filename)))
        left_feature = torch.from_numpy(self.resize_sequence(np.load(left_feature_filename)))
        right_feature = torch.from_numpy(self.resize_sequence(np.load(right_feature_filename)))


        # print(f'frontal feature shape', frontal_feature.shape)
        # print(f'left feature shape', left_feature.shape)
        # print(f'right feature shape', right_feature.shape)


        concatenated_feature = torch.cat([frontal_feature, left_feature, right_feature], dim=1)


        # get only sample id from filename
        sample_id = frontal_feature_filename.split('/')[-1]
        # print(sample_id)

        return concatenated_feature, sample_id

In [3]:
frontal_dir = './VGG16_test_features/frontal_view/VGG16_features'
left_dir = './VGG16_test_features/left_view'
right_dir = './VGG16_test_features/right_view'

In [4]:
test_mv_dataset = TestMultiViewDataset(frontal_dir, left_dir, right_dir)

In [5]:
testing_dataloader = torch.utils.data.DataLoader(test_mv_dataset, batch_size=32, shuffle=True)

In [6]:
for feature, filename in testing_dataloader:
    print(feature.shape)
    print(filename)

    print('\n\n\n')

torch.Size([32, 400, 3, 512])
('392991b5-f24b-41bd-a690-6b998c5b8f25.npy', '3b3d1027-51ea-4502-b60d-6960b3d4d2d4.npy', 'a3e578aa-ba6a-4f81-8a01-10597a64ed19.npy', '89c1ddc4-b25d-4b5d-81b4-571fc8e1ff09.npy', 'ac34341c-f835-499e-8706-b143adbabc5e.npy', '97b7c9dd-74b3-4051-a2a8-a66b8bd5418b.npy', 'ad26369a-749b-4222-a86f-055ea43d9291.npy', '18148502-eb8a-49a1-a95d-76a02852a9cc.npy', 'b2a75b06-bec7-4f92-8d7e-d0a2e523e975.npy', '1d39094f-7d92-47c0-9dc2-f6bace8bfd8b.npy', '6b678d0e-3cb3-4437-a421-b4a5f032939f.npy', '75528de3-719a-461e-89f8-2db8d70d318d.npy', 'ca167fa6-c6df-4378-a588-68b2e019b2cb.npy', '2d350c6d-e025-45f1-a8e6-80552c791c46.npy', '4ef07058-9477-4216-b677-bd3e6a906c7e.npy', 'ead027d3-45d2-4366-896b-d135371be543.npy', '92a63b52-f2fc-4f48-b9bf-22c176bb5a25.npy', 'd54baae8-e42f-4065-b6c1-27e4a8075a8f.npy', 'c53b0315-5f84-499d-aba9-1a3edbcd5e85.npy', '95be7df6-317d-4059-9ad6-8ba087ea774d.npy', '5747fef5-f2a4-47d2-a58a-96071c709ea0.npy', '49ab74ee-0433-4ec2-8cf1-5331034918f9.npy', '

In [7]:
class Model(nn.Module):
    def __init__(self, num_classes=6):
        super(Model, self).__init__()
        self.num_classes = num_classes

    def forward(self, x):
        return x

    def fit(self, train_loader, valid_loader, epochs, lr, weight_decay, early_stopping_patience, is_wandb=False, device='cuda', use_scheduler=False, save_name="model"):
        optimizer = optim.Adam(self.parameters(), lr=lr, weight_decay=weight_decay)
        criterion = nn.CrossEntropyLoss()
        scheduler = None

        if use_scheduler:
            scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=50, gamma=0.1)

        best_val_acc = 0
        patience_counter = 0

        for epoch in range(epochs):
            self.train()
            for batch_idx, data in enumerate(train_loader):
                features = data["features"]
                label = data["labels"]

                features = features.to(device)
                label = label.to(device)

                optimizer.zero_grad()
                output = self(features)
                loss = criterion(output, label)
                loss.backward()

                torch.nn.utils.clip_grad_norm_(self.parameters(), max_norm=1.0)

                optimizer.step()

                predictions = torch.argmax(output, dim=1)
                train_acc = custom_accuracy(predictions, label)
                train_f1 = custom_f1_score(predictions, label)

                if batch_idx % 100 == 0:
                    print(f"Epoch: {epoch+1} | Batch: {batch_idx} | CrossEntropyLoss: {loss.item():.3f} | Accuracy: {train_acc:.3f} | F1Score: {train_f1:.3f}")

            if scheduler:
                scheduler.step()

            self.eval()
            with torch.no_grad():
                val_f1_scores = []
                val_acc_scores = []
                for val_idx, val_data in enumerate(valid_loader):
                    val_features = val_data["features"]
                    val_label = val_data["labels"]

                    val_features = val_features.to(device)
                    val_label = val_label.to(device)

                    val_output = self(val_features)

                    val_predictions = torch.argmax(val_output, dim=1)
                    val_acc = custom_accuracy(val_predictions, val_label)
                    val_f1 = custom_f1_score(val_predictions, val_label)

                    val_f1_scores.append(val_f1)
                    val_acc_scores.append(val_acc)

                avg_val_f1_score = sum(val_f1_scores) / len(val_f1_scores)
                avg_val_acc_score = sum(val_acc_scores) / len(val_acc_scores)

                print(f"Validation -> Epoch: {epoch} | Accuracy: {avg_val_acc_score:.3f} | F1Score: {avg_val_f1_score:.3f}")

                # Early stopping check
                if avg_val_acc_score > best_val_acc:
                    best_val_acc = avg_val_acc_score
                    torch.save(self.state_dict(), f"{save_name}_best.pt")
                    print(f"Model saved with best validation accuracy: {best_val_acc:.3f} at epoch {epoch}")
                    patience_counter = 0  # Reset patience counter
                else:
                    patience_counter += 1

                if patience_counter >= early_stopping_patience:
                    print("Early stopping triggered")
                    break

            if epoch == epochs - 1:
                torch.save(self.state_dict(), f"{save_name}_last_epoch.pt")
                print(f"Model saved at the last epoch {epoch}")





class CNN_LSTM_MULTI(Model):
    def __init__(self, num_classes=6, lstm_layers=2, hidden_size=64, dropout_rate=0.25):
        super(CNN_LSTM_MULTI, self).__init__(num_classes)
        self.hidden_size = hidden_size
        self.lstm_layers = lstm_layers

        self.conv1 = nn.Conv1d(in_channels=512, out_channels=64, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm1d(64)
        self.relu = nn.LeakyReLU()
        self.dropout = nn.Dropout(dropout_rate)

        self.lstm = nn.LSTM(64 * 3, hidden_size, batch_first=True, num_layers=lstm_layers, bidirectional=True, dropout=dropout_rate)
        self.fc = nn.Linear(hidden_size * 2, self.num_classes)
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        batch_size, seq_len, num_views, feature_dim = x.size()
        # x: [32, 230, 3, 512]

        # Process each view separately
        features = []
        for view in range(num_views):
            view_x = x[:, :, view, :]
            view_x = view_x.transpose(1, 2)  # Transpose to [batch_size, feature_dim, seq_len]
            view_x = self.conv1(view_x)
            view_x = self.bn1(view_x)
            view_x = self.relu(view_x)
            view_x = self.dropout(view_x)
            view_x = view_x.transpose(1, 2)  # Transpose back to [batch_size, seq_len, feature_dim]
            features.append(view_x)

        # Concatenate features from all views
        x = torch.cat(features, dim=2)
        # x: [32, 230, 64 * 3]

        self.lstm.flatten_parameters()
        lstm_out, _ = self.lstm(x)
        # LSTM processing, output shape: [32, 230, hidden_size * 2]

        lstm_out = lstm_out[:, -1, :]
        # Get the output from the last time step, output shape: [32, hidden_size * 2]

        output = self.fc(lstm_out)
        # Fully connected layer, output shape: [32, num_classes]

        output = self.softmax(output)
        # Softmax, output shape: [32, num_classes]

        return output

In [8]:
# read model file
device = 'cpu'
mv_model_path = './cnn_lstm_multi_model_final_lr_0.001_dropout_0.25_wd_1e-05.pt'

In [9]:
model_multi = CNN_LSTM_MULTI(num_classes=6, lstm_layers=2, hidden_size=128, dropout_rate=0.25)
model_multi.load_state_dict(torch.load(mv_model_path, map_location=device))

<All keys matched successfully>

In [10]:
import numpy as np
import pandas as pd

# Assuming the number of classes
num_classes = 6

all_pred_arr = []
all_filenames_arr = []

for feature_batch, filename_batch in testing_dataloader:


    labels = model_multi(feature_batch.float()).detach().numpy()
    labels = np.argmax(labels, axis=-1).tolist()


    all_pred_arr.append(labels)
    all_filenames_arr.append(filename_batch)
    
    



In [11]:
new_preds = []
for pred_arr in all_pred_arr:
    for pred_class in pred_arr:
        new_preds.append(pred_class)


new_filenames = []
for filename_arr in all_filenames_arr:
    for filename in filename_arr:
        new_filenames.append(filename)

In [12]:
new_preds

[3,
 2,
 4,
 3,
 0,
 0,
 4,
 2,
 3,
 1,
 3,
 1,
 4,
 4,
 3,
 2,
 1,
 4,
 4,
 2,
 1,
 0,
 2,
 1,
 5,
 3,
 2,
 3,
 2,
 1,
 1,
 0,
 3,
 2,
 2,
 1,
 2,
 1,
 4,
 4,
 5,
 3,
 1,
 0,
 2,
 2,
 0,
 5,
 3,
 3,
 1,
 3,
 4,
 3,
 2,
 0,
 1,
 1,
 2,
 1,
 4,
 3,
 4,
 4,
 3,
 2,
 2,
 1,
 2,
 4,
 2,
 1,
 5,
 4,
 0,
 2,
 0,
 1,
 3,
 4,
 4,
 0,
 4,
 3,
 4,
 4,
 2,
 2,
 0,
 1,
 3,
 4,
 4,
 3,
 0,
 3,
 1,
 3,
 2,
 1,
 0,
 0,
 1,
 1,
 2,
 1,
 1,
 1,
 4,
 2,
 2,
 4,
 2,
 4,
 2,
 2,
 3,
 4,
 1,
 2,
 0,
 4,
 3,
 4,
 3,
 4,
 2,
 4,
 3,
 3,
 1,
 1,
 0,
 4,
 3,
 2,
 1,
 5,
 2,
 4,
 1,
 3,
 4,
 2,
 3,
 2,
 2,
 3,
 2,
 1,
 2,
 4,
 4,
 4,
 2,
 2,
 3,
 4,
 3,
 1,
 1,
 4,
 3,
 0,
 0,
 2,
 2,
 4,
 1,
 3,
 2,
 2,
 5,
 4,
 0,
 4,
 1,
 1,
 5,
 1,
 3,
 5,
 1,
 2,
 2,
 3,
 2,
 3,
 1,
 4,
 3,
 3,
 4,
 0,
 1,
 4,
 4,
 5,
 2,
 2,
 1,
 1,
 2,
 2,
 1,
 3,
 2,
 1,
 2,
 1,
 0,
 2,
 2,
 3,
 4,
 0,
 2,
 3,
 1,
 4,
 1,
 3,
 2,
 2,
 1,
 1,
 4,
 4,
 1,
 2,
 3,
 4,
 3,
 4,
 2,
 4,
 3,
 5,
 2,
 2,
 4,
 4,
 3,
 2,
 1,
 2,
 2,
 2,
 1,
 1,


## RUN THE FOLLOWING CODE TO SAVE THE PREDICTIONS TO AN OUTPUT CSV

In [13]:
# # Create one-hot encodings
# one_hot_labels = np.eye(num_classes)[new_preds].astype(np.int8)

# # Create a DataFrame
# df = pd.DataFrame(one_hot_labels, columns=[f'class_{i+1}' for i in range(num_classes)])
# df.insert(0, 'filename', new_filenames)

# # Save the DataFrame to a CSV file
# df.to_csv('output.csv', index=False)

# print("Data saved to output.csv")

Data saved to output.csv


In [14]:
# !cat output.csv

filename,class_1,class_2,class_3,class_4,class_5,class_6
30b2c119-7f65-4791-afc4-93bbd8a2592f.npy,0,0,0,1,0,0
824ecb50-b77b-4bb3-bf79-98f87a51c42f.npy,0,0,1,0,0,0
08fad321-94d6-4a5d-9203-4b76e0e26589.npy,0,0,0,0,1,0
e50fdbad-6817-4439-b7c0-97ae63bac3cc.npy,0,0,0,1,0,0
a7aa14d4-c451-4968-8585-89281f99a4ec.npy,1,0,0,0,0,0
dc3cc96e-37d8-48f9-b033-6b75cc9de47b.npy,1,0,0,0,0,0
11b727b1-e4a6-46bc-92b4-6689cf2ca09e.npy,0,0,0,0,1,0
2fbc0d39-970f-4d11-801e-a0ba184af6ed.npy,0,0,1,0,0,0
96163dcf-243f-4813-8dcc-340ef5304059.npy,0,0,0,1,0,0
c53b0315-5f84-499d-aba9-1a3edbcd5e85.npy,0,1,0,0,0,0
959a8c82-fd17-44a5-911f-cd5cdc3b773a.npy,0,0,0,1,0,0
086d9421-afb3-423d-8170-04ae2fcc3948.npy,0,1,0,0,0,0
5b8ec090-f772-4b12-bee4-d6e765ebe725.npy,0,0,0,0,1,0
b2a75b06-bec7-4f92-8d7e-d0a2e523e975.npy,0,0,0,0,1,0
43cf172a-9fd9-41a5-a83b-1f17a0f1ff61.npy,0,0,0,1,0,0
c9b113f3-3dcb-488f-a90c-63e4a486a609.npy,0,0,1,0,0,0
40b2d516-8879-4910-b9bd-edbc85a80c67.npy,0,1,0,0,0,0
71481dc5-8709-45c4-970b-0aeb0b2cbbf9.npy,0