# A Novel Approach for Three-Way Classification of Lumbar Spine Degeneration Using Pseudo-Modality Learning to Handle Missing MRI Data

## Libs

In [None]:
import os
import numpy as np
import pandas as pd
import torch
from torchvision import models, transforms
from tqdm import tqdm

## Embedding Generation Using ResNet-50

### AT2 Grey Scale

In [None]:
# AT2 / grey-scale: compute one 512-d embedding per series by averaging the
# per-slice outputs of an ImageNet-pretrained ResNet-50 whose classification
# layer is replaced by a Linear(in_features, 512) head.
model = models.resnet50(pretrained=True)
# The new head is randomly initialised and is not trained in this cell, so it
# acts as a fixed random projection. Initialise it under a fixed seed so the
# projection is reproducible between runs (for a given PyTorch build);
# fork_rng restores the global CPU RNG state afterwards.
with torch.random.fork_rng(devices=[]):
    torch.manual_seed(0)
    model.fc = torch.nn.Linear(model.fc.in_features, 512)
model.eval()

# Applied to every channel-first float tensor: resize to 224x224, then
# normalise with the ImageNet channel statistics.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

df = pd.read_csv('/kaggle/input/preprocessed-dataset/train_data_AT2.csv')

results = []
# iterrows() is a generator without a length, so pass total= explicitly to get
# a percentage and ETA in the progress bar.
for index, row in tqdm(df.iterrows(), total=len(df)):
    patient_id = str(row['study_id'])
    series_id = str(row['series_id'])

    series_path = os.path.join("/kaggle/input/preprocessed-dataset/grey_scale_train", patient_id, series_id)

    embeddings = []

    # os.listdir returns entries in arbitrary order; sort so slices are
    # processed (and averaged) in the same order on every run.
    for slice_file in sorted(os.listdir(series_path)):
        if not slice_file.endswith('.npy'):
            continue

        slice_path = os.path.join(series_path, slice_file)
        slice_data = np.load(slice_path)

        # Bring every slice to a 3-channel, channel-first array.
        if slice_data.ndim == 2:
            slice_data = np.stack([slice_data] * 3, axis=0)
        elif slice_data.ndim == 3 and slice_data.shape[0] == 1:
            slice_data = np.repeat(slice_data, 3, axis=0)
        elif slice_data.ndim != 3 or slice_data.shape[0] != 3:
            # Reject any other layout here instead of letting it fail later
            # with an opaque broadcasting error during normalisation.
            raise ValueError(f"Unexpected slice shape: {slice_data.shape}")

        # NOTE(review): the ImageNet mean/std assume inputs scaled to [0, 1];
        # confirm the preprocessed .npy slices are stored in that range.
        input_tensor = transform(torch.from_numpy(slice_data).float())

        # Add the batch dimension expected by the model.
        input_tensor = input_tensor.unsqueeze(0)

        with torch.no_grad():
            embedding = model(input_tensor)
            embeddings.append(embedding.numpy())

    # A series whose directory contains no .npy files yields no output row.
    if embeddings:
        average_embedding = np.mean(np.vstack(embeddings), axis=0)
        # Columns are named '0', '1', ... after the embedding index.
        embedding_dict = {str(i): value for i, value in enumerate(average_embedding)}

        embedding_dict.update({'study_id': patient_id, 'series_id': series_id})

        results.append(embedding_dict)

results_df = pd.DataFrame(results)

# NOTE(review): the other embedding cells visible in this notebook write to
# these same two file names, so running another one in the same working
# directory overwrites this output.
results_df.to_csv('final_embeddings.csv', index=False)

torch.save(model.state_dict(), 'model_embeddings.pth')

print("Embeddings generation completed and saved to 'final_embeddings.csv'")

Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth
100%|██████████| 97.8M/97.8M [00:00<00:00, 168MB/s]
2226it [1:50:34,  2.98s/it]


Embeddings generation completed and saved to 'final_embeddings.csv'


### AT2 Histogram Equalized

In [None]:
# AT2 / histogram-equalised: compute one 512-d embedding per series by
# averaging the per-slice outputs of an ImageNet-pretrained ResNet-50 whose
# classification layer is replaced by a Linear(in_features, 512) head.
model = models.resnet50(pretrained=True)
# The new head is randomly initialised and is not trained in this cell, so it
# acts as a fixed random projection. Initialise it under a fixed seed so the
# projection is reproducible between runs (for a given PyTorch build);
# fork_rng restores the global CPU RNG state afterwards.
with torch.random.fork_rng(devices=[]):
    torch.manual_seed(0)
    model.fc = torch.nn.Linear(model.fc.in_features, 512)
model.eval()

# Applied to every channel-first float tensor: resize to 224x224, then
# normalise with the ImageNet channel statistics.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

df = pd.read_csv('/kaggle/input/preprocessed-dataset/train_data_AT2.csv')

results = []
# iterrows() is a generator without a length, so pass total= explicitly to get
# a percentage and ETA in the progress bar.
for index, row in tqdm(df.iterrows(), total=len(df)):
    patient_id = str(row['study_id'])
    series_id = str(row['series_id'])

    series_path = os.path.join("/kaggle/input/preprocessed-dataset/hist_norm_train", patient_id, series_id)

    embeddings = []

    # os.listdir returns entries in arbitrary order; sort so slices are
    # processed (and averaged) in the same order on every run.
    for slice_file in sorted(os.listdir(series_path)):
        if not slice_file.endswith('.npy'):
            continue

        slice_path = os.path.join(series_path, slice_file)
        slice_data = np.load(slice_path)

        # Bring every slice to a 3-channel, channel-first array.
        if slice_data.ndim == 2:
            slice_data = np.stack([slice_data] * 3, axis=0)
        elif slice_data.ndim == 3 and slice_data.shape[0] == 1:
            slice_data = np.repeat(slice_data, 3, axis=0)
        elif slice_data.ndim != 3 or slice_data.shape[0] != 3:
            # Reject any other layout here instead of letting it fail later
            # with an opaque broadcasting error during normalisation.
            raise ValueError(f"Unexpected slice shape: {slice_data.shape}")

        # NOTE(review): the ImageNet mean/std assume inputs scaled to [0, 1];
        # confirm the preprocessed .npy slices are stored in that range.
        input_tensor = transform(torch.from_numpy(slice_data).float())

        # Add the batch dimension expected by the model.
        input_tensor = input_tensor.unsqueeze(0)

        with torch.no_grad():
            embedding = model(input_tensor)
            embeddings.append(embedding.numpy())

    # A series whose directory contains no .npy files yields no output row.
    if embeddings:
        average_embedding = np.mean(np.vstack(embeddings), axis=0)
        # Columns are named '0', '1', ... after the embedding index.
        embedding_dict = {str(i): value for i, value in enumerate(average_embedding)}

        embedding_dict.update({'study_id': patient_id, 'series_id': series_id})

        results.append(embedding_dict)

results_df = pd.DataFrame(results)

# NOTE(review): the other embedding cells visible in this notebook write to
# these same two file names, so running another one in the same working
# directory overwrites this output.
results_df.to_csv('final_embeddings.csv', index=False)

torch.save(model.state_dict(), 'model_embeddings.pth')

print("Embeddings generation completed and saved to 'final_embeddings.csv'")

Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth
100%|██████████| 97.8M/97.8M [00:00<00:00, 187MB/s]
2226it [1:53:06,  3.05s/it]


Embeddings generation completed and saved to 'final_embeddings.csv'


### ST2 Grey Scale

In [None]:
# ST2 / grey-scale: compute one 512-d embedding per series by averaging the
# per-slice outputs of an ImageNet-pretrained ResNet-50 whose classification
# layer is replaced by a Linear(in_features, 512) head.
model = models.resnet50(pretrained=True)
# The new head is randomly initialised and is not trained in this cell, so it
# acts as a fixed random projection. Initialise it under a fixed seed so the
# projection is reproducible between runs (for a given PyTorch build);
# fork_rng restores the global CPU RNG state afterwards.
with torch.random.fork_rng(devices=[]):
    torch.manual_seed(0)
    model.fc = torch.nn.Linear(model.fc.in_features, 512)
model.eval()

# Applied to every channel-first float tensor: resize to 224x224, then
# normalise with the ImageNet channel statistics.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

df = pd.read_csv('/kaggle/input/preprocessed-dataset/train_data_ST2.csv')

results = []
# iterrows() is a generator without a length, so pass total= explicitly to get
# a percentage and ETA in the progress bar.
for index, row in tqdm(df.iterrows(), total=len(df)):
    patient_id = str(row['study_id'])
    series_id = str(row['series_id'])

    series_path = os.path.join("/kaggle/input/preprocessed-dataset/grey_scale_train", patient_id, series_id)

    embeddings = []

    # os.listdir returns entries in arbitrary order; sort so slices are
    # processed (and averaged) in the same order on every run.
    for slice_file in sorted(os.listdir(series_path)):
        if not slice_file.endswith('.npy'):
            continue

        slice_path = os.path.join(series_path, slice_file)
        slice_data = np.load(slice_path)

        # Bring every slice to a 3-channel, channel-first array.
        if slice_data.ndim == 2:
            slice_data = np.stack([slice_data] * 3, axis=0)
        elif slice_data.ndim == 3 and slice_data.shape[0] == 1:
            slice_data = np.repeat(slice_data, 3, axis=0)
        elif slice_data.ndim != 3 or slice_data.shape[0] != 3:
            # Reject any other layout here instead of letting it fail later
            # with an opaque broadcasting error during normalisation.
            raise ValueError(f"Unexpected slice shape: {slice_data.shape}")

        # NOTE(review): the ImageNet mean/std assume inputs scaled to [0, 1];
        # confirm the preprocessed .npy slices are stored in that range.
        input_tensor = transform(torch.from_numpy(slice_data).float())

        # Add the batch dimension expected by the model.
        input_tensor = input_tensor.unsqueeze(0)

        with torch.no_grad():
            embedding = model(input_tensor)
            embeddings.append(embedding.numpy())

    # A series whose directory contains no .npy files yields no output row.
    if embeddings:
        average_embedding = np.mean(np.vstack(embeddings), axis=0)
        # Columns are named '0', '1', ... after the embedding index.
        embedding_dict = {str(i): value for i, value in enumerate(average_embedding)}

        embedding_dict.update({'study_id': patient_id, 'series_id': series_id})

        results.append(embedding_dict)

results_df = pd.DataFrame(results)

# NOTE(review): the other embedding cells visible in this notebook write to
# these same two file names, so running another one in the same working
# directory overwrites this output.
results_df.to_csv('final_embeddings.csv', index=False)

torch.save(model.state_dict(), 'model_embeddings.pth')

print("Embeddings generation completed and saved to 'final_embeddings.csv'")

Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth
100%|██████████| 97.8M/97.8M [00:08<00:00, 12.5MB/s]
1876it [46:50,  1.50s/it]


Embeddings generation completed and saved to 'final_embeddings.csv'


### ST2 Histogram Equalized

In [None]:
# ST2 / histogram-equalised: compute one 512-d embedding per series by
# averaging the per-slice outputs of an ImageNet-pretrained ResNet-50 whose
# classification layer is replaced by a Linear(in_features, 512) head.
model = models.resnet50(pretrained=True)
# The new head is randomly initialised and is not trained in this cell, so it
# acts as a fixed random projection. Initialise it under a fixed seed so the
# projection is reproducible between runs (for a given PyTorch build);
# fork_rng restores the global CPU RNG state afterwards.
with torch.random.fork_rng(devices=[]):
    torch.manual_seed(0)
    model.fc = torch.nn.Linear(model.fc.in_features, 512)
model.eval()

# Applied to every channel-first float tensor: resize to 224x224, then
# normalise with the ImageNet channel statistics.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

df = pd.read_csv('/kaggle/input/preprocessed-dataset/train_data_ST2.csv')

results = []
# iterrows() is a generator without a length, so pass total= explicitly to get
# a percentage and ETA in the progress bar.
for index, row in tqdm(df.iterrows(), total=len(df)):
    patient_id = str(row['study_id'])
    series_id = str(row['series_id'])

    series_path = os.path.join("/kaggle/input/preprocessed-dataset/hist_norm_train", patient_id, series_id)

    embeddings = []

    # os.listdir returns entries in arbitrary order; sort so slices are
    # processed (and averaged) in the same order on every run.
    for slice_file in sorted(os.listdir(series_path)):
        if not slice_file.endswith('.npy'):
            continue

        slice_path = os.path.join(series_path, slice_file)
        slice_data = np.load(slice_path)

        # Bring every slice to a 3-channel, channel-first array.
        if slice_data.ndim == 2:
            slice_data = np.stack([slice_data] * 3, axis=0)
        elif slice_data.ndim == 3 and slice_data.shape[0] == 1:
            slice_data = np.repeat(slice_data, 3, axis=0)
        elif slice_data.ndim != 3 or slice_data.shape[0] != 3:
            # Reject any other layout here instead of letting it fail later
            # with an opaque broadcasting error during normalisation.
            raise ValueError(f"Unexpected slice shape: {slice_data.shape}")

        # NOTE(review): the ImageNet mean/std assume inputs scaled to [0, 1];
        # confirm the preprocessed .npy slices are stored in that range.
        input_tensor = transform(torch.from_numpy(slice_data).float())

        # Add the batch dimension expected by the model.
        input_tensor = input_tensor.unsqueeze(0)

        with torch.no_grad():
            embedding = model(input_tensor)
            embeddings.append(embedding.numpy())

    # A series whose directory contains no .npy files yields no output row.
    if embeddings:
        average_embedding = np.mean(np.vstack(embeddings), axis=0)
        # Columns are named '0', '1', ... after the embedding index.
        embedding_dict = {str(i): value for i, value in enumerate(average_embedding)}

        embedding_dict.update({'study_id': patient_id, 'series_id': series_id})

        results.append(embedding_dict)

results_df = pd.DataFrame(results)

# NOTE(review): the other embedding cells visible in this notebook write to
# these same two file names, so running another one in the same working
# directory overwrites this output.
results_df.to_csv('final_embeddings.csv', index=False)

torch.save(model.state_dict(), 'model_embeddings.pth')

print("Embeddings generation completed and saved to 'final_embeddings.csv'")

Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth
100%|██████████| 97.8M/97.8M [00:00<00:00, 182MB/s]
1876it [44:25,  1.42s/it]


Embeddings generation completed and saved to 'final_embeddings.csv'


### ST1 Grey Scale

In [None]:
# ST1 / grey-scale: compute one 512-d embedding per series by averaging the
# per-slice outputs of an ImageNet-pretrained ResNet-50 whose classification
# layer is replaced by a Linear(in_features, 512) head.
model = models.resnet50(pretrained=True)
# The new head is randomly initialised and is not trained in this cell, so it
# acts as a fixed random projection. Initialise it under a fixed seed so the
# projection is reproducible between runs (for a given PyTorch build);
# fork_rng restores the global CPU RNG state afterwards.
with torch.random.fork_rng(devices=[]):
    torch.manual_seed(0)
    model.fc = torch.nn.Linear(model.fc.in_features, 512)
model.eval()

# Applied to every channel-first float tensor: resize to 224x224, then
# normalise with the ImageNet channel statistics.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

df = pd.read_csv('/kaggle/input/preprocessed-dataset/train_data_ST1.csv')

results = []
# iterrows() is a generator without a length, so pass total= explicitly to get
# a percentage and ETA in the progress bar.
for index, row in tqdm(df.iterrows(), total=len(df)):
    patient_id = str(row['study_id'])
    series_id = str(row['series_id'])

    series_path = os.path.join("/kaggle/input/preprocessed-dataset/grey_scale_train", patient_id, series_id)

    embeddings = []

    # os.listdir returns entries in arbitrary order; sort so slices are
    # processed (and averaged) in the same order on every run.
    for slice_file in sorted(os.listdir(series_path)):
        if not slice_file.endswith('.npy'):
            continue

        slice_path = os.path.join(series_path, slice_file)
        slice_data = np.load(slice_path)

        # Bring every slice to a 3-channel, channel-first array.
        if slice_data.ndim == 2:
            slice_data = np.stack([slice_data] * 3, axis=0)
        elif slice_data.ndim == 3 and slice_data.shape[0] == 1:
            slice_data = np.repeat(slice_data, 3, axis=0)
        elif slice_data.ndim != 3 or slice_data.shape[0] != 3:
            # Reject any other layout here instead of letting it fail later
            # with an opaque broadcasting error during normalisation.
            raise ValueError(f"Unexpected slice shape: {slice_data.shape}")

        # NOTE(review): the ImageNet mean/std assume inputs scaled to [0, 1];
        # confirm the preprocessed .npy slices are stored in that range.
        input_tensor = transform(torch.from_numpy(slice_data).float())

        # Add the batch dimension expected by the model.
        input_tensor = input_tensor.unsqueeze(0)

        with torch.no_grad():
            embedding = model(input_tensor)
            embeddings.append(embedding.numpy())

    # A series whose directory contains no .npy files yields no output row.
    if embeddings:
        average_embedding = np.mean(np.vstack(embeddings), axis=0)
        # Columns are named '0', '1', ... after the embedding index.
        embedding_dict = {str(i): value for i, value in enumerate(average_embedding)}

        embedding_dict.update({'study_id': patient_id, 'series_id': series_id})

        results.append(embedding_dict)

results_df = pd.DataFrame(results)

# NOTE(review): the other embedding cells visible in this notebook write to
# these same two file names, so running another one in the same working
# directory overwrites this output.
results_df.to_csv('final_embeddings.csv', index=False)

torch.save(model.state_dict(), 'model_embeddings.pth')

print("Embeddings generation completed and saved to 'final_embeddings.csv'")

Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth
100%|██████████| 97.8M/97.8M [00:00<00:00, 196MB/s]
1881it [47:25,  1.51s/it]


Embeddings generation completed and saved to 'final_embeddings.csv'


### ST1 Histogram Equalized

In [None]:
# ST1 / histogram-equalised: compute one 512-d embedding per series by
# averaging the per-slice outputs of an ImageNet-pretrained ResNet-50 whose
# classification layer is replaced by a Linear(in_features, 512) head.
model = models.resnet50(pretrained=True)
# The new head is randomly initialised and is not trained in this cell, so it
# acts as a fixed random projection. Initialise it under a fixed seed so the
# projection is reproducible between runs (for a given PyTorch build);
# fork_rng restores the global CPU RNG state afterwards.
with torch.random.fork_rng(devices=[]):
    torch.manual_seed(0)
    model.fc = torch.nn.Linear(model.fc.in_features, 512)
model.eval()

# Applied to every channel-first float tensor: resize to 224x224, then
# normalise with the ImageNet channel statistics.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

df = pd.read_csv('/kaggle/input/preprocessed-dataset/train_data_ST1.csv')

results = []
# iterrows() is a generator without a length, so pass total= explicitly to get
# a percentage and ETA in the progress bar.
for index, row in tqdm(df.iterrows(), total=len(df)):
    patient_id = str(row['study_id'])
    series_id = str(row['series_id'])

    series_path = os.path.join("/kaggle/input/preprocessed-dataset/hist_norm_train", patient_id, series_id)

    embeddings = []

    # os.listdir returns entries in arbitrary order; sort so slices are
    # processed (and averaged) in the same order on every run.
    for slice_file in sorted(os.listdir(series_path)):
        if not slice_file.endswith('.npy'):
            continue

        slice_path = os.path.join(series_path, slice_file)
        slice_data = np.load(slice_path)

        # Bring every slice to a 3-channel, channel-first array.
        if slice_data.ndim == 2:
            slice_data = np.stack([slice_data] * 3, axis=0)
        elif slice_data.ndim == 3 and slice_data.shape[0] == 1:
            slice_data = np.repeat(slice_data, 3, axis=0)
        elif slice_data.ndim != 3 or slice_data.shape[0] != 3:
            # Reject any other layout here instead of letting it fail later
            # with an opaque broadcasting error during normalisation.
            raise ValueError(f"Unexpected slice shape: {slice_data.shape}")

        # NOTE(review): the ImageNet mean/std assume inputs scaled to [0, 1];
        # confirm the preprocessed .npy slices are stored in that range.
        input_tensor = transform(torch.from_numpy(slice_data).float())

        # Add the batch dimension expected by the model.
        input_tensor = input_tensor.unsqueeze(0)

        with torch.no_grad():
            embedding = model(input_tensor)
            embeddings.append(embedding.numpy())

    # A series whose directory contains no .npy files yields no output row.
    if embeddings:
        average_embedding = np.mean(np.vstack(embeddings), axis=0)
        # Columns are named '0', '1', ... after the embedding index.
        embedding_dict = {str(i): value for i, value in enumerate(average_embedding)}

        embedding_dict.update({'study_id': patient_id, 'series_id': series_id})

        results.append(embedding_dict)

results_df = pd.DataFrame(results)

# NOTE(review): the other embedding cells visible in this notebook write to
# these same two file names, so running another one in the same working
# directory overwrites this output.
results_df.to_csv('final_embeddings.csv', index=False)

torch.save(model.state_dict(), 'model_embeddings.pth')

print("Embeddings generation completed and saved to 'final_embeddings.csv'")

Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth
100%|██████████| 97.8M/97.8M [00:00<00:00, 190MB/s]
1881it [50:34,  1.61s/it]


Embeddings generation completed and saved to 'final_embeddings.csv'
