In [1]:
import numpy as np
import torch
import random
import sklearn
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score
from sklearn.metrics import accuracy_score, mean_squared_error
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from torchvision.datasets import ImageFolder
from torchvision import transforms
from torchvision.datasets.folder import default_loader
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
from torchvision.utils import make_grid
from torchvision import models, transforms
from torch.autograd import Variable

In [2]:
batch_size = 128

train_dir = "/home/dawlat.akaila/Documents/AI_LABS/PROJECT_FINAL/datasets/chest_xray/train"
val_dir = "/home/dawlat.akaila/Documents/AI_LABS/PROJECT_FINAL/datasets/chest_xray/val"
test_dir = "/home/dawlat.akaila/Documents/AI_LABS/PROJECT_FINAL/datasets/chest_xray/test"

transform = transforms.Compose(
                [
                    transforms.Resize((224, 224)),
                    transforms.ToTensor(),
                ]
            )

train_data = ImageFolder(train_dir, transform=transform, loader=default_loader)
val_data = ImageFolder(val_dir, transform=transform, loader=default_loader)
test_data = ImageFolder(test_dir, transform=transform, loader=default_loader)

In [3]:
def START_seed():
    seed = 9
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    np.random.seed(seed)
    random.seed(seed)
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

In [4]:
# load dataset
START_seed()
train_loader = DataLoader(dataset=train_data, batch_size=batch_size, shuffle=True, num_workers = 8)
val_loader = DataLoader(dataset=val_data, batch_size=batch_size, shuffle=True, num_workers = 8)
test_loader = DataLoader(dataset=test_data, batch_size=batch_size, shuffle=False, num_workers = 8)

In [6]:
for images, _ in train_loader:
    print('images.shape:', images.shape)
    plt.figure(figsize=(16,8))
    plt.axis('off')
    plt.imshow(make_grid(images, nrow=16).permute((1, 2, 0)))
    break

In [5]:
# Extract features (X) and labels (y) for training, validation, and test sets
X_train, y_train = zip(*[(data, target) for data, target in train_data])
X_val, y_val = zip(*[(data, target) for data, target in val_data])
X_test, y_test = zip(*[(data, target) for data, target in test_data])

In [6]:
y_train = np.array(y_train)
y_val = np.array(y_val)
y_test = np.array(y_test)

In [7]:
# Load the pre-trained ResNet18 model
resnet = models.resnet18(pretrained=True)
resnet.eval()

# Function to extract features from an image
def extract_features(image_tensor):
    with torch.no_grad():
        image_variable = Variable(image_tensor.unsqueeze(0))
        features = resnet(image_variable)
    return features.flatten().numpy()



In [8]:
# Apply the feature extraction to each image in X_train
X_train_features = []

for i in range(len(X_train)):
    img_tensor = X_train[i]
    features = extract_features(img_tensor)
    X_train_features.append(features)

# Convert the list of features to a NumPy array
X_train_features = np.array(X_train_features)

In [9]:
# Apply the feature extraction to each image in X_train
X_val_features = []

for i in range(len(X_val)):
    img_tensor = X_train[i]
    features = extract_features(img_tensor)
    X_val_features.append(features)

# Convert the list of features to a NumPy array
X_val_features = np.array(X_val_features)

In [10]:
# Apply the feature extraction to each image in X_train
X_test_features = []

for i in range(len(X_test)):
    img_tensor = X_train[i]
    features = extract_features(img_tensor)
    X_test_features.append(features)

# Convert the list of features to a NumPy array
X_test_features = np.array(X_test_features)

In [11]:
# Assuming X_train_features is your extracted features
scaler = StandardScaler()
scaled_X_train = scaler.fit_transform(X_train_features)

# Adjust this based on your memory constraints
n_components = 16
pca = PCA(n_components=n_components)
X_train_pca = pca.fit_transform(scaled_X_train)

print(X_train_pca.shape)
# Now X_train_pca has reduced dimensions

(5216, 16)


In [12]:
# Assuming X_train_features is your extracted features
scaler = StandardScaler()
scaled_X_val = scaler.fit_transform(X_val_features)

# Adjust this based on your memory constraints
n_components = 16
pca = PCA(n_components=n_components)
X_val_pca = pca.fit_transform(scaled_X_val)

print(X_val_pca.shape)
# Now X_train_pca has reduced dimensions

(16, 16)


In [13]:
# Assuming X_train_features is your extracted features
scaler = StandardScaler()
scaled_X_test = scaler.fit_transform(X_test_features)

# Adjust this based on your memory constraints
n_components = 16
pca = PCA(n_components=n_components)
X_test_pca = pca.fit_transform(scaled_X_test)

print(X_test_pca.shape)
# Now X_train_pca has reduced dimensions

(624, 16)


In [14]:
# Create and fit the auto-sklearn classifier
logreg_model = LogisticRegression()

In [18]:
# Make predictions on the val set
logreg_model.fit(X_train_pca, y_train)
y_pred_val = logreg_model.predict(X_val_pca)

# Evaluate the model on the test set
accuracy_test = sklearn.metrics.accuracy_score(y_val, y_pred_val)
mse = mean_squared_error(y_val, y_pred_val)

print(f"Validation Accuracy: {accuracy_test}")
print(f'Mean Squared Error: {mse:.2f}')

Validation Accuracy: 0.4375
Mean Squared Error: 0.56


In [17]:
# Make predictions on the test set
y_pred_test = logreg_model.predict(X_test_pca)

# Evaluate the model on the test set
accuracy_test = sklearn.metrics.accuracy_score(y_test, y_pred_test)
mse = mean_squared_error(y_test, y_pred_test)

print(f"Test Accuracy: {accuracy_test}")
print(f'Mean Squared Error: {mse:.2f}')

Test Accuracy: 0.5480769230769231
Mean Squared Error: 0.45
