In [2]:
import torch as t
from torchvision import transforms, datasets
from torch.utils.data import DataLoader
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report

In [3]:
def data_loader(batch_size, root='augmented', subset_size=.10, seed=None):
    """
    Build train/validation/test loaders from a random subset of an image folder.

    Images under ``root`` are loaded with ``datasets.ImageFolder``, resized so the
    shorter side is 256, center-cropped to 224x224, converted to tensors and
    normalized per channel. A random ``subset_size`` fraction of the dataset is
    kept, and that subset is split 70% / 15% / remainder into training,
    validation and testing sets.

    Args:
        batch_size: int representing number of samples per batch
        root: path of the image folder (one sub-directory per class)
        subset_size: fraction of the full dataset to keep, in the range (0, 1]
        seed: optional int; when given, the subset selection, the three-way split
            and the training shuffle order are driven by a dedicated generator
            seeded with this value so they can be reproduced. When None, the
            global torch RNG is used.

    Returns:
        train_loader: iterable training dataset organized according to batch size
        val_loader: iterable validation dataset organized according to batch size
        test_loader: iterable testing dataset organized according to batch size

    Raises:
        ValueError: if ``subset_size`` is not in the range (0, 1]
    """
    if not 0 < subset_size <= 1:
        raise ValueError(f"subset_size must be in (0, 1], got {subset_size!r}")

    # Per-channel mean/std below match the widely used ImageNet statistics.
    transform_data = transforms.Compose([
        transforms.Resize(256, interpolation=transforms.InterpolationMode.BILINEAR),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])

    # Load the entire dataset
    full_dataset = datasets.ImageFolder(root=root, transform=transform_data)

    # One generator drives every random choice below; falling back to the
    # default generator keeps the unseeded behaviour of random_split unchanged.
    seeded = seed is not None
    generator = t.Generator().manual_seed(seed) if seeded else t.default_generator

    # Determine how many samples the subset contains (computed once and reused
    # so the two split lengths always add up to the dataset size)
    total_size = len(full_dataset)
    fraction_size = int(total_size * subset_size)

    # Take a subset of the full dataset; the remainder is discarded
    subset_dataset, _ = t.utils.data.random_split(
        full_dataset, [fraction_size, total_size - fraction_size], generator=generator
    )

    # Split the subset into training, validation, and testing
    train_size = int(0.7 * fraction_size)
    val_size = int(0.15 * fraction_size)
    # The test split absorbs any rounding remainder
    test_size = fraction_size - train_size - val_size

    # Assign sizes to each subset
    train_dataset, val_dataset, test_dataset = t.utils.data.random_split(
        subset_dataset, [train_size, val_size, test_size], generator=generator
    )

    # Create data loaders; only the training loader shuffles
    train_loader = DataLoader(
        train_dataset,
        batch_size=batch_size,
        shuffle=True,
        generator=generator if seeded else None,
    )
    val_loader = DataLoader(val_dataset, batch_size=batch_size)
    test_loader = DataLoader(test_dataset, batch_size=batch_size)

    return train_loader, val_loader, test_loader

In [7]:
# Define batch size
batch_size = 32

# Load data using data_loader function (test_loader is not used in this cell)
train_loader, val_loader, test_loader = data_loader(batch_size)


def extract_features_and_labels(loader):
    """
    Flatten every batch of a loader and gather the whole split into two tensors.

    Args:
        loader: iterable yielding (images, labels) batches of tensors

    Returns:
        features: tensor with one row per sample (all other image dimensions
            flattened into a single feature axis)
        labels: tensor with the labels of every sample, in iteration order
    """
    features = []
    labels = []
    for images, targets in loader:
        # reshape (unlike view) does not depend on the batch being contiguous
        features.append(images.reshape(images.size(0), -1))
        labels.append(targets)
    return t.cat(features, dim=0), t.cat(labels)


# Extracting features and labels from the train_loader
train_features, train_labels = extract_features_and_labels(train_loader)

# Standardizing the features (scikit-learn works on NumPy arrays, so the
# tensors are converted explicitly at each call site)
scaler = StandardScaler()
train_features_scaled = scaler.fit_transform(train_features.numpy())

# Create and train the logistic regression model
model = LogisticRegression(max_iter=1000, n_jobs=-1, verbose=2)
model.fit(train_features_scaled, train_labels.numpy())

# Evaluate the model on validation set
val_features, val_labels = extract_features_and_labels(val_loader)

# Reuse the scaler fitted on the training data; do not refit on validation
val_features_scaled = scaler.transform(val_features.numpy())
val_pred = model.predict(val_features_scaled)

# Evaluate the model
accuracy = accuracy_score(val_labels.numpy(), val_pred)
print("Accuracy:", accuracy)

# Display more detailed evaluation metrics
print(classification_report(val_labels.numpy(), val_pred))

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 16 concurrent workers.
