<a href="https://colab.research.google.com/github/ethanarsht/detective-dogs/blob/main/LogReg.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as T
from torchvision.datasets import ImageFolder
import os
import pandas as pd

In [2]:
# Mount Google Drive into the Colab runtime; the dataset paths used later in
# this file all live under the mount point.
from google.colab import drive
drive.mount('/content/drive')
# Root folder of the project on the mounted drive.
mydrive = "/content/drive/MyDrive/Pneumonia_Detective_Dogs"

Mounted at /content/drive


In [3]:
class CustomImageDataset(Dataset):
    """Image dataset driven by a CSV of file names and string labels.

    The first CSV column is read as the image file name (joined onto
    ``img_dir_path``) and the second column as the label string.  The label
    ``"NORMAL"`` maps to class 0; every other value maps to class 1.

    Args:
        csv_file: Path of the CSV file, parsed with ``pandas.read_csv``.
        img_dir_path: Directory the file names are relative to.
        transform: Optional callable applied to every loaded image.
        loader: Optional callable ``path -> image``.  When omitted,
            torchvision's ``pil_loader`` is used, which opens the file and
            converts it to RGB.
    """

    def __init__(self, csv_file, img_dir_path, transform=None, loader=None):
        self.img_labels = pd.read_csv(csv_file)
        self.img_dir = img_dir_path
        self.transform = transform
        if loader is None:
            # Resolved lazily so the class needs no module-level PIL import;
            # torchvision is already a dependency of this file.
            from torchvision.datasets.folder import pil_loader
            loader = pil_loader
        self.loader = loader

    def __len__(self):
        """Return the number of rows in the label CSV."""
        return len(self.img_labels)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for row ``idx``; label is a 0-d tensor."""
        row = self.img_labels.iloc[idx]
        image = self.loader(os.path.join(self.img_dir, row.iloc[0]))
        if self.transform is not None:
            image = self.transform(image)
        label = 0 if row.iloc[1] == "NORMAL" else 1
        return image, torch.tensor(label)

# Side length (pixels) every image is resized to.  Images are loaded as
# 3-channel RGB, so a flattened image has image_side * image_side * 3 values.
image_side = 128

# Resize, convert to a float tensor, then normalise each channel.  The
# mean/std values match the widely used ImageNet channel statistics.
transforms = T.Compose([
    T.Resize((image_side, image_side)),
    T.ToTensor(),
    T.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
])

# Every split lives under the project folder named by `mydrive`, so the
# paths are derived from it instead of repeating the full prefix.
data_root = os.path.join(mydrive, 'pneumonia_images')


def _split_dataset(split):
    """Build the CSV-backed dataset for one split ('train', 'val' or 'test')."""
    split_dir = os.path.join(data_root, split)
    return CustomImageDataset(
        os.path.join(split_dir, f'{split}.csv'),
        os.path.join(split_dir, f'{split}_images'),
        transforms,
    )


training_data = _split_dataset('train')
val_data = _split_dataset('val')
test_data = _split_dataset('test')

train_dataloader = DataLoader(training_data, batch_size=64, shuffle=True)
val_dataloader = DataLoader(val_data, batch_size=64, shuffle=True)
test_dataloader = DataLoader(test_data, batch_size=64, shuffle=True)

# Number of input features of the flattened image and number of output classes.
input_size = image_side * image_side * 3
num_classes = 2



In [4]:
# Logistic regression: one linear layer that maps a flattened image to one
# raw score per class.
model = nn.Linear(in_features=input_size, out_features=num_classes)

class PneumoniaModel(nn.Module):
    """Logistic-regression classifier: a single linear layer over flat images.

    Args:
        in_features: Number of values in one flattened image.  Defaults to
            the module-level ``input_size``.
        n_classes: Number of output scores.  Defaults to the module-level
            ``num_classes``.
    """

    def __init__(self, in_features=None, n_classes=None):
        super().__init__()
        # Fall back to the notebook-wide constants so ``PneumoniaModel()``
        # keeps building the same layer as before.
        if in_features is None:
            in_features = input_size
        if n_classes is None:
            n_classes = num_classes
        self.linear = nn.Linear(in_features, n_classes)

    def forward(self, xb):
        """Flatten ``xb`` to ``(-1, in_features)`` and return the class scores."""
        # reshape (unlike view) also accepts non-contiguous tensors, and the
        # layer's own in_features keeps forward independent of later changes
        # to the global ``input_size``.
        xb = xb.reshape(-1, self.linear.in_features)
        return self.linear(xb)

# Loss used by fit(): cross-entropy computed on the raw class scores.
loss_fn = nn.functional.cross_entropy

def fit(epochs, lr, model, train_loader, val_loader, opt_func=torch.optim.SGD):
    """Train ``model`` and record a validation score after every epoch.

    Args:
        epochs: Number of passes over ``train_loader``.
        lr: Learning rate handed to the optimizer.
        model: ``torch.nn.Module`` that takes flattened image batches.
        train_loader: Iterable of ``(images, labels)`` training batches.
        val_loader: Iterable of ``(images, labels)`` batches passed to
            ``evaluate`` at the end of each epoch.
        opt_func: Optimizer factory called as
            ``opt_func(model.parameters(), lr=lr)``.

    Returns:
        A list with one entry per epoch: the value returned by
        ``evaluate(model, val_loader)``.
    """
    optimizer = opt_func(model.parameters(), lr=lr)
    history = []

    for _ in range(epochs):
        # Evaluation code may switch the model to eval mode, so training
        # mode is re-enabled at the start of every epoch.
        model.train()
        for images, labels in train_loader:
            # Flatten each image to a feature vector: (batch, features).
            flat = images.reshape(images.size(0), -1)
            loss = loss_fn(model(flat), labels)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        history.append(evaluate(model, val_loader))

    return history

def evaluate(model, val_loader):
    """Return the classification accuracy of ``model`` over ``val_loader``.

    Every batch is flattened to ``(batch, features)`` before it is passed to
    the model; the predicted class is the index of the largest score along
    dimension 1.  The model is put in eval mode for the duration of the call
    and its previous train/eval mode is restored afterwards.

    Args:
        model: ``torch.nn.Module`` that takes flattened image batches.
        val_loader: Iterable of ``(images, labels)`` batches.

    Returns:
        Fraction of correctly predicted samples as a float, or ``0.0`` when
        the loader yields no samples.
    """
    was_training = model.training
    model.eval()
    correct = 0
    total = 0

    try:
        with torch.no_grad():
            for images, labels in val_loader:
                flat = images.reshape(images.size(0), -1)
                _, predicted = torch.max(model(flat), 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
    finally:
        # Leave the model in whatever mode the caller had it in.
        model.train(was_training)

    if total == 0:
        # Nothing was evaluated; avoid dividing by zero.
        return 0.0
    return correct / total


def predict_image(img, model):
    """Return the predicted class index for a single image tensor.

    Args:
        img: Tensor holding one image (no batch dimension).
        model: Model that takes flattened image batches.

    Returns:
        The index of the highest-scoring class as a Python int.
    """
    # Flatten to a batch of one feature vector, matching how fit() and
    # evaluate() feed the model.
    xb = img.reshape(1, -1)
    with torch.no_grad():  # inference only; no gradients needed
        yb = model(xb)
    _, preds = torch.max(yb, dim=1)
    return preds[0].item()

# Score the model on the test split, loaded here through ImageFolder.
# NOTE(review): ImageFolder takes its class labels from the sub-directory
# names under the root; presumably these should agree with the
# 0 = "NORMAL" / 1 = other encoding used by CustomImageDataset - confirm.
# NOTE(review): no call to fit() is visible before this point, so `model`
# appears to be scored with its initial weights - confirm.
test_dataset = ImageFolder(
    root='/content/drive/MyDrive/Pneumonia_Detective_Dogs/pneumonia_images/test',
    transform=transforms,
)
test_loader = DataLoader(dataset=test_dataset, batch_size=256)
result = evaluate(model, test_loader)
result

0.27323717948717946

In [None]:
from sklearn.metrics import f1_score

def evaluate(model, data_loader):
    """Return the macro-averaged F1 score of ``model`` over ``data_loader``.

    Every batch is flattened to ``(batch, features)`` before it is passed to
    the model; the predicted class is the index of the largest score along
    dimension 1.  The model is put in eval mode for the duration of the call
    and its previous train/eval mode is restored afterwards, so calling this
    between training epochs does not leave the model in eval mode.

    Args:
        model: ``torch.nn.Module`` that takes flattened image batches.
        data_loader: Iterable of ``(images, labels)`` batches.

    Returns:
        The value of ``f1_score(targets, predictions, average='macro')``.
    """
    was_training = model.training
    model.eval()
    targets = []
    predictions = []

    try:
        with torch.no_grad():
            for images, labels in data_loader:
                flat = images.reshape(images.size(0), -1)
                _, predicted = torch.max(model(flat), 1)

                targets.extend(labels.tolist())
                predictions.extend(predicted.tolist())
    finally:
        # Leave the model in whatever mode the caller had it in.
        model.train(was_training)

    return f1_score(targets, predictions, average='macro')


# Re-score the same model and test loader with the evaluate() defined just
# above, which reports macro-averaged F1.
result = evaluate(model, test_loader)
result

0.23489536007384648