<a href="https://colab.research.google.com/github/devshah21/PhotoML_baselinemodel/blob/main/baseline_model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Mount Google Drive into the Colab filesystem at /content/drive.
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [2]:
# Folder on the mounted Drive; presumably the image root handed to the
# functions defined below -- confirm against the cells that use it.
data_dir = '/content/drive/MyDrive/Dana_1'

In [3]:
import os
import random
import numpy as np
import torch
import torch.nn as nn
import torchvision
from torchvision import transforms
from sklearn.cluster import KMeans


# Seed every RNG this notebook draws from (Python, NumPy, PyTorch) so that
# sampling and weight initialisation repeat across runs.
random.seed(42)
np.random.seed(42)
torch.manual_seed(42)


# Preprocessing applied to every image: resize the short side to 256, take the
# central 224x224 crop, convert to a tensor, then normalise each channel with
# the mean/std that torchvision's ImageNet-pretrained models expect.
transform = transforms.Compose(
    [
        transforms.Resize(size=256),
        transforms.CenterCrop(size=224),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)


# Define function to cluster images
def cluster_images(image_folder, n_clusters):
    """Group the images under ``image_folder`` into ``n_clusters`` K-Means clusters.

    Every image is run through the module-level ``model`` (on the module-level
    ``device``) and the resulting outputs are used as the feature vectors that
    K-Means partitions.

    NOTE(review): ``model`` and ``device`` are not parameters; they must already
    exist as globals when this function is called.

    Args:
        image_folder: Root directory in ``ImageFolder`` layout.
        n_clusters: Number of clusters to fit.

    Returns:
        Array of cluster ids, one per image, in the dataset's (unshuffled) order.
    """
    image_set = torchvision.datasets.ImageFolder(root=image_folder, transform=transform)
    loader = torch.utils.data.DataLoader(
        image_set, batch_size=16, shuffle=False, num_workers=2
    )

    # No gradients are needed for feature extraction, so a single no_grad
    # scope wraps the whole pass over the data.
    with torch.no_grad():
        per_batch = [
            model(batch.to(device)).detach().cpu().numpy()
            for batch, _ in loader
        ]

    feature_matrix = np.concatenate(per_batch)

    clusterer = KMeans(n_clusters=n_clusters, random_state=42)
    return clusterer.fit_predict(feature_matrix)
    

# Define function to label a subset of images from each cluster
def label_images(image_folder, labels, n_labeled):
    """Pick up to ``n_labeled`` images from every cluster, paired with their class.

    Args:
        image_folder: Root directory in ``ImageFolder`` layout. It must be the
            same folder the cluster ids were computed for, because each
            position in ``labels`` is interpreted as an index into that
            folder's ``ImageFolder`` sample list.
        labels: 1-D array-like of cluster ids, one per image.
        n_labeled: Maximum number of images to draw from each cluster. Clusters
            with at most this many members are taken whole.

    Returns:
        List of ``(image_path, class_name)`` tuples for the selected images.

    Raises:
        ValueError: If ``labels`` does not have one entry per image found
            under ``image_folder``.
    """
    # Build the sample index locally instead of relying on a global `dataset`.
    # ImageFolder enumerates classes and files in sorted order, so positions
    # line up with the unshuffled loader used by cluster_images.
    dataset = torchvision.datasets.ImageFolder(root=image_folder)

    labels = np.asarray(labels)
    if len(labels) != len(dataset.samples):
        raise ValueError(
            f"labels has {len(labels)} entries but {image_folder!r} contains "
            f"{len(dataset.samples)} images"
        )

    labeled_indices = []
    for cluster_id in np.unique(labels):
        members = np.where(labels == cluster_id)[0]
        if len(members) > n_labeled:
            # Sample without replacement so no image is selected twice.
            members = np.random.choice(members, n_labeled, replace=False)
        labeled_indices.extend(members.tolist())

    # Sample paths already include the dataset root, so they are used as-is;
    # joining image_folder again would duplicate the prefix for relative roots.
    return [
        (dataset.samples[index][0], dataset.classes[dataset.samples[index][1]])
        for index in labeled_indices
    ]


# Define function to train binary classification model on labeled images
class _LabeledImageDataset(torch.utils.data.Dataset):
    """Dataset over an explicit list of ``(image_path, class_name)`` pairs.

    Kept at module level (not nested in ``train_model``) so that DataLoader
    worker processes are able to pickle it.
    """

    def __init__(self, samples, transform=None):
        # Sorted class names give a stable name -> index mapping.
        self.classes = sorted({class_name for _, class_name in samples})
        self.class_to_idx = {name: idx for idx, name in enumerate(self.classes)}
        self.samples = [(path, self.class_to_idx[name]) for path, name in samples]
        self.transform = transform

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, index):
        path, target = self.samples[index]
        image = torchvision.datasets.folder.default_loader(path)
        if self.transform is not None:
            image = self.transform(image)
        return image, target


def train_model(labeled_images, device, num_epochs):
    """Fine-tune a pretrained ResNet18 as a two-class classifier.

    Args:
        labeled_images: Either a directory in ``ImageFolder`` layout, or an
            iterable of ``(image_path, class_name)`` pairs such as the list
            returned by ``label_images``.
        device: Torch device the model and batches are moved to.
        num_epochs: Number of full passes over the labeled data.

    Returns:
        The trained model, still on ``device``.

    Raises:
        ValueError: If the labeled data contains more than two classes, which
            the two-output head cannot represent.
    """
    # A path keeps the directory-based behaviour; anything else is treated as
    # explicit (path, class_name) pairs, which ImageFolder cannot consume.
    if isinstance(labeled_images, (str, os.PathLike)):
        labeled_dataset = torchvision.datasets.ImageFolder(labeled_images, transform=transform)
    else:
        labeled_dataset = _LabeledImageDataset(list(labeled_images), transform=transform)

    if len(labeled_dataset.classes) > 2:
        raise ValueError(
            "train_model is a binary classifier but the labeled data has "
            f"{len(labeled_dataset.classes)} classes: {labeled_dataset.classes}"
        )

    labeled_dataloader = torch.utils.data.DataLoader(
        labeled_dataset, batch_size=16, shuffle=True, num_workers=2
    )

    # Start from ImageNet-pretrained weights and swap the final layer for a
    # fresh two-way head.
    model = torchvision.models.resnet18(pretrained=True)
    num_ftrs = model.fc.in_features
    model.fc = nn.Linear(num_ftrs, 2)
    model.to(device)

    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

    model.train()
    for _ in range(num_epochs):
        for images, labels in labeled_dataloader:
            images, labels = images.to(device), labels.to(device)

            optimizer.zero_grad()
            loss = criterion(model(images), labels)
            loss.backward()
            optimizer.step()

    return model


# Define function to score images
def score_images(image_folder, model, device):
    """Return the predicted class-1 probability for every image in a folder.

    Args:
        image_folder: Root directory in ``ImageFolder`` layout.
        model: Classifier whose output has at least two columns; expected to
            already live on ``device``.
        device: Torch device each batch is moved to before the forward pass.

    Returns:
        List of floats, one per image in the dataset's (unshuffled) order,
        holding the softmax probability of class index 1.
    """
    dataset = torchvision.datasets.ImageFolder(root=image_folder, transform=transform)
    dataloader = torch.utils.data.DataLoader(dataset, batch_size=16, shuffle=False, num_workers=2)

    # Score in eval mode so BatchNorm/Dropout behave deterministically and
    # running statistics are not modified; the caller's mode is restored after.
    was_training = model.training
    model.eval()

    scores = []
    try:
        with torch.no_grad():
            for images, _ in dataloader:
                probabilities = torch.softmax(model(images.to(device)), dim=1)
                scores.extend(probabilities[:, 1].cpu().tolist())
    finally:
        model.train(was_training)

    return scores
