<a href="https://colab.research.google.com/github/wandb/examples/blob/master/colabs/intro/Intro_to_Weights_&_Biases.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>
<!--- @wandbcode{intro-colab} -->

<img src="http://wandb.me/logo-im-png" width="400" alt="Weights & Biases" />
<!--- @wandbcode{intro-colab} -->

In [None]:
!pip install Pillow
!pip install  wandb -qU

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.1/2.1 MB[0m [31m23.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m188.6/188.6 kB[0m [31m20.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m218.8/218.8 kB[0m [31m21.4 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.7/62.7 kB[0m [31m7.4 MB/s[0m eta [36m0:00:00[0m
[?25h  Building wheel for pathtools (setup.py) ... [?25l[?25hdone


## **All Imports Here**

In [None]:
from PIL import Image
import os
import math
import random
import torch
import torch.optim as optim
import torch.nn as nn
from torchsummary import summary
from torchvision.transforms import transforms
import torch.nn.functional as F
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader, random_split, ConcatDataset
from datetime import datetime
import pandas as pd
import matplotlib.pyplot as plt
import wandb

## Connect to Drive for Dataset Retrieval

In [None]:
# Colab-only module used to attach Google Drive to this runtime.
from google.colab import drive

# Mount Drive at /content/drive so the dataset folder is readable as ordinary files.
drive.mount('/content/drive')

# Dataset root: used as the default `root_dir` of the dataset class and as the
# directory the trained model is saved into.
# NOTE(review): the path is relative, so it presumably relies on the working
# directory being /content — confirm, or make it absolute.
cocastic_drive_account = "drive/MyDrive/Palm_oil_Adulteration_Dataset/Palm_Oil_Dataset_Demo"

Mounted at /content/drive


## **Log in to your W&B account**

In [None]:
# SECURITY: an API key must never be hard-coded in a notebook. The key that was
# previously committed on this line has to be treated as leaked and revoked at
# https://wandb.ai/authorize. Read the key from the environment instead
# (WANDB_API_KEY is also the variable the wandb client itself looks for).
key = os.environ.get("WANDB_API_KEY", "")

In [None]:
# Authenticate this runtime with W&B; when no key is stored yet the call
# prompts for one interactively.
wandb.login()

<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter, or press ctrl+c to quit:

 ··········


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


True

## **All Custom Classes**

In [None]:
class CustomPalmOilDataset(Dataset):
    """Image dataset of palm-oil samples read from class-named sub-folders.

    ``root_dir`` is scanned for the sub-folders ``Pure`` (label 0) and
    ``Adulterated`` (label 1); every entry inside them is recorded as one
    sample. Any other entry in ``root_dir`` is ignored. The collected samples
    are shuffled once, at construction time, using the global ``random`` state.

    Args:
        root_dir: Directory containing the class sub-folders.
        transform: Optional callable applied to each loaded RGB PIL image.
        target_transform: Optional callable applied to each integer label.

    Raises:
        OSError: If ``root_dir`` cannot be listed.
    """

    def __init__(self, root_dir = cocastic_drive_account, transform = None, target_transform = None):
        self.root_dir = root_dir
        self.transform = transform
        self.target_transform = target_transform

        # Assuming 'Pure' corresponds to label 0 and 'Adulterated' corresponds to label 1
        label_mapping = {'Pure': 0, 'Adulterated': 1}

        samples = []
        for label_folder in os.listdir(root_dir):
            label = label_mapping.get(label_folder)
            if label is None:
                continue  # not one of the known class folders
            label_folder_path = os.path.join(root_dir, label_folder)
            if not os.path.isdir(label_folder_path):
                continue  # a stray file named like a class is not a class folder
            for image_name in os.listdir(label_folder_path):
                samples.append((os.path.join(label_folder_path, image_name), label))

        random.shuffle(samples)
        if samples:
            self.image_paths, self.labels = zip(*samples)
        else:
            # zip(*[]) yields nothing to unpack; expose an empty dataset instead
            # of failing with an unrelated-looking ValueError.
            self.image_paths, self.labels = (), ()

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Return the ``(image, label)`` pair at position ``idx``."""
        # The context manager releases the file handle as soon as the pixel
        # data has been copied into the converted image.
        with Image.open(self.image_paths[idx]) as raw_image:
            image = raw_image.convert("RGB")
        label = self.labels[idx]

        # Compare against None: a callable may be falsy (e.g. an empty container module).
        if self.transform is not None:
            image = self.transform(image)
        if self.target_transform is not None:
            label = self.target_transform(label)

        return image, label




class PalmOilClassifier(nn.Module):
    """Small three-stage CNN that outputs two class logits per image.

    The layer sizes fit 3-channel 224x224 inputs: each unpadded 5x5
    convolution followed by 2x2 max-pooling takes the spatial size through
    224 -> 110 -> 53 -> 24, which gives the ``32 * 24 * 24`` features
    expected by ``fc1``.
    """

    def __init__(self):
        super().__init__()
        # Feature extractor: channels 3 -> 6 -> 16 -> 32, 5x5 kernels throughout.
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=6, kernel_size=5)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)
        self.conv3 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=5)
        # Classifier head: flattened features -> 120 -> 84 -> 2 logits.
        self.fc1 = nn.Linear(in_features=32 * 24 * 24, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=84)
        self.fc3 = nn.Linear(in_features=84, out_features=2)

    def forward(self, x):
        """Map a batch of images to raw (un-normalised) class scores."""
        # Every convolution is followed by ReLU and the shared pooling layer.
        for conv in (self.conv1, self.conv2, self.conv3):
            x = self.pool(F.relu(conv(x)))
        features = torch.flatten(x, start_dim=1)  # keep the batch dimension
        hidden = F.relu(self.fc1(features))
        hidden = F.relu(self.fc2(hidden))
        return self.fc3(hidden)

## All Custom Functions

In [None]:

def custom_collate_fn(batch):
    """Combine ``(image, label)`` samples into one image tensor and one label tensor.

    Images are stacked along a new leading dimension; labels are packed into
    a single tensor in the same order.
    """
    image_list = [sample[0] for sample in batch]
    target_list = [sample[1] for sample in batch]
    return torch.stack(image_list, dim=0), torch.tensor(target_list)





def validate_model(model, valid_dl, loss_func, log_images=False, batch_idx=0):
    """Evaluate ``model`` on ``valid_dl`` and optionally log one batch as a wandb.Table.

    Returns a ``(loss, accuracy)`` pair, each divided by
    ``len(valid_dl.dataset)``. The model is switched to eval mode and is left
    in that mode on return.
    """
    model.eval()
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    running_loss = 0.
    with torch.inference_mode():
        n_correct = 0
        for batch_no, (images, labels) in enumerate(valid_dl):
            images = images.to(device)
            labels = labels.to(device)

            # Forward pass; the loss is scaled by the batch size so the final
            # division is per sample (assumes loss_func returns a batch mean).
            logits = model(images)
            running_loss += loss_func(logits, labels) * labels.size(0)

            # Predicted class = index of the largest score in each row.
            predicted = torch.max(logits.data, 1).indices
            n_correct += (predicted == labels).sum().item()

            # Only the batch at position batch_idx is ever sent to the dashboard.
            if log_images and batch_no == batch_idx:
                log_image_table(images, predicted, labels, logits.softmax(dim=1))
    n_samples = len(valid_dl.dataset)
    return running_loss / n_samples, n_correct / n_samples





def log_image_table(images, predicted, labels, probs):
    """Log a wandb.Table with one row per image: (image, pred, target, scores).

    Args:
        images: Batch of channel-first image tensors.
        predicted: Predicted class index per image.
        labels: Target class index per image.
        probs: Per-class scores, one row per image. The number of
            ``score_<i>`` columns follows the width of this tensor.
    """
    # One score column per class; taken from `probs` so the table always
    # matches the row width instead of assuming exactly two classes.
    n_classes = probs.shape[1]
    # 🐝 Create a wandb Table to log images, labels, and predictions
    table = wandb.Table(columns=["image", "pred", "target"] + [f"score_{i}" for i in range(n_classes)])
    for img, pred, targ, prob in zip(images.to("cpu"), predicted.to("cpu"), labels.to("cpu"), probs.to("cpu")):
        # Channel-first -> channel-last, then scale by 255 before handing the
        # array to wandb.Image. Assuming your input images are in RGB format.
        data = [wandb.Image(img.numpy().transpose(1, 2, 0) * 255), pred, targ] + prob.numpy().tolist()
        table.add_data(*data)
    # commit=False: attach the table to the next wandb.log call instead of
    # starting a logging step of its own.
    wandb.log({"predictions_table": table}, commit=False)









def train_model(num_of_runs=2):
    """Train a fresh ``PalmOilClassifier`` ``num_of_runs`` times, one W&B run each.

    Every run reads the module-level ``train_dataset`` / ``test_dataset``,
    trains with Adam and cross-entropy using the hyper-parameters stored in
    the W&B config, validates once at the end of every epoch, and finally
    saves the whole model to ``cocastic_drive_account + '/model.pth'``.

    Args:
        num_of_runs: Number of independent training runs to execute.
    """
    for _ in range(num_of_runs):
        wandb.init(project="Updated_Palm_Oil_Adulteration_Project_DLI",
                   config={
                       "epochs": 8,
                       "batch_size": 64,
                       "lr": 0.0018})

        # Copy your config
        config = wandb.config
        # Set device (CPU or GPU)
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

        train_dataloader = DataLoader(train_dataset, batch_size=config.batch_size, shuffle=True, collate_fn=custom_collate_fn)
        val_dataloader = DataLoader(test_dataset, batch_size=config.batch_size, shuffle=False, collate_fn=custom_collate_fn)
        n_steps_per_epoch = math.ceil(len(train_dataloader.dataset) / config.batch_size)

        model = PalmOilClassifier()
        model.to(device)

        loss_func = nn.CrossEntropyLoss()
        optimizer = optim.Adam(model.parameters(), lr=config.lr)

        example_ct = 0
        accuracy = None  # stays None only when config.epochs is 0
        for epoch in range(config.epochs):
            # validate_model leaves the model in eval mode, so training mode
            # has to be restored at the start of every epoch.
            model.train()

            for step, (images, labels) in enumerate(train_dataloader):
                # Move data to the appropriate device
                images = images.to(device)
                labels = labels.to(device)

                optimizer.zero_grad()
                outputs = model(images)
                loss = loss_func(outputs, labels)
                loss.backward()
                optimizer.step()

                train_loss = loss.item()  # plain float for logging/printing
                example_ct += len(images)
                metrics = {"train/train_loss": train_loss,
                           "train/epoch": (step + 1 + (n_steps_per_epoch * epoch)) / n_steps_per_epoch,
                           "train/example_ct": example_ct}

                # The last step of an epoch is logged below, together with the
                # validation metrics, so it is skipped here.
                if step + 1 < n_steps_per_epoch:
                    # 🐝 Log train metrics to wandb
                    wandb.log(metrics)

            # Validate once per epoch (not once per step): this avoids logging
            # every step twice, training in eval mode, and re-uploading the
            # image table on every step of the final epoch.
            val_loss, accuracy = validate_model(model, val_dataloader, loss_func, log_images=(epoch==(config.epochs-1)))

            # Log train and validation metrics to wandb
            val_metrics = {"val/val_loss": float(val_loss),
                           "val/val_accuracy": accuracy}
            wandb.log({**metrics, **val_metrics})

            print(f"Train Loss: {train_loss:.3f}, Valid Loss: {val_loss:.3f}, Accuracy: {accuracy:.2f}")

        # Record the accuracy measured on test_dataset after the final epoch
        # rather than a hard-coded placeholder value.
        if accuracy is not None:
            wandb.summary['test_accuracy'] = accuracy

        # 🐝 Close your wandb run
        wandb.finish()
        # NOTE: every run writes to the same path, so only the last run's model is kept.
        torch.save(model, cocastic_drive_account+'/model.pth')



In [None]:

# Preprocessing applied to every image: resize to 224x224, then convert to a tensor.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    # transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

random_seed = 42  # Seed for reproducibility; applied to the shuffle and the split below
batch_size = 64

# The dataset shuffles its samples with the global `random` state when it is
# constructed, so seed that state first.
random.seed(random_seed)
dataset = CustomPalmOilDataset(transform=transform)

# Define the percentage split for training and testing
train_percentage = 0.8  # 80% for training, 20% for testing
dataset_size = len(dataset)
train_size = int(train_percentage * dataset_size)
test_size = dataset_size - train_size  # number of held-out samples

# Split the dataset into train and test sets; the seeded generator makes the
# split identical on every execution of this cell.
train_dataset, test_dataset = random_split(
    dataset,
    [train_size, test_size],
    generator=torch.Generator().manual_seed(random_seed),
)
print(f"Dataset Size: {dataset_size}, Train Size: {train_size}, Test Size: {test_size}")

Dataset Size: 1000, Train Size: 800, Test Size: 200


In [None]:
train_model()

[34m[1mwandb[0m: Currently logged in as: [33myelsongdabuo-tang[0m. Use [1m`wandb login --relogin`[0m to force relogin


Train Loss: 0.693, Valid Loss: 0.688770, Accuracy: 0.52
Train Loss: 0.700, Valid Loss: 0.703599, Accuracy: 0.48
Train Loss: 0.679, Valid Loss: 0.685772, Accuracy: 0.48
Train Loss: 0.657, Valid Loss: 0.637375, Accuracy: 0.65
Train Loss: 0.654, Valid Loss: 0.626609, Accuracy: 0.61
Train Loss: 0.550, Valid Loss: 0.670080, Accuracy: 0.60
Train Loss: 0.638, Valid Loss: 0.585839, Accuracy: 0.66
Train Loss: 0.480, Valid Loss: 0.675913, Accuracy: 0.62
Train Loss: 0.601, Valid Loss: 0.554813, Accuracy: 0.74
Train Loss: 0.543, Valid Loss: 0.557004, Accuracy: 0.76
Train Loss: 0.531, Valid Loss: 0.522778, Accuracy: 0.77
Train Loss: 0.649, Valid Loss: 0.543724, Accuracy: 0.75
Train Loss: 0.433, Valid Loss: 0.562462, Accuracy: 0.70
Train Loss: 0.540, Valid Loss: 0.554233, Accuracy: 0.72
Train Loss: 0.556, Valid Loss: 0.535004, Accuracy: 0.77
Train Loss: 0.537, Valid Loss: 0.513751, Accuracy: 0.80
Train Loss: 0.517, Valid Loss: 0.487147, Accuracy: 0.81
Train Loss: 0.425, Valid Loss: 0.460365, Accurac

0,1
train/epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
train/example_ct,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
train/train_loss,██▆▆▆▆▆▄▄▂▄▄▄▄▃▄▃▃▃▃▃▂▃▂▂▂▃▂▂▄▂▂▂▂▄▃▁▃▁▂
val/val_accuracy,▂▁▃▃▅▅▆▆▆▆▅▆▇▆▆▆▆▇▇▇▇▆▇▇▇▇█▇▇▇▇▇▇▇▇█████
val/val_loss,████▆▆▆▅▅▆▅▄▄▃▄▄▄▃▃▃▃▃▂▂▂▂▂▂▂▂▂▃▃▃▃▂▂▂▁▁

0,1
test_accuracy,0.8
train/epoch,8.0
train/example_ct,6400.0
train/train_loss,0.15766
val/val_accuracy,0.945
val/val_loss,0.16658


Train Loss: 0.694, Valid Loss: 0.689404, Accuracy: 0.48
Train Loss: 0.682, Valid Loss: 0.976093, Accuracy: 0.48
Train Loss: 0.957, Valid Loss: 0.665639, Accuracy: 0.63
Train Loss: 0.678, Valid Loss: 0.683920, Accuracy: 0.52
Train Loss: 0.745, Valid Loss: 0.684597, Accuracy: 0.52
Train Loss: 0.690, Valid Loss: 0.687124, Accuracy: 0.57
Train Loss: 0.689, Valid Loss: 0.687195, Accuracy: 0.75
Train Loss: 0.686, Valid Loss: 0.684800, Accuracy: 0.53
Train Loss: 0.684, Valid Loss: 0.682704, Accuracy: 0.48
Train Loss: 0.677, Valid Loss: 0.683834, Accuracy: 0.48
Train Loss: 0.672, Valid Loss: 0.681394, Accuracy: 0.48
Train Loss: 0.674, Valid Loss: 0.669697, Accuracy: 0.48
Train Loss: 0.660, Valid Loss: 0.650719, Accuracy: 0.61
Train Loss: 0.631, Valid Loss: 0.631440, Accuracy: 0.69
Train Loss: 0.618, Valid Loss: 0.610008, Accuracy: 0.74
Train Loss: 0.648, Valid Loss: 0.593985, Accuracy: 0.69
Train Loss: 0.568, Valid Loss: 0.567108, Accuracy: 0.74
Train Loss: 0.542, Valid Loss: 0.630780, Accurac

VBox(children=(Label(value='46.194 MB of 46.194 MB uploaded (42.402 MB deduped)\r'), FloatProgress(value=1.0, …

0,1
train/epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
train/example_ct,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
train/train_loss,▆█▆▆▆▅▅▅▄▄▄▄▃▃▃▄▃▂▃▂▂▂▂▃▂▃▃▂▄▂▂▂▂▂▁▁▂▁▁▁
val/val_accuracy,▁▃▂▂▁▄▄▄▅▄▅▅▅▆▆▆▆▇▇▇▆▇▇▇▆▇▇▇▆▇▇▇▇▇▇▇███▇
val/val_loss,█▇███▇▇▆▅█▅▅▅▅▅▄▄▃▃▃▄▂▂▂▃▄▄▃▃▂▃▂▂▂▂▂▂▁▁▁

0,1
test_accuracy,0.8
train/epoch,8.0
train/example_ct,6400.0
train/train_loss,0.09602
val/val_accuracy,0.905
val/val_loss,0.15141
