In [1]:
%pip install datasets
%pip install torch
%pip install torchvision

Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
Collecting torchvision
  Using cached torchvision-0.18.1-cp311-cp311-macosx_11_0_arm64.whl.metadata (6.6 kB)
Collecting torch==2.3.1 (from torchvision)
  Using cached torch-2.3.1-cp311-none-macosx_11_0_arm64.whl.metadata (26 kB)
Using cached torchvision-0.18.1-cp311-cp311-macosx_11_0_arm64.whl (1.6 MB)
Using cached torch-2.3.1-cp311-none-macosx_11_0_arm64.whl (61.0 MB)
Installing collected packages: torch, torchvision
  Attempting uninstall: torch
    Found existing installation: torch 2.2.2
    Uninstalling torch-2.2.2:
      Successfully uninstalled torch-2.2.2
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
torchtext 0.17.2 requires torch==2.2.2, but you have torch 2.3.1 which is incompatible.[0m[31m
[0mSuccessfully inst

In [2]:
!wandb login

[34m[1mwandb[0m: Currently logged in as: [33mableal[0m ([33mwandb-smle[0m). Use [1m`wandb login --relogin`[0m to force relogin


In [4]:
## Imports
from datasets import load_dataset, Dataset
import torch
import torch.nn as nn
from torchvision.transforms import v2
import torch.utils.data
import wandb
import torch.optim as optim
from torch.optim.lr_scheduler import StepLR
import torch.nn.functional as F
import gc

In [5]:
#set run
run = wandb.init(entity="wandb-smle",
        project="aleal-domain-img", config="./config.yaml", save_code=True,
                 group="evaluate", force=True,
                 id="stilted-morning-60", resume="allow")

[34m[1mwandb[0m: Currently logged in as: [33mableal[0m ([33mwandb-smle[0m). Use [1m`wandb login --relogin`[0m to force relogin


In [7]:
#load all data
torch.manual_seed(42)

device = torch.device("mps")

# fetch from hugging face
full_load = load_dataset("wltjr1007/DomainNet")
s_train_data = full_load['train']
test_set = full_load['test']

# split train set into train/validation
split_train_data = s_train_data.train_test_split(test_size=wandb.config["test_size"])
training_set = split_train_data["train"]
validation_set = split_train_data["test"]

# construct wandb artifact, add the datasets
hf_data_set = wandb.Artifact(name="hf-dataset", type="dataset")
names = ["training", "validation", "test"]
datasets = [training_set, validation_set, test_set]

name: str
dataset: Dataset
for name, dataset in zip(names, datasets):
    # 🐣 Store a new file in the artifact, and write something into its contents.
    dataset.set_format("torch")
    with hf_data_set.new_file(name + ".csv", mode="wb") as file:
        dataset.to_csv(file)


run.log_artifact(hf_data_set)

# turn into dataloaders and return

transforms = v2.Compose([
    v2.ToImage(),
    v2.Resize(size=(300, 300), antialias=True),
    v2.ToDtype(torch.float32, scale=True),  # Normalize expects float input
    v2.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

train_kwargs = {'batch_size': wandb.config["trainBatchSize"]}
test_kwargs = {'batch_size': 1000}

torch_training_set = training_set.with_format("torch")
torch_validation_set = validation_set.with_format("torch")
torch_testing_set = test_set.with_format("torch")

torch_training_set.set_transform(transforms)
torch_validation_set.set_transform(transforms)
torch_testing_set.set_transform(transforms)

train_loader = torch.utils.data.DataLoader(torch_training_set, **train_kwargs)
test_loader = torch.utils.data.DataLoader(torch_testing_set, **test_kwargs)
validation_loader = torch.utils.data.DataLoader(torch_validation_set, **test_kwargs)




Creating CSV from Arrow format: 100%|█████████████████████████████████████████████| 82/82 [01:39<00:00,  1.21s/ba]
Creating CSV from Arrow format: 100%|███████████████████████████████████████████| 328/328 [06:32<00:00,  1.20s/ba]
Creating CSV from Arrow format: 100%|███████████████████████████████████████████| 177/177 [02:53<00:00,  1.02ba/s]


In [8]:
#define the model
class imageModel(nn.Module):
    def __init__(self):
        super(imageModel, self).__init__()
        self.conv1 = nn.Conv2d(3, 16, 3, 1)
        self.conv2 = nn.Conv2d(16, 32, 3, 1)
        self.dropout1 = nn.Dropout(0.25)
        self.dropout2 = nn.Dropout(0.5)
        self.fc1 = nn.Linear(700928, 64)
        self.fc2 = nn.Linear(64, 345)

    def forward(self, x):
        x = self.conv1(x)
        x = F.relu(x)
        x = self.conv2(x)
        x = F.relu(x)
        x = F.max_pool2d(x, 2)
        x = self.dropout1(x)
        x = torch.flatten(x, 1)
        x = self.fc1(x)
        x = F.relu(x)
        x = self.dropout2(x)
        x = self.fc2(x)
        output = F.log_softmax(x, dim=1)
        return output

In [14]:
artifact = run.use_artifact("DomainNet-Model:latest")
reuseModel = imageModel()
reuseModel.load_state_dict(torch.load(artifact.file(), map_location=torch.device('mps')))

<All keys matched successfully>

In [15]:
#evaluate testing set
reuseModel.eval()
test_loss=torch.tensor(float(0))
with torch.no_grad():
    for batch_idx, data in enumerate(test_loader):
        img, lbl = data["image"], data["label"]
        img, lbl = img.to(device), lbl.to(device)
        output = reuseModel(img)
        test_loss += F.nll_loss(output, lbl, reduction='sum').item()
        accuracy = batch_idx / len(validation_loader)

        if batch_idx % wandb.config["log_interval"] == 0:
            print('Test batch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
              batch_idx, batch_idx * len(img), len(validation_loader.dataset),
              batch_idx / len(validation_loader), test_loss))
            run.log({"test_acc": accuracy, "test_loss": test_loss}, step=batch_idx)



KeyboardInterrupt: 

In [16]:
run.finish()

VBox(children=(Label(value='51774.941 MB of 51774.941 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
test_acc,▁
test_loss,▁

0,1
test_acc,0.0
test_loss,5851.53027


In [None]:
## This did not end well