In [None]:
%matplotlib inline

In [None]:
# from __future__ import print_function, division, unicode_literals
import pandas as pd
from skimage import io, transform
import numpy as np
import matplotlib.pyplot as plt
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import seaborn as sns
import torch
import os


In [None]:
# use_cuda = True
# device = torch.device("cuda" if (use_cuda and torch.cuda.is_available()) else "cpu")
# device
# torch.cuda.device_count()
# train_dir = os.path.abspath("train")
# img_name = os.path.join(train_dir,"1.png")
# img_name

In [None]:
# picture_frames = pd.read_csv('train.csv')
# picture_name = picture_frames.iloc[:,0]
# label = picture_frames.iloc[:,1]
# picture_name = np.asarray(picture_name)
# label = np.asarray(label)

In [None]:
class CellPictureDataset(Dataset):
    """Image dataset backed by a directory of images and an optional CSV index.

    With a CSV file, column 0 is read as the image file name (relative to
    ``root_dir``) and, unless ``test`` is true, column 1 as the label.
    Without a CSV file the images are taken from a sorted listing of the
    regular files in ``root_dir``; no labels exist in that mode, so it is only
    usable together with ``test=True``.
    """

    def __init__(self, root_dir, csv_file=None, transform=None, test=False):
        """
        Args:
            root_dir (string): Directory with all the images.
            csv_file (string, optional): Path to the csv file with annotations
                (image file name in column 0, label in column 1).
            transform (callable, optional): Optional transform to be applied
                on each loaded image.
            test (bool): If true, ``__getitem__`` returns only the image.
        """
        self.root_dir = os.path.abspath(root_dir)
        self.transform = transform
        self.test = test
        if csv_file is not None:
            self.data_frame = pd.read_csv(csv_file)
            self.file_names = None
        else:
            self.data_frame = None
            # Keep regular files only: folders such as `.ipynb_checkpoints`
            # must not be counted as samples. Sorting makes indices stable.
            self.file_names = sorted(
                entry for entry in os.listdir(self.root_dir)
                if os.path.isfile(os.path.join(self.root_dir, entry)))

    def __len__(self):
        # The CSV index, when present, defines the samples; the directory may
        # contain additional entries that are not part of the dataset.
        if self.data_frame is not None:
            return len(self.data_frame)
        return len(self.file_names)

    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()

        if self.data_frame is not None:
            file_name = self.data_frame.iloc[idx, 0]
        else:
            file_name = self.file_names[idx]

        # Use a local variable: caching the image on `self` leaked state
        # between calls and failed outright when no transform was set.
        image = io.imread(os.path.join(self.root_dir, file_name))
        if self.transform:
            image = self.transform(image)

        if self.test:
            return image
        if self.data_frame is None:
            raise ValueError(
                "labels require a csv_file; pass csv_file or use test=True")
        label = self.data_frame.iloc[idx, 1]
        return image, label

In [None]:
# data_transform = transforms.Compose([transforms.ToTensor(),transforms.Normalize([0.5,0.5,0.5],[0.5,0.5,0.5])])

In [None]:
# train_dataset = CellPictureDataset('train', csv_file='train.csv', transform=data_transform)
# test_dataset = CellPictureDataset('train',transform=data_transform, test=True)

In [None]:
import collections
class MyConvNet(nn.Module):
    """Small CNN classifier: two strided convolutions and two linear layers.

    Produces 4 logits per input image. The first linear layer expects 1024
    flattened features, which is what 3x100x100 inputs yield after the two
    convolutions (16 channels of 8x8).

    Args:
        l1 (int): Number of units in the hidden linear layer.
    """

    def __init__(self, l1=120):
        super().__init__()
        # Layer names and creation order are kept stable so that state_dict
        # keys and parameter initialisation stay the same.
        layers = collections.OrderedDict()
        layers['conv1'] = nn.Conv2d(3, 32, kernel_size=5, stride=5)
        layers['relu1'] = nn.ReLU()
        layers['conv2'] = nn.Conv2d(32, 16, kernel_size=5, stride=2)
        layers['relu2'] = nn.ReLU()
        layers['flatten'] = nn.Flatten()
        layers['fc1'] = nn.Linear(1024, l1)
        layers['relu3'] = nn.ReLU()
        layers['fc3'] = nn.Linear(l1, 4)
        self.model = nn.Sequential(layers)

    def forward(self, x):
        """Return the class logits for a batch of images ``x``."""
        return self.model(x)


In [None]:
# model = MyConvNet()
# data = torch.ones(10,3,100,100)
# result = model(data)
# result.shape

In [None]:
from torch.utils.data import random_split
import torchvision
from ray import tune
from ray.tune import CLIReporter
from ray.tune.schedulers import ASHAScheduler
from functools import partial

In [None]:
# train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True, num_workers=0)
# test_loader = DataLoader(test_dataset, batch_size=4, shuffle=True, num_workers=0)

In [None]:
# loss_fn = nn.CrossEntropyLoss()
# learning_rate = 1e-2
# optimizer = torch.optim.SGD(model.parameters(), lr = learning_rate)

In [None]:
# def training_loop():
#     total_train_step = 0
#     total_test_step = 0
#     epoch = 10
#     for i in range(epoch):
#         print("-------no.{} train begin".format(i+1))
#         for data in train_loader:
#             image, label = data
#             outputs = model(image)
#             loss = loss_fn(outputs, label)
            
#             optimizer.zero_grad()
#             loss.backward()
#             optimizer.step()

#             total_train_step = total_train_step + 1
#             if total_train_step % 100 == 0:
#                 print("train times: {}, Loss: {}".format(total_train_step, loss.item()))
        


In [None]:
# training_loop()

In [None]:
# def load_data(train_dir="train/", test_dir = "test/"):
#     transform = transforms.Compose([
#         transforms.ToTensor(),
#         transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
#     ])

#     train_dataset = CellPictureDataset(train_dir, csv_file='train.csv', transform=transform)
#     test_dataset = CellPictureDataset(test_dir,transform=transform, test=True)

#     return train_dataset, test_dataset

In [None]:
def train_cifar(config, checkpoint_dir=None, data_dir=None):
    """Train ``MyConvNet`` for 10 epochs as a Ray Tune trainable.

    After every epoch a checkpoint holding ``(model_state, optimizer_state)``
    is saved through ``tune.checkpoint_dir`` and the mean validation loss and
    validation accuracy are sent to Tune with ``tune.report``.

    Args:
        config (dict): Hyperparameters. Reads ``"l1"`` (hidden layer width),
            ``"lr"`` and ``"batch_size"``. ``"momentum"`` is optional and
            falls back to 0.9 when absent.
        checkpoint_dir (str, optional): Directory containing a ``"checkpoint"``
            file to restore model and optimizer state from.
        data_dir (str, optional): Directory that holds ``train.csv`` and the
            ``train`` image folder. Defaults to the current working directory.
            NOTE(review): Ray Tune may run trials from a per-trial working
            directory, in which case callers should pass an absolute path.
    """
    net = MyConvNet(config["l1"])
    device = "cpu"
    if torch.cuda.is_available():
        device = "cuda"
        if torch.cuda.device_count() > 1:
            net = nn.DataParallel(net)
    net.to(device)

    criterion = nn.CrossEntropyLoss()
    # Honour a sampled momentum instead of silently ignoring it.
    optimizer = optim.SGD(net.parameters(), lr=config["lr"],
                          momentum=config.get("momentum", 0.9))

    if checkpoint_dir:
        model_state, optimizer_state = torch.load(
            os.path.join(checkpoint_dir, "checkpoint"), map_location=device)
        net.load_state_dict(model_state)
        optimizer.load_state_dict(optimizer_state)

    base_dir = os.path.abspath(data_dir) if data_dir is not None else os.getcwd()
    csv_path = os.path.join(base_dir, "train.csv")
    train_dir = os.path.join(base_dir, "train")
    data_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5]),
    ])
    trainset = CellPictureDataset(train_dir, csv_file=csv_path, transform=data_transform)
    if len(trainset) == 0:
        raise ValueError("no training samples found in {}".format(train_dir))

    # 80/20 split of the labelled data into training and validation subsets.
    num_train = int(len(trainset) * 0.8)
    train_subset, val_subset = random_split(
        trainset, [num_train, len(trainset) - num_train])

    batch_size = int(config["batch_size"])
    trainloader = DataLoader(
        train_subset,
        batch_size=batch_size,
        shuffle=True,
        num_workers=0)
    # Validation metrics do not depend on sample order, so no shuffling.
    valloader = DataLoader(
        val_subset,
        batch_size=batch_size,
        shuffle=False,
        num_workers=0)

    for epoch in range(10):  # loop over the dataset multiple times
        net.train()
        running_loss = 0.0
        running_steps = 0
        for i, (inputs, labels) in enumerate(trainloader):
            inputs, labels = inputs.to(device), labels.to(device)

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = net(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            running_steps += 1
            if i % 100 == 99:  # print the mean loss of the last 100 mini-batches
                print("[%d, %5d] loss: %.3f" % (epoch + 1, i + 1,
                                                running_loss / running_steps))
                # Reset both accumulators together so the next printed value
                # is a true mean over its own window.
                running_loss = 0.0
                running_steps = 0

        # Validation loss and accuracy
        net.eval()
        val_loss = 0.0
        val_steps = 0
        total = 0
        correct = 0
        with torch.no_grad():
            for inputs, labels in valloader:
                inputs, labels = inputs.to(device), labels.to(device)

                outputs = net(inputs)
                _, predicted = torch.max(outputs.data, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

                val_loss += criterion(outputs, labels).item()
                val_steps += 1

        # Separate name so the `checkpoint_dir` argument is not shadowed.
        with tune.checkpoint_dir(epoch) as epoch_checkpoint_dir:
            path = os.path.join(epoch_checkpoint_dir, "checkpoint")
            torch.save((net.state_dict(), optimizer.state_dict()), path)

        tune.report(loss=(val_loss / val_steps), accuracy=correct / total)

In [None]:
# config = {
#         "l1": 120,
#         "lr": 1e-2,
#         "batch_size": 4,
#         "momentum": 0.9
#     }
# train_cifar(config=config)

In [None]:
def test_accuracy(net, device="cpu", root_dir="train", csv_file="train.csv",
                  batch_size=4, num_workers=2):
    """Return the fraction of labelled images that ``net`` classifies correctly.

    With the default arguments this scores the labelled images in ``train``
    listed by ``train.csv``, i.e. the training data rather than a held-out
    test set.

    Args:
        net (nn.Module): Model to evaluate; expected to already be on ``device``.
        device (str or torch.device): Device the batches are moved to.
        root_dir (str): Directory with the images.
        csv_file (str): CSV file with image names and labels.
        batch_size (int): Evaluation batch size.
        num_workers (int): Number of DataLoader worker processes.

    Returns:
        float: ``correct / total`` over all samples.

    Raises:
        ValueError: If the dataset yields no samples.
    """
    data_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5]),
    ])
    dataset = CellPictureDataset(
        root_dir=os.path.abspath(root_dir),
        csv_file=os.path.abspath(csv_file),
        transform=data_transform)
    # Accuracy does not depend on sample order, so shuffling is unnecessary.
    loader = DataLoader(dataset, batch_size=batch_size, shuffle=False,
                        num_workers=num_workers)

    correct = 0
    total = 0
    # Evaluate in eval mode, then restore whatever mode the caller had set.
    was_training = net.training
    net.eval()
    try:
        with torch.no_grad():
            for images, labels in loader:
                images, labels = images.to(device), labels.to(device)
                outputs = net(images)
                _, predicted = torch.max(outputs.data, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
    finally:
        net.train(was_training)

    if total == 0:
        raise ValueError("no samples to evaluate in {}".format(root_dir))
    return correct / total

In [None]:
def main(num_samples=10, max_num_epochs=10, gpus_per_trial=2):
    """Run a Ray Tune hyperparameter search and evaluate the best trial.

    Samples ``l1``, ``lr``, ``batch_size`` and ``momentum``, runs
    ``train_cifar`` under an ASHA scheduler that minimises the reported
    ``loss``, prints the best trial's configuration and last reported
    metrics, then reloads that trial's checkpoint and prints the accuracy
    returned by ``test_accuracy``.

    Args:
        num_samples (int): Number of configurations to sample.
        max_num_epochs (int): ``max_t`` passed to the ASHA scheduler.
        gpus_per_trial (int or float): GPUs requested per trial. Use 0 on a
            machine without GPUs.

    Raises:
        RuntimeError: If no trial reported a ``loss`` metric.
    """
    configs = {
        # Hidden layer width: one of 4, 8 or 16.
        "l1": tune.sample_from(lambda _: 2 ** np.random.randint(2, 5)),
        "lr": tune.loguniform(1e-4, 1e-1),
        "batch_size": tune.choice([8, 16, 32, 64]),
        # Only has an effect if the trainable reads config["momentum"].
        "momentum": tune.loguniform(5e-1, 9e-1)
    }
    scheduler = ASHAScheduler(
        metric="loss",
        mode="min",
        max_t=max_num_epochs,
        grace_period=1,
        reduction_factor=2)
    reporter = CLIReporter(
        metric_columns=["loss", "accuracy", "training_iteration"])
    # Pass the data location as an absolute path.
    # NOTE(review): Ray Tune may run each trial from its own working
    # directory, where relative paths such as "train.csv" would not resolve.
    result = tune.run(
        partial(train_cifar, data_dir=os.getcwd()),
        resources_per_trial={"cpu": 2, "gpu": gpus_per_trial},
        config=configs,
        num_samples=num_samples,
        scheduler=scheduler,
        progress_reporter=reporter)

    best_trial = result.get_best_trial("loss", "min", "last")
    if best_trial is None:
        raise RuntimeError(
            "no trial reported a 'loss' metric; cannot select a best trial")
    print("Best trial config: {}".format(best_trial.config))
    print("Best trial final validation loss: {}".format(
        best_trial.last_result["loss"]))
    print("Best trial final validation accuracy: {}".format(
        best_trial.last_result["accuracy"]))

    device = "cuda" if torch.cuda.is_available() else "cpu"

    best_checkpoint_dir = best_trial.checkpoint.dir_or_data
    model_state, _ = torch.load(
        os.path.join(best_checkpoint_dir, "checkpoint"), map_location=device)
    # A checkpoint saved from an nn.DataParallel model prefixes every key with
    # "module."; strip it so the weights load whether or not the trial and
    # this process wrap the model the same way.
    prefix = "module."
    model_state = {
        (key[len(prefix):] if key.startswith(prefix) else key): value
        for key, value in model_state.items()
    }

    best_trained_model = MyConvNet(best_trial.config["l1"])
    best_trained_model.load_state_dict(model_state)
    if device == "cuda" and gpus_per_trial > 1:
        best_trained_model = nn.DataParallel(best_trained_model)
    best_trained_model.to(device)

    # NOTE(review): test_accuracy reads the labelled "train" data by default,
    # so this figure is not a held-out test score despite the message.
    test_acc = test_accuracy(best_trained_model, device)
    print("Best trial test set accuracy: {}".format(test_acc))


# if __name__ == "__main__":
    # You can change the number of GPUs per trial here:

In [None]:
# if __name__ == "__main__":
#     main(num_samples=10, max_num_epochs=10, gpus_per_trial=0)
    # config = {
    #     "l1": 120,
    #     "l2": 84,
    #     "lr": 1e-2,
    #     "batch_size": 4,
    #     "momentum": 0.9
    # }
    # train_cifar(config=config)