In [1]:
import os

# Pin GPU enumeration to PCI bus order and expose only device "0" to this
# process. Both variables are set in one mapping update.
os.environ.update({
    "CUDA_DEVICE_ORDER": "PCI_BUS_ID",
    "CUDA_VISIBLE_DEVICES": "0",
})

In [2]:
# ---- Run configuration: everything a reader might tune for one round ----
dataset = 'BreakHis2'
method = 'FedAdaGrad'
i = 9                   # round index; used in the checkpoint and log file names below
epoch = 10              # number of local training epochs
learningRate = 0.001
weightDecay = 0
mu_val = 0              # passed to train_model as `mu`

# Dataset splits (ImageFolder-style directories).
train_dir = '/DATA1/Mangaldeep/V3/Dataset/BreakHis2/train'
val_dir = '/DATA1/Mangaldeep/V3/Dataset/BreakHis2/val'
test_dir = '/DATA1/Mangaldeep/V3/Dataset/BreakHis2/test'

# Output locations for weights and logs.
base_weight_dir = '/DATA1/Mangaldeep/V3/Weight/FedAdaGrad'
base_log_dir = '/DATA1/Mangaldeep/V3/Log/FedAdaGrad'

# Checkpoint loaded before training, and the text log appended to by later cells.
avg_weight_dir = f'{base_weight_dir}/avg{i}.pth'
logfilepath = f'{base_log_dir}/{method}_{dataset}_{i}.txt'

In [3]:
# License: BSD
# Author: Sasank Chilamkurthy

from __future__ import print_function, division

import os
import time
import copy
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import torch.backends.cudnn as cudnn
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
from torchvision.transforms import ToTensor,Resize,Normalize,RandomHorizontalFlip,RandomVerticalFlip,RandomCrop,CenterCrop
from sklearn.metrics import accuracy_score,precision_recall_fscore_support,roc_curve,auc,roc_auc_score,classification_report
import matplotlib.pyplot as plt


plt.ion()   # interactive mode

# Reproducibility: seed every RNG this notebook has imported.
seed = 42
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
np.random.seed(seed)

# cudnn.benchmark auto-tunes the convolution algorithm and the choice can vary
# between runs, which defeats the seeding above. Keep it off whenever
# deterministic behaviour is requested.
cudnn.benchmark = False
cudnn.deterministic = True

In [4]:
# Preprocessing pipelines keyed by split name.
#   'train': resize -> random 224 crop -> random flips -> tensor -> normalize
#   'val'  : resize -> center 224 crop -> tensor -> normalize (no augmentation)
norm_mean = [0.485, 0.456, 0.406]
norm_std = [0.229, 0.224, 0.225]
bicubic = transforms.InterpolationMode.BICUBIC

train_steps = [
    transforms.Resize((256, 256), interpolation=bicubic),
    transforms.RandomCrop(224),
    transforms.RandomHorizontalFlip(p=0.3),
    transforms.RandomVerticalFlip(p=0.3),
    transforms.ToTensor(),
    transforms.Normalize(norm_mean, norm_std),
]
val_steps = [
    transforms.Resize((256, 256), interpolation=bicubic),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(norm_mean, norm_std),
]

data_transforms = {
    'train': transforms.Compose(train_steps),
    'val': transforms.Compose(val_steps),
}

#data_dir = 'data/hymenoptera_data'
# Training split with augmentation. Note that `test_dataset` / `test_loader`
# are built from `val_dir`, i.e. they hold the validation split used during
# training, not the held-out test set.
train_dataset = datasets.ImageFolder(train_dir, data_transforms['train'])
test_dataset = datasets.ImageFolder(val_dir, data_transforms['val'])

# Both loaders share the same settings: shuffled batches of 10 images, one worker.
loader_options = dict(batch_size=10, num_workers=1, shuffle=True)

train_loader = torch.utils.data.DataLoader(train_dataset, **loader_options)
test_loader = torch.utils.data.DataLoader(test_dataset, **loader_options)

# Bookkeeping used by train_model (per-split sample counts, target device)
# and by the checkpoint file name (sample_size).
sample_size = len(train_dataset)
dataset_sizes = {'train': sample_size, 'val': len(test_dataset)}
class_names = train_dataset.classes
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")

# Append a run header to the log file, one entry per line.
header_lines = [
    f"{method} training started for {dataset} dataset",
    f"{i}th round learning rate {learningRate} mu value {mu_val}",
    f"Train sample_size: {len(train_dataset)} ,validation: {len(test_dataset)}",
]
with open(logfilepath, 'a') as file:
    for header in header_lines:
        file.write(header + '\n')

In [5]:
# Per-epoch loss histories; train_model appends to these module-level lists.
train_err, val_err = [], []

In [6]:
def difference_models_norm_2(model_1, model_2):
    """Sum of squared element-wise differences between two models' parameters.

    Parameters are paired by position in ``model.parameters()``. The value
    returned is the *squared* L2 distance (no square root is taken). It stays
    attached to the autograd graph of whichever parameters require gradients.
    """
    params_a = list(model_1.parameters())
    params_b = list(model_2.parameters())

    total = 0
    for idx, param_a in enumerate(params_a):
        total = total + torch.sum((param_a - params_b[idx]) ** 2)

    return total

def train_model(model, criterion, optimizer, scheduler, num_epochs=25,mu=0.5):
    """Train ``model`` with an optional proximal penalty and return the best checkpoint.

    Every epoch runs a training pass over ``train_loader`` and then an
    evaluation pass over ``test_loader``. The per-batch loss is::

        criterion(outputs, labels) + len(labels) * mu / 2 * ||w - w_0||^2

    where ``w_0`` is a frozen snapshot of the model's parameters taken on
    entry. The penalty is skipped entirely when ``mu == 0``.

    Relies on notebook-level globals: ``train_loader``, ``test_loader``,
    ``dataset_sizes``, ``device``, ``logfilepath``,
    ``difference_models_norm_2``, and the lists ``train_err`` / ``val_err``,
    which receive one loss value per epoch.

    Args:
        model: network to train; modified in place.
        criterion: loss callable taking ``(outputs, labels)``.
        optimizer: optimizer over ``model``'s parameters.
        scheduler: LR scheduler, stepped once after each training pass.
        num_epochs: number of train+val epochs to run.
        mu: coefficient of the proximal penalty.

    Returns:
        ``model`` loaded with the weights from the epoch that achieved the
        highest validation accuracy (the initial weights if no epoch
        exceeded 0 accuracy).
    """
    since = time.time()

    # Snapshot of the starting weights used by the proximal term. It is frozen
    # so backward() does not accumulate unused gradients into the copy.
    model_0 = copy.deepcopy(model)
    for ref_param in model_0.parameters():
        ref_param.requires_grad_(False)
    use_prox = mu != 0  # mu == 0 makes the term exactly zero; skip the work

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0
    best_valLoss = 0.0
    loaders = {'train': train_loader, 'val': test_loader}

    for epoch in range(num_epochs):
        print(f'Epoch {epoch}/{num_epochs - 1}')
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            is_train = phase == 'train'
            if is_train:
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0  # plain Python int, so empty loaders are safe

            for inputs, labels in loaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)

                if is_train:
                    optimizer.zero_grad()

                # Track autograd history only while training
                with torch.set_grad_enabled(is_train):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)
                    if use_prox:
                        loss = loss + len(labels)*mu/2*difference_models_norm_2(model,model_0)

                    if is_train:
                        loss.backward()
                        optimizer.step()

                # statistics
                running_loss += loss.item() * inputs.size(0)
                running_corrects += int(torch.sum(preds == labels.data).item())

            if is_train:
                scheduler.step()

            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects / dataset_sizes[phase]
            if is_train:
                train_err.append(epoch_loss)
            else:
                val_err.append(epoch_loss)

            print(f'{phase} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')
            with open(logfilepath, 'a') as file:
                file.write(f'{phase} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')
                file.write('\n')

            # Keep a copy of the weights from the best validation epoch so far
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())
                best_valLoss = epoch_loss

        print()

    time_elapsed = time.time() - since
    print(f'Training complete in {time_elapsed // 60:.0f}m {time_elapsed % 60:.0f}s')
    print(f'Best val Acc: {best_acc:4f} Validation loss : {best_valLoss:4f}')
    with open(logfilepath, 'a') as file:
        file.write(f'Training complete in {time_elapsed // 60:.0f}m {time_elapsed % 60:.0f}s')
        file.write('\n')
        file.write(f'Best val Acc: {best_acc:4f} Validation loss : {best_valLoss:4f}')
        file.write('\n')
        file.write(f'Training error = {train_err}')
        file.write('\n')
        file.write(f'Validation error = {val_err}')
        file.write('\n')

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model

In [7]:
# EfficientNet-B3 without pretrained weights; the final classifier layer is
# replaced by a 2-output linear head.
model_ft = models.efficientnet_b3(weights=None)
num_ftrs = model_ft.classifier[1].in_features
model_ft.classifier[1] = nn.Linear(num_ftrs, 2)

# Warm-start from the checkpoint at `avg_weight_dir` (presumably the aggregated
# weights of the previous round - confirm against the aggregation notebook).
# NOTE(review): torch.load unpickles the file; only load checkpoints from a
# trusted location.
if avg_weight_dir != '':
  model_ft.load_state_dict(torch.load(avg_weight_dir,map_location = 'cpu'))


model_ft = model_ft.to(device)
model_ft.train()
criterion = nn.CrossEntropyLoss()

# All parameters are optimized. `weightDecay` is passed through so the value
# written to the log below is the one the optimizer actually uses.
optimizer_ft = optim.Adam(model_ft.parameters(), lr=learningRate, weight_decay=weightDecay)
# Halve the learning rate every 7 scheduler steps (one step per epoch in train_model)
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.5)

with open (logfilepath, 'a') as file:
    file.write(f"Learning rate {learningRate} , Weight decay {weightDecay}")
    file.write('\n')

In [8]:
#print(model_ft)

In [9]:
# Run local training for `epoch` epochs. This cell rebinds `model_ft`, so
# re-running it continues from the already-trained weights.
model_ft = train_model(
    model_ft,
    criterion,
    optimizer_ft,
    exp_lr_scheduler,
    num_epochs=epoch,
    mu=mu_val,
)

Epoch 0/9
----------
train Loss: 0.5868 Acc: 0.7049
val Loss: 0.6731 Acc: 0.5200

Epoch 1/9
----------
train Loss: 0.5411 Acc: 0.7593
val Loss: 0.6774 Acc: 0.6300

Epoch 2/9
----------
train Loss: 0.5398 Acc: 0.7650
val Loss: 0.5950 Acc: 0.6700

Epoch 3/9
----------
train Loss: 0.5554 Acc: 0.7364
val Loss: 1.1336 Acc: 0.8200

Epoch 4/9
----------
train Loss: 0.4779 Acc: 0.7994
val Loss: 0.4623 Acc: 0.7900

Epoch 5/9
----------
train Loss: 0.4775 Acc: 0.7736
val Loss: 0.4852 Acc: 0.8500

Epoch 6/9
----------
train Loss: 0.4504 Acc: 0.8052
val Loss: 0.5027 Acc: 0.8600

Epoch 7/9
----------
train Loss: 0.4439 Acc: 0.8080
val Loss: 0.4970 Acc: 0.8100

Epoch 8/9
----------
train Loss: 0.4498 Acc: 0.8080
val Loss: 0.3796 Acc: 0.8600

Epoch 9/9
----------
train Loss: 0.4102 Acc: 0.8138
val Loss: 0.3990 Acc: 0.8500

Training complete in 2m 52s
Best val Acc: 0.860000 Validation loss : 0.502719


In [10]:
# Persist the trained weights; the file name encodes method, dataset,
# round index and the number of training samples.
model_weight_dir = f'{base_weight_dir}/{method}_{dataset}_{i}_{sample_size}.pth'
torch.save(model_ft.state_dict(), model_weight_dir)

In [11]:
# Final evaluation on the held-out test split, using the deterministic 'val' transform.
# NOTE: this rebinds `test_dataset` / `test_loader`, which train_model reads for
# its 'val' phase. Re-run the data-loading cell before training again, otherwise
# validation would run on the test set.
test_dataset = datasets.ImageFolder(test_dir, transform=data_transforms['val'])
class_names = test_dataset.classes
test_loader = torch.utils.data.DataLoader(
        test_dataset,
        batch_size=1,
        num_workers=0,
        shuffle=False
    )

model_ft.to(device)   # once, not per batch
model_ft.eval()

y_true = []
y_pred = []
y_probas = []
with torch.no_grad():
  # The loop deliberately has no index variable named `i`: `i` is the round
  # index from the configuration cell and must not be overwritten here.
  for inputs, labels in test_loader:
    inputs = inputs.to(device)
    outputs = torch.softmax(model_ft(inputs), dim=1)
    _, preds = torch.max(outputs, 1)
    y_true.append(labels.cpu().numpy())
    y_pred.append(preds.cpu().numpy())
    y_probas.append(outputs.cpu().numpy())

# Flatten the per-batch results: labels and predictions become shape (N,),
# probabilities (N, num_classes). This works for any batch size.
y_true = np.concatenate(y_true)
y_pred = np.concatenate(y_pred)
y_probas = np.concatenate(y_probas, axis=0)

acc = accuracy_score(y_true,y_pred)
precision,recall,fscore,_=precision_recall_fscore_support(y_true, y_pred, average='macro')
# Column 1 is the probability of class index 1 (the classifier head has 2 outputs).
auc_score = roc_auc_score(y_true, y_probas[:,1])

summary = f"accuracy = {round(acc,2)} , precision = {round(precision,2)},recall = {round(recall,2)},fscore = {round(fscore,2)},,auc_score = {round(auc_score,2)}"
report = classification_report(y_true,y_pred)
print(summary)
print(report)
with open (logfilepath, 'a') as file:
    file.write(summary)
    file.write('\n')
    file.write(report)
    file.write('\n')

accuracy = 0.87 , precision = 0.87,recall = 0.82,fscore = 0.84,,auc_score = 0.87
              precision    recall  f1-score   support

           0       0.86      0.70      0.77        63
           1       0.87      0.95      0.91       138

    accuracy                           0.87       201
   macro avg       0.87      0.82      0.84       201
weighted avg       0.87      0.87      0.87       201

