# In [None]:
from __future__ import print_function
from __future__ import division

import json

import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import torchvision
from sklearn.metrics import roc_auc_score
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import time
import os
import copy
import device
from torchvision.models import AlexNet_Weights, ResNet18_Weights, feature_extraction, ResNet
from torchvision.models.feature_extraction import create_feature_extractor
import pandas as pd

# Report the library versions used for this run.
print("PyTorch Version: ", torch.__version__)
print("Torchvision Version: ", torchvision.__version__)

# Dataset root. It is read through ImageFolder, with one sub-directory per
# split ('train' and 'val').
data_dir = "/home/studente1/Desktop/vipera"

# Architecture key handed to initialize_model, which accepts "resnet",
# "alexnet" and "inception".
model_name = "resnet"

# Output size of the replacement classification layer.
num_classes = 2

# Training length and mini-batch size (lower the batch size if memory is tight).
num_epochs, batch_size = 40, 8

# True: only the newly attached layer is trained.
# False: the whole network is fine-tuned.
feature_extract = True


def train_model(model, dataloaders, criterion, optimizer, num_epochs=25, is_inception=False):
    """Train ``model`` and restore the weights of its best validation epoch.

    Every epoch runs a 'train' phase followed by a 'val' phase. The state
    dict with the highest validation accuracy seen so far is remembered and
    loaded back into ``model`` before returning.

    Args:
        model: network to optimise. Batches are moved to the device of its
            first parameter (CPU when the model has no parameters).
        dataloaders: mapping with the keys 'train' and 'val'; each value is
            iterated for (inputs, labels) batches and must expose
            ``.dataset`` so per-sample averages can be computed.
        criterion: callable returning the loss for (outputs, labels).
        optimizer: optimizer stepped once per training batch.
        num_epochs: number of train/val epochs to run.
        is_inception: when True the model is expected to return
            (outputs, aux_outputs) in the train phase; the auxiliary loss is
            added with weight 0.4.

    Returns:
        Tuple ``(model, val_acc_history)``; the history holds one accuracy
        value (a 0-dim tensor) per epoch.
    """
    since = time.time()

    # Take the target device from the model itself instead of relying on a
    # module-level ``device`` global, so inputs always land where the
    # weights live.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    val_acc_history = []

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()  # Set model to training mode
            else:
                model.eval()  # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0

            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)

                # Gradients accumulate by default, so clear them per batch.
                optimizer.zero_grad()

                # Track history only while training.
                with torch.set_grad_enabled(phase == 'train'):
                    if is_inception and phase == 'train':
                        # Inception returns an auxiliary output in train
                        # mode; both losses are combined. In evaluation only
                        # the final output is used.
                        # From https://discuss.pytorch.org/t/how-to-optimize-inception-model-with-auxiliary-classifiers/7958
                        outputs, aux_outputs = model(inputs)
                        loss1 = criterion(outputs, labels)
                        loss2 = criterion(aux_outputs, labels)
                        loss = loss1 + 0.4 * loss2
                    else:
                        outputs = model(inputs)
                        loss = criterion(outputs, labels)

                    _, preds = torch.max(outputs, 1)

                    # backward + optimize only if in training phase
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                # loss.item() is a per-batch mean, so weight it by batch size.
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels)

            epoch_loss = running_loss / len(dataloaders[phase].dataset)
            epoch_acc = running_corrects.double() / len(dataloaders[phase].dataset)

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))

            if phase == 'val':
                # Snapshot the weights whenever validation accuracy improves.
                if epoch_acc > best_acc:
                    best_acc = epoch_acc
                    best_model_wts = copy.deepcopy(model.state_dict())
                val_acc_history.append(epoch_acc)

        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:.4f}'.format(best_acc))

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model, val_acc_history


def set_parameter_requires_grad(model, feature_extracting):
    """Freeze every parameter of ``model`` when feature extracting.

    Args:
        model: module whose parameters may be frozen in place.
        feature_extracting: when truthy, ``requires_grad`` is set to False
            on all current parameters; when falsy the model is left as is.

    The model's layers are not modified here. Attaching a new output layer
    is architecture specific (e.g. a hard-coded 512-input ``fc`` is wrong
    for anything but ResNet-18) and is left to the caller.
    """
    if not feature_extracting:
        return
    for param in model.parameters():
        param.requires_grad = False


def initialize_model(model_name, num_classes, feature_extract, use_pretrained=True):
    """Build a supported torchvision network with a new output layer.

    Args:
        model_name: "resnet" (ResNet-18), "alexnet" or "inception"
            (Inception v3).
        num_classes: number of outputs of the replacement layer(s).
        feature_extract: forwarded to set_parameter_requires_grad before the
            new layer(s) are attached.
        use_pretrained: when True the IMAGENET1K_V1 weights are loaded;
            when False the network starts from its default initialisation.

    Returns:
        Tuple ``(model_ft, input_size)`` where ``input_size`` is the image
        side length used for the architecture (224, or 299 for Inception).

    Raises:
        ValueError: if ``model_name`` is not one of the supported keys.
    """
    print("********   use_pretrained: ", use_pretrained)

    if model_name == "resnet":
        # ResNet-18
        weights = ResNet18_Weights.IMAGENET1K_V1 if use_pretrained else None
        model_ft = models.resnet18(weights=weights)
        # Read the head size before the freeze helper runs so the new layer
        # always matches the backbone.
        num_ftrs = model_ft.fc.in_features
        set_parameter_requires_grad(model_ft, feature_extract)
        model_ft.fc = nn.Linear(num_ftrs, num_classes)
        input_size = 224

    elif model_name == "alexnet":
        # AlexNet: the last classifier entry is the output layer.
        weights = AlexNet_Weights.IMAGENET1K_V1 if use_pretrained else None
        model_ft = models.alexnet(weights=weights)
        num_ftrs = model_ft.classifier[6].in_features
        set_parameter_requires_grad(model_ft, feature_extract)
        model_ft.classifier[6] = nn.Linear(num_ftrs, num_classes)
        input_size = 224

    elif model_name == "inception":
        # Inception v3: expects (299, 299) images and has an auxiliary output.
        weights = models.Inception_V3_Weights.IMAGENET1K_V1 if use_pretrained else None
        model_ft = models.inception_v3(weights=weights)
        aux_ftrs = model_ft.AuxLogits.fc.in_features
        num_ftrs = model_ft.fc.in_features
        set_parameter_requires_grad(model_ft, feature_extract)
        # Replace both the auxiliary and the primary classifier.
        model_ft.AuxLogits.fc = nn.Linear(aux_ftrs, num_classes)
        model_ft.fc = nn.Linear(num_ftrs, num_classes)
        input_size = 299

    else:
        # Fail loudly: printing and calling exit() reported success (status 0).
        raise ValueError("Invalid model name: {!r}".format(model_name))

    return model_ft, input_size


# Build the network selected by the run configuration and show its layout.
model_ft, input_size = initialize_model(
    model_name, num_classes, feature_extract, use_pretrained=True
)
print(model_ft)

# Pre-processing pipelines: random crop + flip for training, deterministic
# resize + centre crop for validation; both end with the same normalisation.
normalize = transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
data_transforms = {
    'train': transforms.Compose([
        transforms.RandomResizedCrop(input_size),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        normalize,
    ]),
    'val': transforms.Compose([
        transforms.Resize(input_size),
        transforms.CenterCrop(input_size),
        transforms.ToTensor(),
        normalize,
    ]),
}

print("Initializing Datasets and Dataloaders...")

splits = ['train', 'val']

# One ImageFolder dataset per split, read from <data_dir>/<split>.
image_datasets = {
    split: datasets.ImageFolder(os.path.join(data_dir, split), data_transforms[split])
    for split in splits
}

# One shuffled loader per split.
dataloaders_dict = {
    split: torch.utils.data.DataLoader(
        image_datasets[split], batch_size=batch_size, shuffle=True, num_workers=4
    )
    for split in splits
}

# Use the first CUDA device when one is available, otherwise the CPU.
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
model_ft = model_ft.to(device)

# List the parameters that will receive gradients. When feature extracting,
# only those are handed to the optimizer; otherwise the optimizer gets every
# parameter of the model.
print("Params to learn:")
trainable = [
    (name, param) for name, param in model_ft.named_parameters() if param.requires_grad
]
for name, param in trainable:
    print("\t", name)

if feature_extract:
    params_to_update = [param for _, param in trainable]
else:
    params_to_update = model_ft.parameters()

optimizer_ft = optim.SGD(params_to_update, lr=0.001, momentum=0.9)

# Loss function
criterion = nn.CrossEntropyLoss()

# Train and evaluate; only the validation history is kept, the model itself
# is released afterwards.
model_ft, hist = train_model(
    model_ft,
    dataloaders_dict,
    criterion,
    optimizer_ft,
    num_epochs=num_epochs,
    is_inception=(model_name == "inception"),
)
del model_ft

# NOTE(review): the device is pinned to the CPU here instead of auto-detecting
# CUDA. The code further down calls .numpy() on tensors placed on `device`,
# which is presumably the reason - confirm before changing.
device = "cpu"

# Second run for comparison: same architecture, requested without pretrained
# weights (use_pretrained=False) and with every layer trainable.
scratch_model = initialize_model(
    model_name, num_classes, feature_extract=False, use_pretrained=False
)[0].to(device)
scratch_optimizer = optim.SGD(scratch_model.parameters(), lr=0.001, momentum=0.9)
scratch_criterion = nn.CrossEntropyLoss()
_, scratch_hist = train_model(
    scratch_model,
    dataloaders_dict,
    scratch_criterion,
    scratch_optimizer,
    num_epochs=num_epochs,
    is_inception=(model_name == "inception"),
)

# Convert the per-epoch accuracy tensors of both runs to NumPy values.
ohist = [acc.cpu().numpy() for acc in hist]
shist = [acc.cpu().numpy() for acc in scratch_hist]

# Validation accuracy of both runs over the epochs, on a shared 0..1 axis.
epochs = range(1, num_epochs + 1)
plt.title("Validation Accuracy vs. Number of Training Epochs")
plt.xlabel("Training Epochs")
plt.ylabel("Validation Accuracy")
plt.plot(epochs, ohist, label="Pretrained")
plt.plot(epochs, shist, label="Scratch")
plt.ylim((0, 1.))
plt.xticks(np.arange(1, num_epochs + 1, 1.0))
plt.legend()
plt.show()

import torch
import torchvision.models as models
import torchvision.transforms as transforms
import csv

# Feature extractor: every child module of the scratch model except the last
# one, chained in their original order.
modules = [*scratch_model.children()][:-1]
model = torch.nn.Sequential(*modules)

# Deterministic 224x224 pre-processing pipeline. This rebinds the name
# `data_transforms`; nothing in the visible code below reads it.
eval_steps = [
    transforms.Resize(224),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
]
data_transforms = transforms.Compose(eval_steps)

# Per-class CSV tables; the columns 'latitude-norm' and 'longitude-norm' are
# read from them further down.
class1_data = pd.read_csv("/home/studente1/Downloads/aspis_out.csv")
class2_data = pd.read_csv("/home/studente1/Downloads/berus_out.csv")

def _extract_features(network, loader, target_device):
    """Return ``(features, labels)`` for every sample yielded by ``loader``.

    Each batch is passed through ``network`` without gradient tracking and
    flattened to one row per sample; batches are concatenated in the order
    the loader produced them.
    """
    feature_batches = []
    label_batches = []
    with torch.no_grad():
        for inputs, labels in loader:
            outputs = network(inputs.to(target_device))
            # flatten(1) keeps the batch axis. squeeze() would also drop it
            # for a final batch holding a single sample and break torch.cat.
            feature_batches.append(torch.flatten(outputs, 1))
            label_batches.append(labels.to(target_device))
    return torch.cat(feature_batches, dim=0), torch.cat(label_batches, dim=0)


def _append_coordinates(features, labels, tables, next_row):
    """Return ``features`` widened by two columns: latitude and longitude.

    For every sample, the next unused row of the table registered for its
    label supplies 'latitude-norm' and 'longitude-norm'. ``next_row`` maps
    each label to the index of that next row and is advanced in place, so a
    later call continues where the previous one stopped.

    A label without a table prints 'Errore' and keeps zeros as coordinates
    instead of silently reusing the previous sample's values.
    """
    coords = torch.zeros(len(features), 2, dtype=features.dtype, device=features.device)
    # Every row is visited, including the last one.
    for row, label in enumerate(labels.tolist()):
        table = tables.get(label)
        if table is None:
            print('Errore')
            continue
        record = table.iloc[next_row[label]]
        next_row[label] += 1
        coords[row] = torch.tensor(
            [record['latitude-norm'], record['longitude-norm']],
            dtype=features.dtype,
            device=features.device,
        )
    return torch.cat((features, coords), dim=1)


# Label 0 takes its coordinates from class1_data, label 1 from class2_data.
# NOTE(review): rows are handed out in the order samples arrive from the
# loader, not matched to individual images - confirm this is intended.
coordinate_tables = {0: class1_data, 1: class2_data}
next_row = {0: 0, 1: 0}

# Training split: network features plus the two coordinate columns that the
# downstream classifier is trained on.
features_tensor_t, labels_tensor_t = _extract_features(model, dataloaders_dict['train'], device)
features_tensor_t = _append_coordinates(
    features_tensor_t, labels_tensor_t, coordinate_tables, next_row
)

# Validation split: continues with the CSV rows that follow the ones consumed
# by the training split.
features_tensor_v, labels_tensor_v = _extract_features(model, dataloaders_dict['val'], device)
features_tensor_v = _append_coordinates(
    features_tensor_v, labels_tensor_v, coordinate_tables, next_row
)

# Number of rows consumed from each class table.
i_0 = next_row[0]
i_1 = next_row[1]

# Use pandas to write the features and labels used for the new training run to output.csv
from sklearn import svm
import pandas as pd

# Convert once and reuse: training features (network output plus the two
# coordinate columns) and their labels as NumPy arrays.
train_features = features_tensor_t.numpy()
train_labels = labels_tensor_t.numpy().flatten()

# One row per training sample: the label first, then the feature columns.
labels_df = pd.DataFrame({'labels': train_labels})
features_df = pd.DataFrame(train_features)
df = pd.concat([labels_df, features_df], axis=1)
print("\nI write features and labels into output.csv")
df.to_csv("/home/studente1/Desktop/vipera/output.csv", index=False)

# Use the features and labels to train a new classifier, here a linear SVM
# from scikit-learn.
print("\nTraining the model using latitude, longitude and features with SVM linear:")
clf = svm.SVC(kernel='linear')
clf.fit(train_features, train_labels)

# Evaluate on the validation split. The printed value is roc_auc_score
# computed on the predicted class labels, so it is reported as ROC AUC rather
# than as accuracy.
val_predictions = clf.predict(features_tensor_v.numpy())
print("ROC AUC:", roc_auc_score(labels_tensor_v.numpy(), val_predictions))
#https://scikit-learn.org/stable/modules/permutation_importance.html