In [3]:
!pip install pytorch-metric-learning
!pip install faiss-gpu

Collecting pytorch-metric-learning
  Downloading pytorch_metric_learning-1.0.0-py3-none-any.whl (102 kB)
[?25l[K     |███▏                            | 10 kB 33.5 MB/s eta 0:00:01[K     |██████▍                         | 20 kB 22.6 MB/s eta 0:00:01[K     |█████████▋                      | 30 kB 18.1 MB/s eta 0:00:01[K     |████████████▊                   | 40 kB 15.7 MB/s eta 0:00:01[K     |████████████████                | 51 kB 7.9 MB/s eta 0:00:01[K     |███████████████████▏            | 61 kB 7.8 MB/s eta 0:00:01[K     |██████████████████████▎         | 71 kB 8.2 MB/s eta 0:00:01[K     |█████████████████████████▌      | 81 kB 9.1 MB/s eta 0:00:01[K     |████████████████████████████▊   | 92 kB 9.6 MB/s eta 0:00:01[K     |███████████████████████████████▉| 102 kB 7.7 MB/s eta 0:00:01[K     |████████████████████████████████| 102 kB 7.7 MB/s 
Installing collected packages: pytorch-metric-learning
Successfully installed pytorch-metric-learning-1.0.0
Collecting fais

In [7]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import matplotlib.pyplot as plt

### MNIST code originally from https://github.com/pytorch/examples/blob/master/mnist/main.py ###
from torchvision import datasets, transforms

from pytorch_metric_learning import distances, losses, miners, reducers, testers
from pytorch_metric_learning.utils.accuracy_calculator import AccuracyCalculator

######################## Timo's Code ########################
import pandas as pd
import numpy as np
import PIL
import os
from os.path import isfile, join
from google.colab import drive
import toml

class FAUPapyrusCollectionDataset(torch.utils.data.Dataset):
    """FAUPapyrusCollection dataset."""
    def __init__(self, root_dir, processed_frame, transform=None):

        self.root_dir = root_dir
        self.processed_frame = processed_frame
        self.transform = transform

    def __len__(self):
        return len(self.processed_frame)       

    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()

        img_name = os.path.join(self.root_dir,
                                self.processed_frame.iloc[idx, 1])
        
        img_name = img_name + '.png'
        
        #image = io.imread(img_name , plugin='matploPILtlib')        
        image = PIL.Image.open(img_name)
        if self.transform:
            image = self.transform(image)         

        papyID = self.processed_frame.iloc[idx,3]


        return image, papyID
########################################################################    


### MNIST code originally from https://github.com/pytorch/examples/blob/master/mnist/main.py ###
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, 3, 1)
        self.conv2 = nn.Conv2d(32, 64, 3, 1)
        self.dropout1 = nn.Dropout2d(0.25)
        self.dropout2 = nn.Dropout2d(0.5)
        self.fc1 = nn.Linear(12544, 128)

    def forward(self, x):
        x = self.conv1(x)
        x = F.relu(x)
        x = self.conv2(x)
        x = F.relu(x)
        x = F.max_pool2d(x, 2)
        x = self.dropout1(x)
        x = torch.flatten(x, 1)
        x = self.fc1(x)
        return 
        
def plot_acc(val_precision_at_1_values, epochs, output_path):  
  epochs = np.arange(1, epochs + 1)
  plt.style.use('seaborn')
  width = 460
  tex_fonts = {
      # Use LaTeX to write all text
      "text.usetex": True,
      "font.family": "serif",
      # Use 10pt font in plots, to match 10pt font in document
      "axes.labelsize": 10,
      "font.size": 10,
      # Make the legend/label fonts a little smaller
      "legend.fontsize": 8,
      "xtick.labelsize": 8,
      "ytick.labelsize": 8,
      "legend.loc":'lower left'
  }
  plt.rcParams.update(tex_fonts)
  
  fig, ax = plt.subplots(1, 1, figsize=set_size(width))
  ax.plot(epochs, val_precision_at_1_values, 'm', label='Val P@1', linestyle='dotted', linewidth=.3)


  ax.set_xlabel('Epochs')
  ax.set_ylabel('Accuracy')
  ax.legend()
  fig.savefig(output_path + '/acc.pdf', format='pdf', bbox_inches='tight')
  plt.close()

def plot_table(setting, param, dml_param, output_path):  
  ########## Plot Settings ##################
  setting_name_list = list(setting.keys())
  setting_value_list = list(setting.values())
  setting_name_list, setting_value_list = replace_helper(setting_name_list, setting_value_list)
  vals = np.array([setting_name_list, setting_value_list], dtype=str).T
  fig, ax = plt.subplots(1, 1, figsize=set_size(width))
  ax.table(cellText=vals, colLabels=['Setting', 'Value'], loc='center', zorder=3, rowLoc='left', cellLoc='left')
  ax.set_title('Experiment Settings')
  ax.set_xticks([])
  ax.set_yticks([])
  fig.savefig(output_path + '/settings.pdf', format='pdf', bbox_inches='tight')
  plt.close()

  ########## Plot Params ##################
  param_name_list = param.keys()
  param_value_list = param.values()
  param_name_list, param_value_list = replace_helper(param_name_list, param_value_list)
  param_vals = np.array([list(param_name_list), list(param_value_list)], dtype=str).T
  fig, ax = plt.subplots(1, 1, figsize=set_size(width))
  ax.table(cellText=param_vals, colLabels=['Hyperparameter', 'Value'], loc='center', zorder=3, rowLoc='left', cellLoc='left')
  ax.set_title('Hyperparaeters')
  ax.set_xticks([])
  ax.set_yticks([])
  fig.savefig(output_path + '/params.pdf', format='pdf', bbox_inches='tight')
  plt.close()

  ########## Plot DML Params ##################
  dml_param_name_list = dml_param.keys()
  dml_param_value_list = dml_param.values()
  dml_param_name_list, dml_param_value_list = replace_helper(dml_param_name_list, dml_param_value_list)
  dml_param_vals = np.array([list(dml_param_name_list), list(dml_param_value_list)], dtype=str).T  
  fig, ax = plt.subplots(1, 1, figsize=set_size(width))
  ax.table(cellText=dml_param_vals, colLabels=['DML Hyperparameter', 'Value'], loc='center', zorder=3, rowLoc='left', cellLoc='left')
  ax.set_title('DML Hyperparameters')
  ax.set_xticks([])
  ax.set_yticks([])
  fig.savefig(output_path + '/dml_params.pdf', format='pdf', bbox_inches='tight')
  plt.close()

def create_output_dir(name, experiment_name, x=1):
  while True:
        dir_name = (name + (str(x) + '_iteration_' if x is not 0 else '') + 'of_experiment_' + experiment_name).strip()
        if not os.path.exists(dir_name):
            os.mkdir(dir_name)            

            return dir_name
        else:
            x = x + 1

def create_logging(setting, param, dml_param, train_losses, test_precisions_at_1, epochs, output_dir):
  plot_table(setting, param, dml_param, output_dir)
  plot_loss(train_losses, epochs, output_path)
  plot_acc(test_precisions_at_1, epochs, output_path)
  pdfs = ['/gradients.pdf', '/loss.pdf', '/acc.pdf', '/params.pdf','/dml_params.pdf', '/settings.pdf']
  bookmarks = ['Gradients', 'Loss', 'Accuracy', 'Hyperparameters','DML Hyperparameters', 'Seetings']
  merger = PdfFileMerger()

  for i, pdf in enumerate(pdfs):
      merger.append(output_dir + pdf, bookmark=bookmarks[i])
  
  pdf = FPDF()   
  pdf.add_page() 
  pdf.set_font("Helvetica", size = 6)
  
  f = open("log.txt", "r")
  for x in f:
    pdf.cell(200, 6, txt = x, ln = 1, align = 'l')

  pdf.output("log.pdf")   
  merger.append("log.pdf", bookmark='Log')
  merger.write(output_dir + "/report.pdf")
  merger.close()  
  copyfile('log.txt', output_dir + '/log.txt')

def plot_loss(train_losses, epochs, output_path):  
  
  epochs = np.arange(1, epochs + 1)
  train_loss_values = np.array(train_loss_values)
  val_loss_values = np.array(val_loss_values)
  plt.style.use('seaborn')
  width = 460
  
  if True:
    tex_fonts = {
        # Use LaTeX to write all text
        "text.usetex": False,
        "font.family": "serif",
        # Use 10pt font in plots, to match 10pt font in document
        "axes.labelsize": 10,
        "font.size": 10,
        # Make the legend/label fonts a little smaller
        "legend.fontsize": 8,
        "xtick.labelsize": 8,
        "ytick.labelsize": 8,
        "legend.loc":'lower left'
    }

  plt.rcParams.update(tex_fonts)
  
  fig, ax = plt.subplots(1, 1, figsize=set_size(width))

  # plot original lines
  ax.plot(epochs, train_loss_values, 'b', label='Training Loss', linestyle='dotted')
  #ax.plot(epochs, val_loss_values, 'g', label='Validation Loss', linestyle='dotted')

  
  ax.set_title('Training')
  ax.set_xlabel('Epochs')
  ax.set_ylabel('Loss')
  ax.legend()
  # Save and remove excess whitespace
  fig.savefig(output_path + '/loss.pdf', format='pdf', bbox_inches='tight')
  plt.close()


### MNIST code originally from https://github.com/pytorch/examples/blob/master/mnist/main.py ###
def train(model, loss_func, mining_func, device, train_loader, optimizer, epoch):
    model.train()
    for batch_idx, (data, labels) in enumerate(train_loader):
        data, labels = data.to(device), labels.to(device)
        optimizer.zero_grad()
        embeddings = model(data)
        indices_tuple = mining_func(embeddings, labels)
        loss = loss_func(embeddings, labels, indices_tuple)
        loss.backward()
        optimizer.step()
        if batch_idx % 20 == 0:
            print(
                "Epoch {} Iteration {}: Loss = {}, Number of mined triplets = {}".format(
                    epoch, batch_idx, loss, mining_func.num_triplets
                )
            )
        return loss

### convenient function from pytorch-metric-learning ###
def get_all_embeddings(dataset, model):
    tester = testers.BaseTester()
    return tester.get_all_embeddings(dataset, model)


### compute accuracy using AccuracyCalculator from pytorch-metric-learning ###
def test(train_set, test_set, model, accuracy_calculator):
    train_embeddings, train_labels = get_all_embeddings(train_set, model)
    test_embeddings, test_labels = get_all_embeddings(test_set, model)
    train_labels = train_labels.squeeze(1)
    test_labels = test_labels.squeeze(1)
    print("Computing accuracy")
    accuracies = accuracy_calculator.get_accuracy(
        test_embeddings, train_embeddings, test_labels, train_labels, False
    )
    print("Test set accuracy (Precision@1) = {}".format(accuracies["precision_at_1"]))
    return accuracies["precision_at_1"]

def gradient_visualization(parameters, output_dir):
    """
    Returns the parameter gradients over the epoch.
    :param parameters: parameters of the network
    :type parameters: iterator
    :param results_folder: path to results folder
    :type results_folder: str
    """
    tex_fonts = {
    # Use LaTeX to write all text
    "text.usetex": False,
    "font.family": "serif",
    # Use 10pt font in plots, to match 10pt font in document
    "axes.labelsize": 10,
    "font.size": 10,
    # Make the legend/label fonts a little smaller
    "legend.fontsize": 8,
    "xtick.labelsize": 8,
    "ytick.labelsize": 8,
    "legend.loc":'lower left'
}

    plt.rcParams.update(tex_fonts)

    ave_grads = []
    layers = []


    for n, p in parameters:
        if (p.requires_grad) and ("bias" not in n):
            layers.append(n)
            ave_grads.append(p.grad.abs().mean())
    plt.plot(ave_grads, alpha=0.3, color="b")
    plt.hlines(0, 0, len(ave_grads) + 1, linewidth=1, color="k")
    plt.xticks(range(0, len(ave_grads), 1), layers, rotation="vertical")
    plt.xlim(xmin=0, xmax=len(ave_grads))
    plt.xlabel("Layers")
    plt.ylabel("average gradient")
    plt.title("Gradient Visualization")
    plt.grid(True)
    plt.tight_layout()
    plt.savefig(output_dir + "/gradients.pdf")
    plt.close()


device = torch.device("cuda")

transform = transforms.Compose(
    [transforms.Resize((32,32)), transforms.Grayscale(),transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))]
)

### Timo's

drive.mount('/content/gdrive')

def create_processed_info(path, debug=False):
  if debug:
    info_path = join(path, 'debug_processed_info.csv')
  else:
    info_path = join(path, 'processed_info.csv')
  if isfile(info_path):
    processed_frame = pd.read_csv(info_path, index_col=0, dtype={'fnames':str,'papyID':int,'posinfo':str, 'pixelCentimer':float}, header=0)    
  else:    
    fnames = [f for f in listdir(path) if isfile(join(path, f))]
    fnames = [ x for x in fnames if ".png" in x ]
    fnames = [f.split('.',1)[0] for f in fnames]
    fnames_frame = pd.DataFrame(fnames,columns=['fnames'])
    fragmentID = pd.DataFrame([f.split('_',1)[0] for f in fnames], columns=['fragmentID'])
    fnames_raw = [f.split('_',1)[1] for f in fnames]
    processed_frame = pd.DataFrame(fnames_raw, columns=['fnames_raw'])
    
    processed_frame = pd.concat([processed_frame, fnames_frame], axis=1)

    processed_frame = pd.concat([processed_frame, fragmentID], axis=1)
    processed_frame['papyID'] = processed_frame.fnames_raw.apply(lambda x: x.split('_',1)[0])
    processed_frame['posinfo'] = processed_frame.fnames_raw.apply(lambda x: ''.join(filter(str.isalpha, x)))
    processed_frame['pixelCentimer'] = processed_frame.fnames_raw.progress_apply(retrive_size_by_fname)
    processed_frame.to_csv(info_path)
     
  return processed_frame

config = toml.load('./gdrive/MyDrive/mt/conf/conf.toml')
setting = config.get('settings')
param = config.get('params')
dml_param = config.get('dml_params')

output_dir = create_output_dir(setting['output'], setting['experiment_name'])
processed_frame_train = create_processed_info(setting['path_train'])
processed_frame_val = create_processed_info(setting['path_val'])

dataset1 = FAUPapyrusCollectionDataset(setting['path_train'], processed_frame_train, transform)
dataset2 = FAUPapyrusCollectionDataset(setting['path_train'], processed_frame_train, transform)

batch_size = 256

#dataset1 = datasets.MNIST(".", train=True, download=True, transform=transform)
#dataset2 = datasets.MNIST(".", train=False, transform=transform)
train_loader = torch.utils.data.DataLoader(dataset1, batch_size=256, shuffle=True, drop_last=True)
test_loader = torch.utils.data.DataLoader(dataset2, batch_size=256, drop_last=True)

model = Net().to(device)
optimizer = optim.Adam(model.parameters(), lr=0.01)
num_epochs = 4


### pytorch-metric-learning stuff ###
distance = distances.CosineSimilarity()
reducer = reducers.ThresholdReducer(low=0)
loss_func = losses.TripletMarginLoss(margin=0.2, distance=distance, reducer=reducer)
mining_func = miners.TripletMarginMiner(
    margin=0.2, distance=distance, type_of_triplets="semihard"
)
accuracy_calculator = AccuracyCalculator(include=("precision_at_1",), k=1)
### pytorch-metric-learning stuff ###

train_losses = []
test_precisions_at_1 = []


for epoch in range(1, num_epochs + 1):
    train_loss = train(model, loss_func, mining_func, device, train_loader, optimizer, epoch)
    test_precision_at_1 = test(dataset1, dataset2, model, accuracy_calculator)
    
    train_losses.append(train_loss)
    test_precisions_at_1.append(test_precision_at_1)
    gradient_visualization(model.named_parameters(), output_dir)
    create_logging(setting, param, dml_param, train_losses, test_precisions_at_1, epoch, output_dir)

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


AttributeError: ignored