In [1]:
import os
import random
import pickle
import datetime

import torch
import torchmetrics
import torchsummary
import numpy as np
import torch.nn.functional as F
from torch import nn
from torch.utils.data import Dataset
from torch.utils.tensorboard import SummaryWriter
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
import sys

# Make the project root importable so that `import utils` resolves.
# The path is a raw string: in a normal literal "\P" and "\D" are invalid escape
# sequences, which newer Python versions warn about and will eventually reject.
# NOTE(review): this is a machine-specific absolute path; consider deriving it
# from the notebook location instead.
parentdir = r"C:\Projects\DateMatching"
sys.path.insert(0, parentdir)
import utils

In [3]:
# Load the embedding storage from '../Models/emb storage.pkl'; later cells index it
# by integer id (e.g. embeder[13550]).
embeder = utils.PhotoEmbedingStorage('../Models/emb storage.pkl')

In [4]:
# Product of the embeddings stored under ids 13550 and 2169, printed as 'similarity'.
# NOTE(review): this is a cosine similarity only if the stored embeddings are
# L2-normalised -- confirm against PhotoEmbedingStorage.
emb1, emb2 = embeder[13550], embeder[2169]
print('similarity', emb1 @ emb2.T)

similarity tensor([[0.7698]])


In [5]:
# Product of the embeddings stored under ids 13543 and 13544, printed as 'similarity'.
# NOTE(review): this is a cosine similarity only if the stored embeddings are
# L2-normalised -- confirm against PhotoEmbedingStorage.
emb1, emb2 = embeder[13543], embeder[13544]
print('similarity', emb1 @ emb2.T)

similarity tensor([[0.6263]])


In [6]:
# Product of the embeddings stored under ids 13546 and 13547, printed as 'similarity'.
# NOTE(review): this is a cosine similarity only if the stored embeddings are
# L2-normalised -- confirm against PhotoEmbedingStorage.
emb1, emb2 = embeder[13546], embeder[13547]
print('similarity', emb1 @ emb2.T)

similarity tensor([[0.5527]])


In [7]:
# Product of the embeddings stored under ids 13543 and 13550, printed as 'similarity'.
# NOTE(review): this is a cosine similarity only if the stored embeddings are
# L2-normalised -- confirm against PhotoEmbedingStorage.
emb1, emb2 = embeder[13543], embeder[13550]
print('similarity', emb1 @ emb2.T)

similarity tensor([[0.3056]])


In [8]:
# Number of PCA components kept per embedding; also the model's input width.
EMBED_DIM = 32

# Use the GPU when one is visible to torch, otherwise fall back to the CPU.
if torch.cuda.is_available():
    DEVICE = 'cuda'
else:
    DEVICE = 'cpu'

In [9]:
def _list_ids(directory):
    """Return the sorted integer ids encoded in the file names of ``directory``.

    Every file name is expected to look like ``<id>.<ext>``: the part before the
    first dot is parsed with ``int`` (a ``ValueError`` is raised for any name that
    does not follow this pattern).

    The ids are sorted because ``os.listdir`` returns entries in arbitrary order;
    without a fixed input order the seeded train/validation split below would not
    be reproducible across machines or file systems.
    """
    return sorted(int(file_name.split('.')[0]) for file_name in os.listdir(directory))


# "opposite" samples get label 0, "target" samples get label 1. Each class is
# gathered from a `markup_*` directory and a plain directory.
files_opposite = np.array(_list_ids("data/markup_opposite/") + _list_ids("data/opposite/"))
files_target = np.array(_list_ids("data/markup_target/") + _list_ids("data/target/"))

y_opposite = np.zeros_like(files_opposite, dtype='float32')
y_target = np.ones_like(files_target, dtype='float32')

# X holds sample ids (not features); Y holds the matching float32 labels.
X = np.concatenate([files_opposite, files_target])
Y = np.concatenate([y_opposite, y_target])

# Fixed seed + deterministic input order => the same 80/20 split on every run.
Xtrain, Xval, Ytrain, Yval = train_test_split(X, Y, test_size=0.2, random_state=69)

In [10]:
# Collect the held-out test ids: files under data/test_target/ are labelled 1.0,
# files under data/test_opposite/ are labelled 0.0. The id is the part of the file
# name before the first dot.
Xtest = []
Ytest = []

for path, label in (("data/test_target/", 1.), ("data/test_opposite/", 0.)):
    ldir = [int(f.split('.')[0]) for f in os.listdir(path)]
    Xtest.extend(ldir)
    Ytest.extend([label] * len(ldir))

Xtest = np.array(Xtest)
Ytest = np.array(Ytest, dtype='float32')

In [11]:
# Stack the stored embeddings of every training id along the first axis; this
# matrix is what the PCA is fitted on.
embeds = np.concatenate(
    [embeder[int(sample_id)] for sample_id in Xtrain],
    axis=0,
)

In [12]:
# Fit the PCA projection (EMBED_DIM components) on the training embeddings only.
# random_state pins the result when scikit-learn selects a randomized SVD solver,
# so the pickled projection and the model trained on it are reproducible; the seed
# matches the one used for the train/validation split.
pca = PCA(n_components=EMBED_DIM, random_state=69)
pca.fit(embeds)

In [13]:
# Persist the fitted PCA next to the other model artefacts. The context manager
# guarantees the file is flushed and closed (the bare open() call leaked the handle).
with open("../Models/pca.pkl", "wb") as pca_file:
    pickle.dump(pca, pca_file)

In [14]:
# Training batches: utils.EmbedDataset receives the training ids and labels, the
# embedding storage and the fitted PCA transform. No shuffle/sampler argument is
# passed, so the DataLoader defaults apply.
trainLoader = torch.utils.data.DataLoader(
    dataset=utils.EmbedDataset(
        embeder=embeder,
        decompositor=pca.transform,
        x=Xtrain,
        y=Ytrain,
    ),
    batch_size=2048,
)

In [15]:
# Validation batches: utils.TestDataset receives the validation ids and labels, the
# embedding storage and the same fitted PCA transform used for training.
valLoader = torch.utils.data.DataLoader(
    dataset=utils.TestDataset(
        embeder=embeder,
        decompositor=pca.transform,
        x=Xval,
        y=Yval,
    ),
    batch_size=2048,
)

In [16]:
# Test batches: utils.TestDataset receives the held-out test ids and labels, the
# embedding storage and the same fitted PCA transform used for training.
testLoader = torch.utils.data.DataLoader(
    dataset=utils.TestDataset(
        embeder=embeder,
        decompositor=pca.transform,
        x=Xtest,
        y=Ytest,
    ),
    batch_size=2048,
)

In [17]:
# Classifier over the PCA-reduced embeddings: EMBED_DIM inputs, hidden sizes 64/64.
model = utils.Model(EMBED_DIM, d=[64, 64])
trainer = utils.Trainer(
    # Honour the notebook-wide DEVICE constant instead of hard-coding .cuda(),
    # which raises on machines without a GPU.
    model=model.to(DEVICE),
    # NOTE(review): 10_000/2048 is a float (~4.88); confirm that Trainer expects a
    # non-integer batch limit.
    stop_batch=10_000/2048,
    metric=torchmetrics.AUROC(),
    # `reduce=True` is a deprecated spelling; reduction='mean' is its documented
    # equivalent and avoids the deprecation warning.
    loss_fn=nn.BCEWithLogitsLoss(reduction='mean'),
    optimizer=torch.optim.Adam(model.parameters(), lr=3e-4),
)

# Stand-alone metric objects used for the validation and test evaluations below.
# NOTE(review): recent torchmetrics releases require a `task=` argument for
# AUROC/Accuracy -- confirm against the installed version.
acc = torchmetrics.Accuracy()
auc = torchmetrics.AUROC()



In [18]:
# Print the per-layer parameter summary of the model; the trailing ';' suppresses
# the display of the call's return value in the notebook.
torchsummary.summary(model);

Layer (type:depth-idx)                   Param #
├─Sequential: 1-1                        --
|    └─Linear: 2-1                       2,112
|    └─Dropout: 2-2                      --
|    └─GELU: 2-3                         --
|    └─Linear: 2-4                       4,160
|    └─Dropout: 2-5                      --
|    └─GELU: 2-6                         --
|    └─Linear: 2-7                       65
|    └─Dropout: 2-8                      --
Total params: 6,337
Trainable params: 6,337
Non-trainable params: 0


In [20]:
# Run name; it is also used as the checkpoint file stem (`../Models/w/{name}.torch`),
# so it must not end with a space. The separator before the timestamp is added
# explicitly when building the TensorBoard run name instead.
name = 'InceptionResnetV1 vggface2 pca 32'
timestamp = datetime.datetime.now().strftime("%Y.%m.%d - %H-%M-%S")
board_name = f"{name} {timestamp}"

# One TensorBoard log directory per run.
log_dir = f"logs/fit/{board_name}"
writer = SummaryWriter(log_dir)

In [21]:
# Early-stopping training loop: keep training until the validation AUC has not
# improved for `patience` consecutive epochs, or until the kernel is interrupted.
# The best checkpoint (by validation AUC) is written to ../Models/w/{name}.torch.
try:
    wait = 0        # epochs elapsed since the last improvement
    patience = 50

    epoch = 0
    # Despite its name this holds the best validation AUC seen so far
    # (higher is better), hence the -inf start value.
    best_loss = -np.inf

    # torch.save does not create missing directories.
    os.makedirs('../Models/w', exist_ok=True)

    while wait < patience:
        train_loss = trainer.train(trainLoader, epoch)

        val_pred, val_true = trainer.val(valLoader)
        val_prob = val_pred.sigmoid()
        val_label = val_true.int()
        metrics = {
            'AUC': auc(val_prob, val_label),
            'ACC': acc(val_prob, val_label),
        }
        # Calling a torchmetrics object returns the value for this call but also
        # adds the inputs to its accumulated state; reset so that state does not
        # grow with every epoch.
        auc.reset()
        acc.reset()

        writer.add_scalar('Loss/train', train_loss, epoch)
        writer.add_scalar('AUC/train', trainer.metric.compute(), epoch)
        writer.add_scalar('AUC/val', metrics['AUC'], epoch)
        writer.add_scalar('ACC/val', metrics['ACC'], epoch)

        epoch += 1
        if metrics['AUC'] > best_loss:
            # New best model: persist it and restart the patience counter.
            checkpoint = trainer.checkpoint()
            torch.save(checkpoint, f'../Models/w/{name}.torch')
            best_loss = metrics['AUC']
            wait = 0
        else:
            wait += 1

except KeyboardInterrupt:
    # Manual stop is an expected way to end training; the best checkpoint so far
    # is already on disk.
    print("KeyboardInterrupt")
finally:
    # Make sure buffered scalars reach the event file even after an interrupt.
    writer.flush()

In [30]:
# Reload the best checkpoint written by the training loop. map_location remaps the
# stored tensors onto the device this notebook is running on, so a checkpoint saved
# on a GPU can still be loaded on a CPU-only machine.
checkpoint = torch.load(f'../Models/w/{name}.torch', map_location=DEVICE)

In [31]:
# Display the run name from which the checkpoint file name is built.
name

'InceptionResnetV1 vggface2 pca 32'

In [23]:
# Restore the model weights stored under the checkpoint's 'model' key; the call's
# return value is displayed as the cell output.
model.load_state_dict(checkpoint['model'])

<All keys matched successfully>

In [29]:
# Display the ids of the held-out test samples.
Xtest

array([13543, 13544, 13545, 13546, 13547, 13550,  2169])

In [24]:
# Run the trainer's validation pass over the test loader. The predictions are passed
# through sigmoid before scoring in the next cell (presumably raw logits -- confirm
# against utils.Trainer.val).
test_pred, test_true = trainer.val(testLoader)

In [36]:
# Score the test predictions: sigmoid outputs against integer labels.
# NOTE(review): the recorded output shows AUC 0.0 on the 7 test ids, i.e. every
# positive is ranked below every negative -- worth checking the label orientation
# of the test folders before trusting this model.
test_prob = test_pred.sigmoid()
test_label = test_true.int()
print('AUC:', auc(test_prob, test_label))
print('ACC:', acc(test_prob, test_label))

AUC: tensor(0.)
ACC: tensor(0.4286)




In [27]:
# Publish the loaded checkpoint under the fixed production file name.
prod_path = '../Models/w/prod.torch'
torch.save(checkpoint, prod_path)