In [1]:
import argparse
import sys
from sentence_transformers import SentenceTransformer
from sentence_transformers_local import models, losses, SentenceTransformerSequential
from models.Transformers import SCCLBert
from learners.cluster import ClusterLearner
from dataloader.dataloader import augment_loader, augment_loader_split

from training import training
#from training_error_analysis import training

from utils.kmeans import get_kmeans_centers
from utils.logger import setup_path
from utils.randomness import set_global_random_seed
import torch
import pandas as pd
import os
from torch import nn

In [2]:
# Restrict this process to a single GPU, then pick the compute device.
# (`os` and `torch` are already imported in the first cell.)
os.environ.update({
    "CUDA_DEVICE_ORDER": "PCI_BUS_ID",   # see issue #152
    "CUDA_VISIBLE_DEVICES": '4',
})

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print('Using device:', device)
print(torch.cuda.device_count())

# Extra diagnostics for CUDA device 0: its name and current memory usage in GiB.
if device.type == 'cuda':
    bytes_per_gib = 1024**3
    print(torch.cuda.get_device_name(0))
    print('Memory Usage:')
    allocated_gb = round(torch.cuda.memory_allocated(0) / bytes_per_gib, 1)
    print('Allocated:', allocated_gb, 'GB')
    reserved_gb = round(torch.cuda.memory_reserved(0) / bytes_per_gib, 1)
    print('Cached:   ', reserved_gb, 'GB')

Using device: cuda
1
Tesla V100-SXM2-32GB-LS
Memory Usage:
Allocated: 0.0 GB
Cached:    0.0 GB


In [3]:
# Short aliases (the values accepted by --bert) mapped to the pretrained
# checkpoint name that is handed to SentenceTransformer.
MODEL_CLASS = {
    "distil":                     "distilbert-base-nli-stsb-mean-tokens",
    "robertabase":                "roberta-base-nli-stsb-mean-tokens",
    "robertalarge":               "roberta-large-nli-stsb-mean-tokens",
    "msmarco":                    "distilroberta-base-msmarco-v2",
    "xlm":                        "xlm-r-distilroberta-base-paraphrase-v1",
    "bertlarge":                  "bert-large-nli-stsb-mean-tokens",
    "bertbase":                   "bert-base-nli-stsb-mean-tokens",
    "paraphrase":                 "paraphrase-mpnet-base-v2",
    "paraphrase-distil":          "paraphrase-distilroberta-base-v2",
    "paraphrase-Tiny":            "paraphrase-TinyBERT-L6-v2",
    "stanford-sentiment-roberta": "stanford-sentiment-treebank-roberta.2021-03-11",
}

def _str2bool(value):
    """Convert a command-line token to a real boolean.

    argparse's ``type=bool`` calls ``bool()`` on the raw string, so every
    non-empty value -- including ``"False"`` and ``"0"`` -- becomes ``True``.
    This converter accepts the usual spellings instead.

    Args:
        value: A bool (returned unchanged) or a string such as
            ``"true"``/``"false"``, ``"yes"``/``"no"``, ``"1"``/``"0"``.

    Returns:
        The parsed boolean.

    Raises:
        argparse.ArgumentTypeError: If the token is not a recognised boolean.
    """
    if isinstance(value, bool):
        return value
    token = str(value).strip().lower()
    if token in ('true', 't', 'yes', 'y', '1'):
        return True
    if token in ('false', 'f', 'no', 'n', '0'):
        return False
    raise argparse.ArgumentTypeError("expected a boolean value, got %r" % (value,))


# Experiment configuration. The parser is only used as a typed container of
# defaults: parse_args(args=[]) below parses an empty argument list, so nothing
# is read from the kernel's own command line.
parser = argparse.ArgumentParser()
# (disabled) --gpuid: nargs="+", type=int, default=[0]
#            "The list of gpuid, ex:--gpuid 3 1. Negative value means cpu-only"
parser.add_argument('--seed', type=int, default=200, help="")
parser.add_argument('--print_freq', type=float, default=400, help="")
parser.add_argument('--result_path', type=str, default='./results/')

# Key into MODEL_CLASS (alternative used previously: 'distil').
parser.add_argument('--bert', type=str, default='paraphrase', help="")

parser.add_argument('--bert_model', type=str, default='bert-base-uncased', help="")
parser.add_argument('--note', type=str, default='_search_snippets_distil_lre-4_JSD', help="")

# Dataset
# NOTE(review): --dataset (and --note) still say 'search_snippets' while
# --dataname/--num_classes below select the Tweet data; the results path
# printed by the run is labelled 'search_snippets'. Confirm which is intended.
# Alternative used previously: 'stackoverflow' (stackoverflow/stackoverflow_true_text).
parser.add_argument('--dataset', type=str, default='search_snippets', help="")
parser.add_argument('--max_length', type=int, default=32)
# NOTE(review): the default is a [train, val] list, but type=float means a value
# given on the command line is parsed as a single float. The training cell
# compares this value against -1 to decide whether to skip the split.
parser.add_argument('--train_val_ratio', type=float, default= [0.9, 0.1])

# Data for train and test.
# Presets that were used before (dataname_val always equals dataname):
#   name            data_path                    dataname             num_classes
#   AgNews          ./datasets/                  agnewsdataraw-8000   4
#   SearchSnippets  (active --data_path below)   search_snippets      8
#                   (also: train_search_snippets.csv / test_search_snippets.csv)
#   StackOverFlow   ./datasets/stackoverflow/    stackoverflow        20
#   Biomedical      ./datasets/biomedical/       biomedical           20
#   Tweet           ./datasets/                  tweet_remap_label    89
#   GoogleNewsTS    ./datasets/                  TS                   152
#   GoogleNewsT     ./datasets/                  T                    152
#   GoogleNewsS     ./datasets/                  S                    152
parser.add_argument('--data_path', type=str, default='./datasets/augmented/contextual_20_2col_bert/')
parser.add_argument('--dataname', type=str, default='tweet_remap_label', help="")
parser.add_argument('--dataname_val', type=str, default='tweet_remap_label', help="")
parser.add_argument('--num_classes', type=int, default=89, help="")

# Learning parameters
parser.add_argument('--lr', type=float, default=1e-6, help="")  # learning rate
parser.add_argument('--lr_scale', type=int, default=100, help="")
parser.add_argument('--max_iter', type=int, default=30000)
parser.add_argument('--batch_size', type=int, default=256)  # batch size

# CNN setting (disabled):
#   --out_channels  type=int, default=768
#   --use_cnn       type=str, one of 'cnn_1', 'cnn_3', 'cnn_5', 'cnn_7', 'cnn_cat', 'cnn_avg'

# Contrastive learning
# Boolean switches go through _str2bool so that "--use_head False" really
# yields False; the defaults are unchanged.
parser.add_argument('--use_head', type=_str2bool, default=False)
parser.add_argument('--use_normalize', type=_str2bool, default=False)

parser.add_argument('--weighted_local', type=_str2bool, default=False, help="")
# Alternative used previously: 'inverse_prob'.
parser.add_argument('--normalize_method', type=str, default='none', help="")

parser.add_argument('--contrastive_local_scale', type=float, default=0.00)  # scale of contrastive loss
parser.add_argument('--contrastive_global_scale', type=float, default=0.01)  # scale of contrastive loss
parser.add_argument('--temperature', type=float, default=0.5, help="temperature required by contrastive loss")
parser.add_argument('--base_temperature', type=float, default=0.1, help="temperature required by contrastive loss")

# Clustering (an earlier default for the scale was 0.02)
parser.add_argument('--clustering_scale', type=float, default=0.01)  # scale of clustering loss
parser.add_argument('--use_perturbation', action='store_true', help="")
parser.add_argument('--alpha', type=float, default=1)

# Parse an empty argument list: every option takes its default.
args = parser.parse_args(args=[])
# Placeholders; the next cell overwrites them with the values from setup_path(args).
args.resPath = None
args.tensorboard = None

In [4]:
# Resolve the output locations for this run and record them on `args`.
resPath, tensorboard = setup_path(args)
args.resPath = resPath
args.tensorboard = tensorboard

set_global_random_seed(args.seed)

# Dataset loader
train_loader = augment_loader(args)

# Initialize the cluster centers: run k-means on embeddings produced by the
# Sentence-BERT model selected with --bert (mean pooling is its default).
sbert_name = MODEL_CLASS[args.bert]
sbert = SentenceTransformer(sbert_name)
cluster_centers = get_kmeans_centers(sbert, train_loader, args.num_classes)


# Model
# 1. Transformer model
# A Huggingface/transformers encoder (BERT, RoBERTa, XLNet, XLM-R, ...) that maps
# tokens to embeddings. The checkpoint is derived from --bert so that the encoder
# being trained is the same model whose embeddings seeded the k-means cluster
# centers; with the default 'paraphrase' this resolves to
# 'sentence-transformers/paraphrase-mpnet-base-v2', exactly as before.
# NOTE(review): assumes every MODEL_CLASS entry is published under the
# 'sentence-transformers/' namespace -- confirm before switching --bert.
word_embedding_model = models.Transformer('sentence-transformers/' + MODEL_CLASS[args.bert])
dimension = word_embedding_model.get_word_embedding_dimension()

# 2. CNN model (disabled)
# Previously: models.CNN(in_word_embedding_dimension=dimension,
#                        use_cnn=args.use_cnn, out_channels=dimension),
# inserted between the transformer and the pooling layer.

# 3. Pooling: mean over token embeddings only.
pooling_model = models.Pooling(dimension,
                               pooling_mode_mean_tokens=True,
                               pooling_mode_cls_token=False,
                               pooling_mode_max_tokens=False,
                               pooling_mode_weighted_tokens=False)

# 4. Feature extractor
# Placed on the device selected in the GPU-setup cell instead of a hard-coded
# 'cuda'; passed as a string, as the original call did.
feature_extractor = SentenceTransformerSequential(modules=[word_embedding_model, pooling_model],
                                                  device=str(device))

# 5. main model
model = SCCLBert(feature_extractor, cluster_centers=cluster_centers, alpha=args.alpha, use_head=args.use_head)


# Optimizer
# Adam with one parameter group per component, each with its own learning rate
# expressed as a multiple of --lr. The CNN and projection-head groups are left
# out in this configuration.
base_lr = args.lr
param_groups = [
    # Transformer encoder: 6x the base rate.
    {'params': word_embedding_model.parameters(), 'lr': base_lr*6},
    # Pooling layer: no explicit rate, so it uses the optimizer-wide default.
    {'params': pooling_model.parameters()},
    # Cluster centers: 60x the base rate.
    {'params': model.cluster_centers, 'lr': base_lr*60},
]
optimizer = torch.optim.Adam(param_groups, lr=base_lr)
print(optimizer)


# Set up the trainer
learner = ClusterLearner(
    model, feature_extractor, optimizer,
    args.temperature, args.base_temperature,
    args.contrastive_local_scale, args.contrastive_global_scale, args.clustering_scale,
    use_head=args.use_head, use_normalize=args.use_normalize,
)
# Move the learner to the device chosen in the GPU-setup cell rather than calling
# .cuda() unconditionally, so the CPU fallback selected there is honoured.
# (assumes ClusterLearner is a torch.nn.Module, as the original .cuda() call implies)
learner = learner.to(device)

if args.train_val_ratio != -1:
    # Split train / validation: rebuild the loaders and evaluate on the held-out part.
    train_loader, val_loader = augment_loader_split(args)
    training(train_loader, learner, args, val_loader=val_loader)
else:
    # No split requested: train on the full loader built earlier.
    training(train_loader, learner, args)

results path: ./results/SCCL.paraphrase.search_snippets.lr1e-06.lrscale100.tmp0.5.alpha1.seed200/
all_embeddings:(2472, 768), true_labels:2472, pred_labels:2472
true_labels tensor([86,  5, 28,  ..., 52, 13, 71])
pred_labels tensor([25, 88, 66,  ..., 40, 52, 28], dtype=torch.int32)
Iterations:11, Clustering ACC:0.656, centers:(89, 768)
initial_cluster_centers =  torch.Size([89, 768])
Adam (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    eps: 1e-08
    lr: 6e-06
    weight_decay: 0

Parameter Group 1
    amsgrad: False
    betas: (0.9, 0.999)
    eps: 1e-08
    lr: 1e-06
    weight_decay: 0

Parameter Group 2
    amsgrad: False
    betas: (0.9, 0.999)
    eps: 1e-08
    lr: 5.9999999999999995e-05
    weight_decay: 0
)




train_sample 0.9 2225
val_sample 0.1 247

=30000/9=Iterations/Batches
[0]-----
contrastive_local_loss:	 1.49995
contrastive_global_loss:	 0.01470
clustering_loss:	 0.00050
local_consistency_loss:	 0.00000
------------- Evaluate Training Set -------------
------------- 9 batches -------------
all_pred 89
[Representation] Clustering scores: {'NMI': 0.8661505176447416, 'ARI': 0.518616472636438, 'AMI': 0.8247033701877889}
[Representation] ACC: 0.6400
[Representation] ACC sklearn: 0.0000
[Model] Clustering scores: {'NMI': 0.8731683981547065, 'ARI': 0.5382659327082056, 'AMI': 0.8341778315591768}
[Model] ACC: 0.6517
[Model] ACC sklearn: 0.0067
------------- Evaluate Validation Set -------------
------------- 1 batches -------------
all_pred 73
[Representation] Clustering scores: {'NMI': 0.9047417427789239, 'ARI': 0.5550734499498651, 'AMI': 0.7460598514105639}
[Representation] ACC: 0.6964
[Representation] ACC sklearn: 0.0000
[Model] Clustering scores: {'NMI': 0.9204320552143644, 'ARI': 0.58698

[3200]-----
contrastive_local_loss:	 13.44820
contrastive_global_loss:	 0.00231
clustering_loss:	 0.00445
local_consistency_loss:	 0.00000
------------- Evaluate Training Set -------------
------------- 9 batches -------------
all_pred 66
[Representation] Clustering scores: {'NMI': 0.9274026978422565, 'ARI': 0.9101075351715607, 'AMI': 0.9083065996181332}
[Representation] ACC: 0.8670
[Representation] ACC sklearn: 0.0000
[Model] Clustering scores: {'NMI': 0.8573226327679683, 'ARI': 0.7455749724501498, 'AMI': 0.8237105370697269}
[Model] ACC: 0.7308
[Model] ACC sklearn: 0.0063
------------- Evaluate Validation Set -------------
------------- 1 batches -------------
all_pred 57
[Representation] Clustering scores: {'NMI': 0.935334601301708, 'ARI': 0.8061400939116684, 'AMI': 0.8434958663263169}
[Representation] ACC: 0.8057
[Representation] ACC sklearn: 0.0081
[Model] Clustering scores: {'NMI': 0.9030186297444818, 'ARI': 0.6840616574364206, 'AMI': 0.7853150920318553}
[Model] ACC: 0.7368
[Model

all_pred 54
[Representation] Clustering scores: {'NMI': 0.8926133911300635, 'ARI': 0.7692024479354101, 'AMI': 0.8690481257672735}
[Representation] ACC: 0.7699
[Representation] ACC sklearn: 0.0184
[Model] Clustering scores: {'NMI': 0.8500051091415418, 'ARI': 0.6321766081600283, 'AMI': 0.8183140530657986}
[Model] ACC: 0.6593
[Model] ACC sklearn: 0.0022
------------- Evaluate Validation Set -------------
------------- 1 batches -------------
all_pred 49
[Representation] Clustering scores: {'NMI': 0.9009611401771751, 'ARI': 0.6767218211023651, 'AMI': 0.7611521547416018}
[Representation] ACC: 0.7368
[Representation] ACC sklearn: 0.0040
[Model] Clustering scores: {'NMI': 0.8974058717007118, 'ARI': 0.6540940452095189, 'AMI': 0.7847305121038511}
[Model] ACC: 0.7166
[Model] ACC sklearn: 0.0000
[6800]-----
contrastive_local_loss:	 17.26166
contrastive_global_loss:	 0.00190
clustering_loss:	 0.00531
local_consistency_loss:	 0.00000
------------- Evaluate Training Set -------------
------------- 9

------------- Evaluate Validation Set -------------
------------- 1 batches -------------
all_pred 51
[Representation] Clustering scores: {'NMI': 0.8731141200754832, 'ARI': 0.5212198027109813, 'AMI': 0.681553529139789}
[Representation] ACC: 0.6437
[Representation] ACC sklearn: 0.0324
[Model] Clustering scores: {'NMI': 0.8908894220035225, 'ARI': 0.5910896769043513, 'AMI': 0.7626172453596318}
[Model] ACC: 0.6478
[Model] ACC sklearn: 0.0081
[10000]-----
contrastive_local_loss:	 20.05516
contrastive_global_loss:	 0.00197
clustering_loss:	 0.00448
local_consistency_loss:	 0.00000
------------- Evaluate Training Set -------------
------------- 9 batches -------------
all_pred 52
[Representation] Clustering scores: {'NMI': 0.8134832893502955, 'ARI': 0.45660743122574776, 'AMI': 0.7613759806615126}
[Representation] ACC: 0.5425
[Representation] ACC sklearn: 0.0126
[Model] Clustering scores: {'NMI': 0.8451401654837246, 'ARI': 0.6211998937345369, 'AMI': 0.8112131588796638}
[Model] ACC: 0.6049
[Mod

all_pred 48
[Representation] Clustering scores: {'NMI': 0.8787954318889588, 'ARI': 0.542250937017591, 'AMI': 0.6942292731377492}
[Representation] ACC: 0.6478
[Representation] ACC sklearn: 0.0000
[Model] Clustering scores: {'NMI': 0.8975323338965496, 'ARI': 0.6182977988427159, 'AMI': 0.7805777602325559}
[Model] ACC: 0.6640
[Model] ACC sklearn: 0.0081
[13200]-----
contrastive_local_loss:	 21.47422
contrastive_global_loss:	 0.00149
clustering_loss:	 0.00401
local_consistency_loss:	 0.00000
------------- Evaluate Training Set -------------
------------- 9 batches -------------
all_pred 54
[Representation] Clustering scores: {'NMI': 0.794450942336939, 'ARI': 0.37582934628133796, 'AMI': 0.7302158000861094}
[Representation] ACC: 0.4625
[Representation] ACC sklearn: 0.0189
[Model] Clustering scores: {'NMI': 0.8435932701777171, 'ARI': 0.604726264522164, 'AMI': 0.8090010603501772}
[Model] ACC: 0.5708
[Model] ACC sklearn: 0.0022
------------- Evaluate Validation Set -------------
------------- 1 

[Representation] Clustering scores: {'NMI': 0.8682470820977852, 'ARI': 0.46513746041532383, 'AMI': 0.6602004442903066}
[Representation] ACC: 0.6032
[Representation] ACC sklearn: 0.0243
[Model] Clustering scores: {'NMI': 0.8829457926658837, 'ARI': 0.5670685959022821, 'AMI': 0.7465322639285638}
[Model] ACC: 0.6275
[Model] ACC sklearn: 0.0040
[16400]-----
contrastive_local_loss:	 24.50064
contrastive_global_loss:	 0.00143
clustering_loss:	 0.00354
local_consistency_loss:	 0.00000
------------- Evaluate Training Set -------------
------------- 9 batches -------------
all_pred 54
[Representation] Clustering scores: {'NMI': 0.7960713988592018, 'ARI': 0.3834285641119617, 'AMI': 0.7335636146276261}
[Representation] ACC: 0.4634
[Representation] ACC sklearn: 0.0135
[Model] Clustering scores: {'NMI': 0.8386892116405954, 'ARI': 0.563332196641291, 'AMI': 0.8023976340394945}
[Model] ACC: 0.5533
[Model] ACC sklearn: 0.0022
------------- Evaluate Validation Set -------------
------------- 1 batches --

[Representation] Clustering scores: {'NMI': 0.8658428120108335, 'ARI': 0.43079809475106, 'AMI': 0.6475341729907653}
[Representation] ACC: 0.5668
[Representation] ACC sklearn: 0.0243
[Model] Clustering scores: {'NMI': 0.8870422664062119, 'ARI': 0.578947787712562, 'AMI': 0.7541948452610986}
[Model] ACC: 0.6437
[Model] ACC sklearn: 0.0040
[19600]-----
contrastive_local_loss:	 26.67688
contrastive_global_loss:	 0.00120
clustering_loss:	 0.00371
local_consistency_loss:	 0.00000
------------- Evaluate Training Set -------------
------------- 9 batches -------------
all_pred 52
[Representation] Clustering scores: {'NMI': 0.7943180525915621, 'ARI': 0.3736937963111796, 'AMI': 0.7304726976237583}
[Representation] ACC: 0.4670
[Representation] ACC sklearn: 0.0252
[Model] Clustering scores: {'NMI': 0.8393908261199242, 'ARI': 0.5629073479561783, 'AMI': 0.8035678652861244}
[Model] ACC: 0.5636
[Model] ACC sklearn: 0.0018
------------- Evaluate Validation Set -------------
------------- 1 batches -----

[22800]-----
contrastive_local_loss:	 28.93234
contrastive_global_loss:	 0.00116
clustering_loss:	 0.00350
local_consistency_loss:	 0.00000
------------- Evaluate Training Set -------------
------------- 9 batches -------------
all_pred 53
[Representation] Clustering scores: {'NMI': 0.7917574374961072, 'ARI': 0.3614991419583375, 'AMI': 0.7261584094750837}
[Representation] ACC: 0.4413
[Representation] ACC sklearn: 0.0072
[Model] Clustering scores: {'NMI': 0.8424626934306033, 'ARI': 0.5854288368885741, 'AMI': 0.807866186817012}
[Model] ACC: 0.5955
[Model] ACC sklearn: 0.0004
------------- Evaluate Validation Set -------------
------------- 1 batches -------------
all_pred 47
[Representation] Clustering scores: {'NMI': 0.8646641538847963, 'ARI': 0.4603571074445388, 'AMI': 0.6506918646438947}
[Representation] ACC: 0.5951
[Representation] ACC sklearn: 0.0162
[Model] Clustering scores: {'NMI': 0.8910042134966406, 'ARI': 0.6095863293983853, 'AMI': 0.767883779302942}
[Model] ACC: 0.6680
[Model

all_pred 54
[Representation] Clustering scores: {'NMI': 0.7910605029311378, 'ARI': 0.35214190307675913, 'AMI': 0.7243496780635134}
[Representation] ACC: 0.4355
[Representation] ACC sklearn: 0.0018
[Model] Clustering scores: {'NMI': 0.838005315096909, 'ARI': 0.5402474068833054, 'AMI': 0.8018708144543059}
[Model] ACC: 0.5775
[Model] ACC sklearn: 0.0004
------------- Evaluate Validation Set -------------
------------- 1 batches -------------
all_pred 49
[Representation] Clustering scores: {'NMI': 0.8601319391443724, 'ARI': 0.42305382755587395, 'AMI': 0.6330697772898799}
[Representation] ACC: 0.5709
[Representation] ACC sklearn: 0.0040
[Model] Clustering scores: {'NMI': 0.8885679408535925, 'ARI': 0.5728200478974861, 'AMI': 0.7595081707252407}
[Model] ACC: 0.6437
[Model] ACC sklearn: 0.0040
[26400]-----
contrastive_local_loss:	 31.74330
contrastive_global_loss:	 0.00110
clustering_loss:	 0.00314
local_consistency_loss:	 0.00000
------------- Evaluate Training Set -------------
-------------

all_pred 53
[Representation] Clustering scores: {'NMI': 0.7915886564767012, 'ARI': 0.3565192617131209, 'AMI': 0.7251709750422329}
[Representation] ACC: 0.4283
[Representation] ACC sklearn: 0.0081
[Model] Clustering scores: {'NMI': 0.8381013460423454, 'ARI': 0.5381844400781913, 'AMI': 0.8020206096784702}
[Model] ACC: 0.5856
[Model] ACC sklearn: 0.0004
------------- Evaluate Validation Set -------------
------------- 1 batches -------------
all_pred 50
[Representation] Clustering scores: {'NMI': 0.866600475190886, 'ARI': 0.4395774763416865, 'AMI': 0.6486447064670493}
[Representation] ACC: 0.5749
[Representation] ACC sklearn: 0.0081
[Model] Clustering scores: {'NMI': 0.8860990350716531, 'ARI': 0.5617917919901703, 'AMI': 0.7521068026780074}
[Model] ACC: 0.6437
[Model] ACC sklearn: 0.0040
[29600]-----
contrastive_local_loss:	 29.51626
contrastive_global_loss:	 0.00102
clustering_loss:	 0.00305
local_consistency_loss:	 0.00000
------------- Evaluate Training Set -------------
------------- 9

###### 