In [1]:
from sklearn.metrics.pairwise import paired_cosine_distances, paired_euclidean_distances, paired_manhattan_distances
from scipy.stats import pearsonr, spearmanr
import math

In [2]:
def euclidean_distance(x,y):
  """ return euclidean distance between two lists """
 
  return math.sqrt(sum(pow(a-b,2) for a, b in zip(x, y)))

def squared_sum(x):
  """ return 3 rounded square rooted value """
 
  return round(math.sqrt(sum([a*a for a in x])),3)
  
def cos_similarity(x,y):
  """ return cosine similarity between two lists """
 
  numerator = sum(a*b for a,b in zip(x,y))
  denominator = squared_sum(x)*squared_sum(y)
  return round(numerator/float(denominator),3)

In [3]:
import logging
from tqdm import tqdm
import numpy as np
from numpy import ndarray
import pandas as pd
import torch
from torch import Tensor, device
import transformers
from transformers import AutoModel, AutoTokenizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import normalize
from typing import List, Dict, Tuple, Type, Union
from kobert_tokenizer import KoBertTokenizer

from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

logging.basicConfig(format='%(asctime)s - %(levelname)s - %(name)s -   %(message)s', datefmt='%m/%d/%Y %H:%M:%S',
                    level=logging.INFO)
logger = logging.getLogger(__name__)

In [4]:
sts_dev = pd.read_csv("/home/keonwoo/anaconda3/envs/KoDiffCSE/data/ko_sts_dev.txt")
sts_test = pd.read_csv("/home/keonwoo/anaconda3/envs/KoDiffCSE/data/ko_sts_test.txt")

In [5]:
class DiffCSE(object):
    """
    A class for embedding sentences, calculating similarities, and retriving sentences by DiffCSE. The code here is provided by SimCSE.
    """
    def __init__(self, model_name_or_path: str, 
                device: str = None,
                num_cells: int = 100,
                num_cells_in_search: int = 10,
                pooler = None):

        self.tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
        # self.tokenizer = KoBertTokenizer.from_pretrained(model_name_or_path)
        self.model = AutoModel.from_pretrained(model_name_or_path)
        if device is None:
            device = "cuda:1" if torch.cuda.is_available() else "cpu"
        self.device = device

        self.index = None
        self.is_faiss_index = False
        self.num_cells = num_cells
        self.num_cells_in_search = num_cells_in_search

        if pooler is not None:
            self.pooler = pooler
        else:
            logger.info("Use `cls_before_pooler` for DiffCSE models. If you want to use other pooling policy, specify `pooler` argument.")
            self.pooler = "cls_before_pooler"
    
    def encode(self, sentence: Union[str, List[str]], 
                device: str = None, 
                return_numpy: bool = False,
                normalize_to_unit: bool = True,
                keepdim: bool = False,
                batch_size: int = 64,
                max_length: int = 128) -> Union[ndarray, Tensor]:

        target_device = self.device if device is None else device
        self.model = self.model.to(target_device)
        
        single_sentence = False
        if isinstance(sentence, str):
            sentence = [sentence]
            single_sentence = True

        embedding_list = [] 
        with torch.no_grad():
            total_batch = len(sentence) // batch_size + (1 if len(sentence) % batch_size > 0 else 0)
            for batch_id in tqdm(range(total_batch)):
                inputs = self.tokenizer(
                    sentence[batch_id*batch_size:(batch_id+1)*batch_size], 
                    padding=True, 
                    truncation=True, 
                    max_length=max_length, 
                    return_tensors="pt"
                )
                inputs = {k: v.to(target_device) for k, v in inputs.items()}
                outputs = self.model(**inputs, return_dict=True)
                if self.pooler == "cls":
                    embeddings = outputs.pooler_output
                elif self.pooler == "cls_before_pooler":
                    embeddings = outputs.last_hidden_state[:, 0]
                else:
                    raise NotImplementedError
                if normalize_to_unit:
                    embeddings = embeddings / embeddings.norm(dim=1, keepdim=True)
                embedding_list.append(embeddings.cpu())
        embeddings = torch.cat(embedding_list, 0)
        
        if single_sentence and not keepdim:
            embeddings = embeddings[0]
        
        if return_numpy and not isinstance(embeddings, ndarray):
            return embeddings.numpy()
        return embeddings
    
    def similarity(self, queries: Union[str, List[str]], 
                    keys: Union[str, List[str], ndarray], 
                    device: str = None) -> Union[float, ndarray]:
        
        query_vecs = self.encode(queries, device=device, return_numpy=True) # suppose N queries
        
        if not isinstance(keys, ndarray):
            key_vecs = self.encode(keys, device=device, return_numpy=True) # suppose M keys
        else:
            key_vecs = keys

        # check whether N == 1 or M == 1
        single_query, single_key = len(query_vecs.shape) == 1, len(key_vecs.shape) == 1 
        if single_query:
            query_vecs = query_vecs.reshape(1, -1)
        if single_key:
            key_vecs = key_vecs.reshape(1, -1)
        
        # returns an N*M similarity array
        similarities = cosine_similarity(query_vecs, key_vecs)
        
        if single_query:
            similarities = similarities[0]
            if single_key:
                similarities = float(similarities[0])
        
        return similarities
    
    def build_index(self, sentences_or_file_path: Union[str, List[str]], 
                        use_faiss: bool = None,
                        faiss_fast: bool = False,
                        device: str = None,
                        batch_size: int = 64):

        if use_faiss is None or use_faiss:
            try:
                import faiss
                assert hasattr(faiss, "IndexFlatIP")
                use_faiss = True 
            except:
                logger.warning("Fail to import faiss. If you want to use faiss, install faiss through PyPI. Now the program continues with brute force search.")
                use_faiss = False
        
        # if the input sentence is a string, we assume it's the path of file that stores various sentences
        if isinstance(sentences_or_file_path, str):
            sentences = []
            with open(sentences_or_file_path, "r") as f:
                logging.info("Loading sentences from %s ..." % (sentences_or_file_path))
                for line in tqdm(f):
                    sentences.append(line.rstrip())
            sentences_or_file_path = sentences
        
        logger.info("Encoding embeddings for sentences...")
        embeddings = self.encode(sentences_or_file_path, device=device, batch_size=batch_size, normalize_to_unit=True, return_numpy=True)

        logger.info("Building index...")
        self.index = {"sentences": sentences_or_file_path}
        
        if use_faiss:
            quantizer = faiss.IndexFlatIP(embeddings.shape[1])  
            if faiss_fast:
                index = faiss.IndexIVFFlat(quantizer, embeddings.shape[1], min(self.num_cells, len(sentences_or_file_path))) 
            else:
                index = quantizer

            if (self.device == "cuda" and device != "cpu") or device == "cuda":
                if hasattr(faiss, "StandardGpuResources"):
                    logger.info("Use GPU-version faiss")
                    res = faiss.StandardGpuResources()
                    res.setTempMemory(20 * 1024 * 1024 * 1024)
                    index = faiss.index_cpu_to_gpu(res, 0, index)
                else:
                    logger.info("Use CPU-version faiss")
            else: 
                logger.info("Use CPU-version faiss")

            if faiss_fast:            
                index.train(embeddings.astype(np.float32))
            index.add(embeddings.astype(np.float32))
            index.nprobe = min(self.num_cells_in_search, len(sentences_or_file_path))
            self.is_faiss_index = True
        else:
            index = embeddings
            self.is_faiss_index = False
        self.index["index"] = index
        logger.info("Finished")
    
    def search(self, queries: Union[str, List[str]], 
                device: str = None, 
                threshold: float = 0,
                top_k: int = 5) -> Union[List[Tuple[str, float]], List[List[Tuple[str, float]]]]:
        
        if not self.is_faiss_index:
            if isinstance(queries, list):
                combined_results = []
                for query in queries:
                    results = self.search(query, device)
                    combined_results.append(results)
                return combined_results
            
            similarities = self.similarity(queries, self.index["index"]).tolist()
            id_and_score = []
            for i, s in enumerate(similarities):
                if s >= threshold:
                    id_and_score.append((i, s))
            id_and_score = sorted(id_and_score, key=lambda x: x[1], reverse=True)[:top_k]
            results = [(self.index["sentences"][idx], score) for idx, score in id_and_score]
            return results
        else:
            query_vecs = self.encode(queries, device=device, normalize_to_unit=True, keepdim=True, return_numpy=True)

            distance, idx = self.index["index"].search(query_vecs.astype(np.float32), top_k)
            
            def pack_single_result(dist, idx):
                results = [(self.index["sentences"][i], s) for i, s in zip(idx, dist) if s >= threshold]
                return results
            
            if isinstance(queries, list):
                combined_results = []
                for i in range(len(queries)):
                    results = pack_single_result(distance[i], idx[i])
                    combined_results.append(results)
                return combined_results
            else:
                return pack_single_result(distance[0], idx[0])

In [6]:
def evaluation(eval_dataset,model):
    sen_emb1 = model.encode(eval_dataset['sentence1'].tolist())
    sen_emb2 = model.encode(eval_dataset['sentence2'].tolist())
    labels = eval_dataset['score']

    cosine_scores = 1 - (paired_cosine_distances(sen_emb1, sen_emb2))
    manhattan_distances = -paired_manhattan_distances(sen_emb1, sen_emb2)
    euclidean_distances = -paired_euclidean_distances(sen_emb1, sen_emb2)
    dot_products = [np.dot(emb1, emb2) for emb1, emb2 in zip(sen_emb1, sen_emb2)]
    
    eval_pearson_cosine, _ = pearsonr(labels, cosine_scores)
    eval_spearman_cosine, _ = spearmanr(labels, cosine_scores)

    eval_pearson_manhattan, _ = pearsonr(labels, manhattan_distances)
    eval_spearman_manhattan, _ = spearmanr(labels, manhattan_distances)

    eval_pearson_euclidean, _ = pearsonr(labels, euclidean_distances)
    eval_spearman_euclidean, _ = spearmanr(labels, euclidean_distances)

    eval_pearson_dot, _ = pearsonr(labels, dot_products)
    eval_spearman_dot, _ = spearmanr(labels, dot_products)

    score = {'eval_pearson_cosine': eval_pearson_cosine,
            'eval_spearman_cosine': eval_spearman_cosine,
            'eval_pearson_manhattan': eval_pearson_manhattan,
            'eval_spearman_manhattan': eval_spearman_manhattan,
            'eval_pearson_euclidean': eval_pearson_euclidean,
            'eval_spearman_euclidean': eval_spearman_euclidean,
            'eval_pearson_dot': eval_pearson_dot,
            'eval_spearman_dot': eval_spearman_dot}
    return score

### 0720/epoch=1

In [99]:
model_name = "/home/keonwoo/anaconda3/envs/KoDiffCSE/ko_output_model_0720_epoch=1"
diffcse = DiffCSE(model_name)

Some weights of BertModel were not initialized from the model checkpoint at /home/keonwoo/anaconda3/envs/KoDiffCSE/ko_output_model_0720_epoch=1 and are newly initialized: ['bert.pooler.dense.weight', 'bert.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
07/21/2022 18:51:39 - INFO - __main__ -   Use `cls_before_pooler` for DiffCSE models. If you want to use other pooling policy, specify `pooler` argument.


In [101]:
evaluation(sts_dev, diffcse)

100%|██████████| 23/23 [00:01<00:00, 18.56it/s]
100%|██████████| 23/23 [00:01<00:00, 19.85it/s]


{'eval_pearson_cosine': 0.7430483153702674,
 'eval_spearman_cosine': 0.7415490325180422,
 'eval_pearson_manhattan': 0.7334779689669992,
 'eval_spearman_manhattan': 0.7416093893020635,
 'eval_pearson_euclidean': 0.7333004017195163,
 'eval_spearman_euclidean': 0.7415480872721462,
 'eval_pearson_dot': 0.7430483193293904,
 'eval_spearman_dot': 0.7415495188249112}

In [102]:
evaluation(sts_test, diffcse)

100%|██████████| 22/22 [00:01<00:00, 21.85it/s]
100%|██████████| 22/22 [00:00<00:00, 22.82it/s]


{'eval_pearson_cosine': 0.6789423949638527,
 'eval_spearman_cosine': 0.6689631112979975,
 'eval_pearson_manhattan': 0.6691956475211638,
 'eval_spearman_manhattan': 0.66911377956273,
 'eval_pearson_euclidean': 0.6686868295838538,
 'eval_spearman_euclidean': 0.668962482106489,
 'eval_pearson_dot': 0.6789423604048723,
 'eval_spearman_dot': 0.6689578314366862}

### 0721/epoch=1_update

In [83]:
model_name = "/home/keonwoo/anaconda3/envs/KoDiffCSE/ko_output_model_0721_epoch=1_update"
diffcse = DiffCSE(model_name)

Some weights of BertModel were not initialized from the model checkpoint at /home/keonwoo/anaconda3/envs/KoDiffCSE/ko_output_model_0721_epoch=1_update and are newly initialized: ['bert.pooler.dense.weight', 'bert.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
07/21/2022 18:43:09 - INFO - __main__ -   Use `cls_before_pooler` for DiffCSE models. If you want to use other pooling policy, specify `pooler` argument.


In [86]:
evaluation(sts_dev, diffcse)

100%|██████████| 23/23 [00:01<00:00, 18.72it/s]
100%|██████████| 23/23 [00:01<00:00, 19.99it/s]


{'eval_pearson_cosine': 0.662757380420375,
 'eval_spearman_cosine': 0.6641554724076045,
 'eval_pearson_manhattan': 0.6569087920857435,
 'eval_spearman_manhattan': 0.6665474061347986,
 'eval_pearson_euclidean': 0.6542220340677666,
 'eval_spearman_euclidean': 0.6641554740588386,
 'eval_pearson_dot': 0.662757394704638,
 'eval_spearman_dot': 0.6641546651426425}

In [85]:
evaluation(sts_test, diffcse)

100%|██████████| 22/22 [00:01<00:00, 21.89it/s]
100%|██████████| 22/22 [00:00<00:00, 22.92it/s]


{'eval_pearson_cosine': 0.5916246647737429,
 'eval_spearman_cosine': 0.5756435421929641,
 'eval_pearson_manhattan': 0.5778638048876472,
 'eval_spearman_manhattan': 0.5764579773386374,
 'eval_pearson_euclidean': 0.5769955818280884,
 'eval_spearman_euclidean': 0.5756429198787706,
 'eval_pearson_dot': 0.5916246496003235,
 'eval_spearman_dot': 0.5756421480932179}

### 0721/epoch=2

In [87]:
model_name = "/home/keonwoo/anaconda3/envs/KoDiffCSE/ko_output_model_0721_epoch=2"
diffcse = DiffCSE(model_name)

Some weights of BertModel were not initialized from the model checkpoint at /home/keonwoo/anaconda3/envs/KoDiffCSE/ko_output_model_0721_epoch=2 and are newly initialized: ['bert.pooler.dense.weight', 'bert.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
07/21/2022 18:43:55 - INFO - __main__ -   Use `cls_before_pooler` for DiffCSE models. If you want to use other pooling policy, specify `pooler` argument.


In [88]:
evaluation(sts_dev, diffcse)

100%|██████████| 23/23 [00:01<00:00, 18.72it/s]
100%|██████████| 23/23 [00:01<00:00, 19.98it/s]


{'eval_pearson_cosine': 0.7429097011937542,
 'eval_spearman_cosine': 0.7426633860985158,
 'eval_pearson_manhattan': 0.7331639057210683,
 'eval_spearman_manhattan': 0.7423990207626731,
 'eval_pearson_euclidean': 0.7334587848287031,
 'eval_spearman_euclidean': 0.7426628686388035,
 'eval_pearson_dot': 0.7429097086974977,
 'eval_spearman_dot': 0.7426633098511968}

In [89]:
evaluation(sts_test, diffcse)

100%|██████████| 22/22 [00:01<00:00, 21.89it/s]
100%|██████████| 22/22 [00:00<00:00, 22.92it/s]


{'eval_pearson_cosine': 0.6816542228074193,
 'eval_spearman_cosine': 0.6753138250845714,
 'eval_pearson_manhattan': 0.6715845931475771,
 'eval_spearman_manhattan': 0.6745285209567878,
 'eval_pearson_euclidean': 0.6714984469001893,
 'eval_spearman_euclidean': 0.6753139993475267,
 'eval_pearson_dot': 0.6816542522447457,
 'eval_spearman_dot': 0.6753131747006083}

### 0721/epoch=3

In [90]:
model_name = "/home/keonwoo/anaconda3/envs/KoDiffCSE/ko_output_model_0721_epoch=3"
diffcse = DiffCSE(model_name)

Some weights of BertModel were not initialized from the model checkpoint at /home/keonwoo/anaconda3/envs/KoDiffCSE/ko_output_model_0721_epoch=3 and are newly initialized: ['bert.pooler.dense.weight', 'bert.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
07/21/2022 18:44:47 - INFO - __main__ -   Use `cls_before_pooler` for DiffCSE models. If you want to use other pooling policy, specify `pooler` argument.


In [91]:
evaluation(sts_dev, diffcse)

100%|██████████| 23/23 [00:01<00:00, 18.73it/s]
100%|██████████| 23/23 [00:01<00:00, 19.97it/s]


{'eval_pearson_cosine': 0.7449418939470757,
 'eval_spearman_cosine': 0.7450532388568598,
 'eval_pearson_manhattan': 0.7352325024479718,
 'eval_spearman_manhattan': 0.744819342671332,
 'eval_pearson_euclidean': 0.7355379483476462,
 'eval_spearman_euclidean': 0.745052981579705,
 'eval_pearson_dot': 0.7449418548077157,
 'eval_spearman_dot': 0.7450557130956711}

In [92]:
evaluation(sts_test, diffcse)

100%|██████████| 22/22 [00:01<00:00, 21.98it/s]
100%|██████████| 22/22 [00:00<00:00, 22.94it/s]


{'eval_pearson_cosine': 0.682553044782682,
 'eval_spearman_cosine': 0.6762187768957946,
 'eval_pearson_manhattan': 0.6717086608629301,
 'eval_spearman_manhattan': 0.6758890202105615,
 'eval_pearson_euclidean': 0.671870973444449,
 'eval_spearman_euclidean': 0.6762181471695697,
 'eval_pearson_dot': 0.6825530703190852,
 'eval_spearman_dot': 0.6762195327624885}

### 0721/epoch=3_update

In [8]:
model_name = "/home/keonwoo/anaconda3/envs/KoDiffCSE/ko_output_model_0721_epoch=5"
diffcse = DiffCSE(model_name)

Some weights of BertModel were not initialized from the model checkpoint at /home/keonwoo/anaconda3/envs/KoDiffCSE/ko_output_model_0721_epoch=5 and are newly initialized: ['bert.pooler.dense.weight', 'bert.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
07/26/2022 15:26:33 - INFO - __main__ -   Use `cls_before_pooler` for DiffCSE models. If you want to use other pooling policy, specify `pooler` argument.


In [9]:
evaluation(sts_dev, diffcse)

100%|██████████| 23/23 [00:02<00:00, 10.63it/s]
100%|██████████| 23/23 [00:01<00:00, 11.88it/s]


{'eval_pearson_cosine': 0.7537775419741063,
 'eval_spearman_cosine': 0.7522780040723923,
 'eval_pearson_manhattan': 0.7422700944976672,
 'eval_spearman_manhattan': 0.7511530251300458,
 'eval_pearson_euclidean': 0.7433892370711733,
 'eval_spearman_euclidean': 0.7522770526196226,
 'eval_pearson_dot': 0.753777531339438,
 'eval_spearman_dot': 0.7522808528224674}

In [10]:
evaluation(sts_test, diffcse)

100%|██████████| 22/22 [00:01<00:00, 12.48it/s]
100%|██████████| 22/22 [00:02<00:00, 10.19it/s]


{'eval_pearson_cosine': 0.6936895655983226,
 'eval_spearman_cosine': 0.6833076355803068,
 'eval_pearson_manhattan': 0.6804828262310914,
 'eval_spearman_manhattan': 0.68206494912713,
 'eval_pearson_euclidean': 0.6813340804473689,
 'eval_spearman_euclidean': 0.6833084982377086,
 'eval_pearson_dot': 0.6936895974779715,
 'eval_spearman_dot': 0.6833125891721886}

### ko_output_model_0721_epoch=5

In [104]:
model_name = "/home/keonwoo/anaconda3/envs/KoDiffCSE/ko_output_model_0721_epoch=5"
diffcse = DiffCSE(model_name)

Some weights of BertModel were not initialized from the model checkpoint at /home/keonwoo/anaconda3/envs/KoDiffCSE/ko_output_model_0721_epoch=5 and are newly initialized: ['bert.pooler.dense.weight', 'bert.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
07/22/2022 10:38:32 - INFO - __main__ -   Use `cls_before_pooler` for DiffCSE models. If you want to use other pooling policy, specify `pooler` argument.


In [105]:
evaluation(sts_dev, diffcse)

100%|██████████| 23/23 [00:01<00:00, 18.71it/s]
100%|██████████| 23/23 [00:01<00:00, 19.92it/s]


{'eval_pearson_cosine': 0.7537775419741063,
 'eval_spearman_cosine': 0.7522780040723923,
 'eval_pearson_manhattan': 0.7422700944976672,
 'eval_spearman_manhattan': 0.7511530251300458,
 'eval_pearson_euclidean': 0.7433892370711733,
 'eval_spearman_euclidean': 0.7522770526196226,
 'eval_pearson_dot': 0.753777531339438,
 'eval_spearman_dot': 0.7522808528224674}

In [106]:
evaluation(sts_test, diffcse)

100%|██████████| 22/22 [00:01<00:00, 21.88it/s]
100%|██████████| 22/22 [00:00<00:00, 22.89it/s]


{'eval_pearson_cosine': 0.6936895655983226,
 'eval_spearman_cosine': 0.6833076355803068,
 'eval_pearson_manhattan': 0.6804828262310914,
 'eval_spearman_manhattan': 0.68206494912713,
 'eval_pearson_euclidean': 0.6813340804473689,
 'eval_spearman_euclidean': 0.6833084982377086,
 'eval_pearson_dot': 0.6936895974779715,
 'eval_spearman_dot': 0.6833125891721886}

### ko_output_model_0722_epoch=3

In [None]:
model_name = "/home/keonwoo/anaconda3/envs/KoDiffCSE/ko_output_model_0722_epoch=3"
diffcse = DiffCSE(model_name)

In [113]:
evaluation(sts_dev, diffcse)

100%|██████████| 23/23 [00:01<00:00, 18.70it/s]
100%|██████████| 23/23 [00:01<00:00, 19.96it/s]


{'eval_pearson_cosine': 0.7496809739171452,
 'eval_spearman_cosine': 0.7498079530787528,
 'eval_pearson_manhattan': 0.740212708489065,
 'eval_spearman_manhattan': 0.7490060256935015,
 'eval_pearson_euclidean': 0.7409064642188012,
 'eval_spearman_euclidean': 0.7498067407181096,
 'eval_pearson_dot': 0.7496809759863061,
 'eval_spearman_dot': 0.7498089403312816}

In [114]:
evaluation(sts_test, diffcse)

100%|██████████| 22/22 [00:01<00:00, 21.89it/s]
100%|██████████| 22/22 [00:00<00:00, 22.86it/s]


{'eval_pearson_cosine': 0.6889303965449269,
 'eval_spearman_cosine': 0.6819790313592456,
 'eval_pearson_manhattan': 0.6792995190430124,
 'eval_spearman_manhattan': 0.6819558765224221,
 'eval_pearson_euclidean': 0.6791308900885948,
 'eval_spearman_euclidean': 0.6819778516592317,
 'eval_pearson_dot': 0.6889303110917411,
 'eval_spearman_dot': 0.6819811436246029}

In [116]:
model_name = "/home/keonwoo/anaconda3/envs/KoDiffCSE/ko_output_model_0722_epoch=3"
diffcse = DiffCSE(model_name)

Some weights of BertModel were not initialized from the model checkpoint at /home/keonwoo/anaconda3/envs/KoDiffCSE/ko_output_model_0722_epoch=3 and are newly initialized: ['bert.pooler.dense.weight', 'bert.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
07/24/2022 17:12:14 - INFO - __main__ -   Use `cls_before_pooler` for DiffCSE models. If you want to use other pooling policy, specify `pooler` argument.


In [117]:
evaluation(sts_dev, diffcse)

100%|██████████| 23/23 [00:01<00:00, 18.73it/s]
100%|██████████| 23/23 [00:01<00:00, 19.93it/s]


{'eval_pearson_cosine': 0.7497982564224046,
 'eval_spearman_cosine': 0.7498380643959762,
 'eval_pearson_manhattan': 0.7402435924557833,
 'eval_spearman_manhattan': 0.7497757682262272,
 'eval_pearson_euclidean': 0.7404489069959012,
 'eval_spearman_euclidean': 0.7498378084675342,
 'eval_pearson_dot': 0.7497983009103533,
 'eval_spearman_dot': 0.7498411844108553}

In [118]:
evaluation(sts_test, diffcse)

100%|██████████| 22/22 [00:01<00:00, 21.95it/s]
100%|██████████| 22/22 [00:00<00:00, 22.92it/s]


{'eval_pearson_cosine': 0.6890170554605594,
 'eval_spearman_cosine': 0.6823636423047701,
 'eval_pearson_manhattan': 0.6784021278191614,
 'eval_spearman_manhattan': 0.6822042786868168,
 'eval_pearson_euclidean': 0.6781986054362115,
 'eval_spearman_euclidean': 0.6823627262150469,
 'eval_pearson_dot': 0.6890170336613468,
 'eval_spearman_dot': 0.6823622649592354}

### wiki

In [19]:
model_name = "/home/keonwoo/anaconda3/envs/KoDiffCSE/ko_output_model_0725_epoch=3_wiki"
diffcse = DiffCSE(model_name)

Some weights of BertModel were not initialized from the model checkpoint at /home/keonwoo/anaconda3/envs/KoDiffCSE/ko_output_model_0725_epoch=3_wiki and are newly initialized: ['bert.pooler.dense.weight', 'bert.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
07/27/2022 13:08:22 - INFO - __main__ -   Use `cls_before_pooler` for DiffCSE models. If you want to use other pooling policy, specify `pooler` argument.


In [8]:
evaluation(sts_dev, diffcse)

100%|██████████| 23/23 [00:01<00:00, 18.58it/s]
100%|██████████| 23/23 [00:01<00:00, 19.95it/s]


{'eval_pearson_cosine': 0.7423878931147392,
 'eval_spearman_cosine': 0.7478232245078454,
 'eval_pearson_manhattan': 0.7440739440823739,
 'eval_spearman_manhattan': 0.747558367245619,
 'eval_pearson_euclidean': 0.7442273147515874,
 'eval_spearman_euclidean': 0.7478229657560735,
 'eval_pearson_dot': 0.7423879241012167,
 'eval_spearman_dot': 0.7478231529370889}

In [9]:
evaluation(sts_test, diffcse)

100%|██████████| 22/22 [00:01<00:00, 21.92it/s]
100%|██████████| 22/22 [00:00<00:00, 22.93it/s]


{'eval_pearson_cosine': 0.6885252617917498,
 'eval_spearman_cosine': 0.6901729124525737,
 'eval_pearson_manhattan': 0.6859499901719317,
 'eval_spearman_manhattan': 0.6891210197478684,
 'eval_pearson_euclidean': 0.6861915056026157,
 'eval_spearman_euclidean': 0.6901720180499071,
 'eval_pearson_dot': 0.6885252278955448,
 'eval_spearman_dot': 0.6901720203556381}

In [11]:
model_name = "/home/keonwoo/anaconda3/envs/KoDiffCSE/ko_output_model_0726_epoch=2_wiki"
diffcse = DiffCSE(model_name)

Some weights of BertModel were not initialized from the model checkpoint at /home/keonwoo/anaconda3/envs/KoDiffCSE/ko_output_model_0726_epoch=2_wiki and are newly initialized: ['bert.pooler.dense.weight', 'bert.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
07/26/2022 15:44:50 - INFO - __main__ -   Use `cls_before_pooler` for DiffCSE models. If you want to use other pooling policy, specify `pooler` argument.


In [12]:
evaluation(sts_dev, diffcse)

100%|██████████| 23/23 [00:01<00:00, 18.63it/s]
100%|██████████| 23/23 [00:01<00:00, 19.90it/s]


{'eval_pearson_cosine': 0.7355747969457602,
 'eval_spearman_cosine': 0.7390648904711767,
 'eval_pearson_manhattan': 0.7326426533278265,
 'eval_spearman_manhattan': 0.7381511393261372,
 'eval_pearson_euclidean': 0.7332278939062458,
 'eval_spearman_euclidean': 0.7390643183949986,
 'eval_pearson_dot': 0.7355747733729483,
 'eval_spearman_dot': 0.7390650220120545}

In [13]:
evaluation(sts_test, diffcse)

100%|██████████| 22/22 [00:01<00:00, 21.85it/s]
100%|██████████| 22/22 [00:00<00:00, 22.77it/s]


{'eval_pearson_cosine': 0.6884438642495199,
 'eval_spearman_cosine': 0.6853356579347044,
 'eval_pearson_manhattan': 0.6796456195169459,
 'eval_spearman_manhattan': 0.6850308792888224,
 'eval_pearson_euclidean': 0.6797312292188201,
 'eval_spearman_euclidean': 0.6853349291175604,
 'eval_pearson_dot': 0.6884438761847315,
 'eval_spearman_dot': 0.6853380693996083}

: 

In [17]:
model_name = "/home/keonwoo/anaconda3/envs/KoDiffCSE/ko_output_model_0726_epoch=3_wiki_roberta"
diffcse = DiffCSE(model_name)

Some weights of RobertaModel were not initialized from the model checkpoint at /home/keonwoo/anaconda3/envs/KoDiffCSE/ko_output_model_0726_epoch=3_wiki_roberta and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
07/28/2022 17:15:08 - INFO - __main__ -   Use `cls_before_pooler` for DiffCSE models. If you want to use other pooling policy, specify `pooler` argument.


In [18]:
evaluation(sts_dev, diffcse)

100%|██████████| 23/23 [00:01<00:00, 21.54it/s]
100%|██████████| 23/23 [00:00<00:00, 23.05it/s]


{'eval_pearson_cosine': 0.8687910643488711,
 'eval_spearman_cosine': 0.8716889054665014,
 'eval_pearson_manhattan': 0.865198609618478,
 'eval_spearman_manhattan': 0.8714871023375671,
 'eval_pearson_euclidean': 0.8650005637173033,
 'eval_spearman_euclidean': 0.8716889445925671,
 'eval_pearson_dot': 0.8687910668719111,
 'eval_spearman_dot': 0.8716884249150619}

In [19]:
evaluation(sts_test, diffcse)

100%|██████████| 22/22 [00:00<00:00, 26.09it/s]
100%|██████████| 22/22 [00:00<00:00, 27.28it/s]


{'eval_pearson_cosine': 0.8378953046255182,
 'eval_spearman_cosine': 0.8418862800933891,
 'eval_pearson_manhattan': 0.8405632262518306,
 'eval_spearman_manhattan': 0.8420296218357668,
 'eval_pearson_euclidean': 0.8406600675926741,
 'eval_spearman_euclidean': 0.8418890852259043,
 'eval_pearson_dot': 0.8378953029110046,
 'eval_spearman_dot': 0.8418864059664883}

: 

In [8]:
model_name = "/home/keonwoo/anaconda3/envs/KoDiffCSE/klue_roberta_wiki_nli"
diffcse = DiffCSE(model_name)

Some weights of RobertaModel were not initialized from the model checkpoint at /home/keonwoo/anaconda3/envs/KoDiffCSE/klue_roberta_wiki_nli and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
07/28/2022 15:10:58 - INFO - __main__ -   Use `cls_before_pooler` for DiffCSE models. If you want to use other pooling policy, specify `pooler` argument.


In [9]:
evaluation(sts_dev, diffcse)

100%|██████████| 23/23 [00:01<00:00, 20.39it/s]
100%|██████████| 23/23 [00:01<00:00, 21.84it/s]


{'eval_pearson_cosine': 0.81116548985272,
 'eval_spearman_cosine': 0.8070159995898994,
 'eval_pearson_manhattan': 0.8039373824062716,
 'eval_spearman_manhattan': 0.8069157506661292,
 'eval_pearson_euclidean': 0.8041587886543301,
 'eval_spearman_euclidean': 0.8070160873304586,
 'eval_pearson_dot': 0.8111654768822352,
 'eval_spearman_dot': 0.8070161219556101}

In [10]:
evaluation(sts_test, diffcse)

100%|██████████| 22/22 [00:00<00:00, 24.18it/s]
100%|██████████| 22/22 [00:00<00:00, 24.85it/s]


{'eval_pearson_cosine': 0.7672836498111705,
 'eval_spearman_cosine': 0.7530479775086246,
 'eval_pearson_manhattan': 0.7652537526424827,
 'eval_spearman_manhattan': 0.7521709432779005,
 'eval_pearson_euclidean': 0.7661168415898592,
 'eval_spearman_euclidean': 0.7530515342250392,
 'eval_pearson_dot': 0.7672836503851974,
 'eval_spearman_dot': 0.7530506724231747}

In [12]:
model_name = "/home/keonwoo/anaconda3/envs/KoDiffCSE/sroberta_change_lr"
diffcse = DiffCSE(model_name)

Some weights of RobertaModel were not initialized from the model checkpoint at /home/keonwoo/anaconda3/envs/KoDiffCSE/sroberta_change_lr and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
07/28/2022 16:51:24 - INFO - __main__ -   Use `cls_before_pooler` for DiffCSE models. If you want to use other pooling policy, specify `pooler` argument.


In [13]:
evaluation(sts_dev, diffcse)

100%|██████████| 23/23 [00:01<00:00, 21.08it/s]
100%|██████████| 23/23 [00:01<00:00, 22.40it/s]


{'eval_pearson_cosine': 0.8677051357322766,
 'eval_spearman_cosine': 0.8703762435501177,
 'eval_pearson_manhattan': 0.8637662440953195,
 'eval_spearman_manhattan': 0.8705912673676367,
 'eval_pearson_euclidean': 0.8634209675720484,
 'eval_spearman_euclidean': 0.8703764408621967,
 'eval_pearson_dot': 0.8677051350098797,
 'eval_spearman_dot': 0.8703760251444351}

In [14]:
evaluation(sts_test, diffcse)

100%|██████████| 22/22 [00:00<00:00, 25.99it/s]
100%|██████████| 22/22 [00:00<00:00, 26.97it/s]


{'eval_pearson_cosine': 0.8354898438099596,
 'eval_spearman_cosine': 0.8389973096191164,
 'eval_pearson_manhattan': 0.8373701973555356,
 'eval_spearman_manhattan': 0.8391687376836482,
 'eval_pearson_euclidean': 0.8371371947826659,
 'eval_spearman_euclidean': 0.8390001149645382,
 'eval_pearson_dot': 0.8354898489586187,
 'eval_spearman_dot': 0.8389966127531397}

In [7]:
model_name = "/home/keonwoo/anaconda3/envs/KoDiffCSE/sroberta_change_lr"
diffcse = DiffCSE(model_name)

Some weights of RobertaModel were not initialized from the model checkpoint at /home/keonwoo/anaconda3/envs/KoDiffCSE/sroberta_change_lr and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
07/29/2022 11:47:46 - INFO - __main__ -   Use `cls_before_pooler` for DiffCSE models. If you want to use other pooling policy, specify `pooler` argument.


In [8]:
evaluation(sts_dev, diffcse)

100%|██████████| 23/23 [00:01<00:00, 20.99it/s]
100%|██████████| 23/23 [00:00<00:00, 23.10it/s]


{'eval_pearson_cosine': 0.8705672441523,
 'eval_spearman_cosine': 0.8731524742242264,
 'eval_pearson_manhattan': 0.8667139824831696,
 'eval_spearman_manhattan': 0.8731835296107339,
 'eval_pearson_euclidean': 0.8665520167926218,
 'eval_spearman_euclidean': 0.8731523597859316,
 'eval_pearson_dot': 0.8705672400657357,
 'eval_spearman_dot': 0.8731528690272887}

In [9]:
evaluation(sts_test, diffcse)

100%|██████████| 22/22 [00:00<00:00, 25.44it/s]
100%|██████████| 22/22 [00:00<00:00, 26.76it/s]


{'eval_pearson_cosine': 0.8396358894503797,
 'eval_spearman_cosine': 0.8445370593154237,
 'eval_pearson_manhattan': 0.8422651929042944,
 'eval_spearman_manhattan': 0.8445982693912436,
 'eval_pearson_euclidean': 0.8422348443452508,
 'eval_spearman_euclidean': 0.8445401280784124,
 'eval_pearson_dot': 0.8396358914893068,
 'eval_spearman_dot': 0.8445356720337045}

: 

### 0731

In [8]:
model_name = "/home/keonwoo/anaconda3/envs/KoDiffCSE/sroberta_change_lr_7e-7"
diffcse = DiffCSE(model_name)

Some weights of RobertaModel were not initialized from the model checkpoint at /home/keonwoo/anaconda3/envs/KoDiffCSE/sroberta_change_lr_7e-7 and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
07/31/2022 20:39:06 - INFO - __main__ -   Use `cls_before_pooler` for DiffCSE models. If you want to use other pooling policy, specify `pooler` argument.


In [9]:
evaluation(sts_dev, diffcse)

100%|██████████| 23/23 [00:01<00:00, 20.61it/s]
100%|██████████| 23/23 [00:00<00:00, 23.03it/s]


{'eval_pearson_cosine': 0.8703719225199198,
 'eval_spearman_cosine': 0.8728109872851335,
 'eval_pearson_manhattan': 0.8662857455556076,
 'eval_spearman_manhattan': 0.8728083176251981,
 'eval_pearson_euclidean': 0.8661013996262362,
 'eval_spearman_euclidean': 0.8728110737701502,
 'eval_pearson_dot': 0.8703719178834881,
 'eval_spearman_dot': 0.87281079273135}

In [10]:
evaluation(sts_test, diffcse)

100%|██████████| 22/22 [00:00<00:00, 25.80it/s]
100%|██████████| 22/22 [00:00<00:00, 27.23it/s]


{'eval_pearson_cosine': 0.8393259454132976,
 'eval_spearman_cosine': 0.8441974181976699,
 'eval_pearson_manhattan': 0.8419134079057237,
 'eval_spearman_manhattan': 0.8442122392961918,
 'eval_pearson_euclidean': 0.841858186604366,
 'eval_spearman_euclidean': 0.8442002231598625,
 'eval_pearson_dot': 0.8393259492453873,
 'eval_spearman_dot': 0.8441973863636499}

In [11]:
model_name = "/home/keonwoo/anaconda3/envs/KoDiffCSE/sroberta_change_lr_9e-7"
diffcse = DiffCSE(model_name)

Some weights of RobertaModel were not initialized from the model checkpoint at /home/keonwoo/anaconda3/envs/KoDiffCSE/sroberta_change_lr_9e-7 and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
07/31/2022 20:40:15 - INFO - __main__ -   Use `cls_before_pooler` for DiffCSE models. If you want to use other pooling policy, specify `pooler` argument.


In [12]:
evaluation(sts_dev, diffcse)

100%|██████████| 23/23 [00:01<00:00, 21.21it/s]
100%|██████████| 23/23 [00:01<00:00, 22.21it/s]


{'eval_pearson_cosine': 0.8704430788641979,
 'eval_spearman_cosine': 0.8730103762576663,
 'eval_pearson_manhattan': 0.8665732188156173,
 'eval_spearman_manhattan': 0.8730911458636692,
 'eval_pearson_euclidean': 0.8663845005636351,
 'eval_spearman_euclidean': 0.8730106774930061,
 'eval_pearson_dot': 0.8704430749079863,
 'eval_spearman_dot': 0.8730103217003322}

In [13]:
evaluation(sts_test, diffcse)

100%|██████████| 22/22 [00:00<00:00, 26.34it/s]
100%|██████████| 22/22 [00:00<00:00, 27.00it/s]


{'eval_pearson_cosine': 0.8393245922075555,
 'eval_spearman_cosine': 0.8441979956025604,
 'eval_pearson_manhattan': 0.8419665061597279,
 'eval_spearman_manhattan': 0.844372613443418,
 'eval_pearson_euclidean': 0.841925903300984,
 'eval_spearman_euclidean': 0.844200474484541,
 'eval_pearson_dot': 0.8393245878975801,
 'eval_spearman_dot': 0.8441987306730684}

In [14]:
model_name = "/home/keonwoo/anaconda3/envs/KoDiffCSE/sroberta_change_lr_5e-7"
diffcse = DiffCSE(model_name)

Some weights of RobertaModel were not initialized from the model checkpoint at /home/keonwoo/anaconda3/envs/KoDiffCSE/sroberta_change_lr_5e-7 and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
07/31/2022 20:40:46 - INFO - __main__ -   Use `cls_before_pooler` for DiffCSE models. If you want to use other pooling policy, specify `pooler` argument.


In [15]:
evaluation(sts_dev, diffcse)

100%|██████████| 23/23 [00:01<00:00, 21.07it/s]
100%|██████████| 23/23 [00:00<00:00, 23.28it/s]


{'eval_pearson_cosine': 0.8700885518135814,
 'eval_spearman_cosine': 0.8725837960783911,
 'eval_pearson_manhattan': 0.865985355644485,
 'eval_spearman_manhattan': 0.8726118291292423,
 'eval_pearson_euclidean': 0.8657674603393741,
 'eval_spearman_euclidean': 0.8725844090204875,
 'eval_pearson_dot': 0.8700885523604038,
 'eval_spearman_dot': 0.8725841077774202}

In [16]:
evaluation(sts_test, diffcse)

100%|██████████| 22/22 [00:00<00:00, 25.90it/s]
100%|██████████| 22/22 [00:00<00:00, 26.63it/s]


{'eval_pearson_cosine': 0.8389062028665961,
 'eval_spearman_cosine': 0.8436164118072936,
 'eval_pearson_manhattan': 0.8412994493413722,
 'eval_spearman_manhattan': 0.8435513650948918,
 'eval_pearson_euclidean': 0.8412178273116202,
 'eval_spearman_euclidean': 0.8436192168123077,
 'eval_pearson_dot': 0.8389062015868869,
 'eval_spearman_dot': 0.8436176915069955}

In [17]:
model_name = "/home/keonwoo/anaconda3/envs/KoDiffCSE/sroberta_change_lr_1e-7"
diffcse = DiffCSE(model_name)

Some weights of RobertaModel were not initialized from the model checkpoint at /home/keonwoo/anaconda3/envs/KoDiffCSE/sroberta_change_lr_1e-7 and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
07/31/2022 20:41:18 - INFO - __main__ -   Use `cls_before_pooler` for DiffCSE models. If you want to use other pooling policy, specify `pooler` argument.


In [18]:
evaluation(sts_dev, diffcse)

100%|██████████| 23/23 [00:01<00:00, 21.17it/s]
100%|██████████| 23/23 [00:01<00:00, 22.60it/s]


{'eval_pearson_cosine': 0.8703765260274898,
 'eval_spearman_cosine': 0.8729237864028652,
 'eval_pearson_manhattan': 0.8663161809252254,
 'eval_spearman_manhattan': 0.8730032953029576,
 'eval_pearson_euclidean': 0.8661111072564047,
 'eval_spearman_euclidean': 0.8729241421971914,
 'eval_pearson_dot': 0.8703765329273702,
 'eval_spearman_dot': 0.87292380995555}

In [19]:
evaluation(sts_test, diffcse)

100%|██████████| 22/22 [00:00<00:00, 26.08it/s]
100%|██████████| 22/22 [00:00<00:00, 26.68it/s]


{'eval_pearson_cosine': 0.839479303863836,
 'eval_spearman_cosine': 0.8440946263966163,
 'eval_pearson_manhattan': 0.8419249592818503,
 'eval_spearman_manhattan': 0.8440635670952641,
 'eval_pearson_euclidean': 0.8418341181072792,
 'eval_spearman_euclidean': 0.8440974313663845,
 'eval_pearson_dot': 0.839479306331645,
 'eval_spearman_dot': 0.8440959887698793}

In [20]:
model_name = "/home/keonwoo/anaconda3/envs/KoDiffCSE/sroberta_change_lr_7e-8"
diffcse = DiffCSE(model_name)

Some weights of RobertaModel were not initialized from the model checkpoint at /home/keonwoo/anaconda3/envs/KoDiffCSE/sroberta_change_lr_7e-8 and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
07/31/2022 20:41:48 - INFO - __main__ -   Use `cls_before_pooler` for DiffCSE models. If you want to use other pooling policy, specify `pooler` argument.


In [21]:
evaluation(sts_dev, diffcse)

100%|██████████| 23/23 [00:01<00:00, 21.29it/s]
100%|██████████| 23/23 [00:01<00:00, 22.13it/s]


{'eval_pearson_cosine': 0.8704265489944512,
 'eval_spearman_cosine': 0.873031324174672,
 'eval_pearson_manhattan': 0.866514516247571,
 'eval_spearman_manhattan': 0.8729967859876443,
 'eval_pearson_euclidean': 0.8663436886647545,
 'eval_spearman_euclidean': 0.8730314106554843,
 'eval_pearson_dot': 0.8704265425238364,
 'eval_spearman_dot': 0.8730311992332854}

In [22]:
evaluation(sts_test, diffcse)

100%|██████████| 22/22 [00:00<00:00, 26.13it/s]
100%|██████████| 22/22 [00:00<00:00, 26.74it/s]


{'eval_pearson_cosine': 0.8396686993720296,
 'eval_spearman_cosine': 0.8442649425070043,
 'eval_pearson_manhattan': 0.8421512399138709,
 'eval_spearman_manhattan': 0.8442785396188311,
 'eval_pearson_euclidean': 0.8420734838543887,
 'eval_spearman_euclidean': 0.844268009345378,
 'eval_pearson_dot': 0.8396687038136059,
 'eval_spearman_dot': 0.8442652043881809}

: 

In [7]:
model_name = "/home/keonwoo/anaconda3/envs/KoDiffCSE/sroberta_change_lr_1e-6_0731"
diffcse = DiffCSE(model_name)

Some weights of RobertaModel were not initialized from the model checkpoint at /home/keonwoo/anaconda3/envs/KoDiffCSE/sroberta_change_lr_1e-6_0731 and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
08/01/2022 16:07:33 - INFO - __main__ -   Use `cls_before_pooler` for DiffCSE models. If you want to use other pooling policy, specify `pooler` argument.


In [8]:
evaluation(sts_dev, diffcse)

100%|██████████| 23/23 [00:01<00:00, 19.61it/s]
100%|██████████| 23/23 [00:01<00:00, 22.15it/s]


{'eval_pearson_cosine': 0.8703825925377942,
 'eval_spearman_cosine': 0.8729544023259009,
 'eval_pearson_manhattan': 0.8665877997685989,
 'eval_spearman_manhattan': 0.8730924966068773,
 'eval_pearson_euclidean': 0.8663983282647466,
 'eval_spearman_euclidean': 0.8729550748150902,
 'eval_pearson_dot': 0.870382585703337,
 'eval_spearman_dot': 0.872954207770749}

In [9]:
evaluation(sts_test, diffcse)

100%|██████████| 22/22 [00:00<00:00, 25.79it/s]
100%|██████████| 22/22 [00:00<00:00, 26.71it/s]


{'eval_pearson_cosine': 0.8392317814416363,
 'eval_spearman_cosine': 0.8442386998277233,
 'eval_pearson_manhattan': 0.842104208319224,
 'eval_spearman_manhattan': 0.8441231587128242,
 'eval_pearson_euclidean': 0.842034212732687,
 'eval_spearman_euclidean': 0.8442417632094645,
 'eval_pearson_dot': 0.8392317796629851,
 'eval_spearman_dot': 0.8442394917029973}

: 

In [7]:
model_name = "/home/keonwoo/anaconda3/envs/KoDiffCSE/last_training_0802"
diffcse = DiffCSE(model_name)

Some weights of RobertaModel were not initialized from the model checkpoint at /home/keonwoo/anaconda3/envs/KoDiffCSE/last_training_0802 and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
08/02/2022 17:04:33 - INFO - __main__ -   Use `cls_before_pooler` for DiffCSE models. If you want to use other pooling policy, specify `pooler` argument.


In [8]:
evaluation(sts_dev, diffcse)

100%|██████████| 23/23 [00:01<00:00, 12.13it/s]
100%|██████████| 23/23 [00:01<00:00, 12.44it/s]


{'eval_pearson_cosine': 0.8701157612651806,
 'eval_spearman_cosine': 0.8728667472958714,
 'eval_pearson_manhattan': 0.8665277694786487,
 'eval_spearman_manhattan': 0.8729877684625479,
 'eval_pearson_euclidean': 0.8662967386974056,
 'eval_spearman_euclidean': 0.8728673652290121,
 'eval_pearson_dot': 0.8701157605137709,
 'eval_spearman_dot': 0.872866518239194}

In [9]:
evaluation(sts_test, diffcse)

100%|██████████| 22/22 [00:01<00:00, 14.28it/s]
100%|██████████| 22/22 [00:01<00:00, 16.89it/s]


{'eval_pearson_cosine': 0.8392139146784754,
 'eval_spearman_cosine': 0.8442226659539233,
 'eval_pearson_manhattan': 0.8421124381662313,
 'eval_spearman_manhattan': 0.844189450784655,
 'eval_pearson_euclidean': 0.8420537574882557,
 'eval_spearman_euclidean': 0.8442252090330523,
 'eval_pearson_dot': 0.8392139237100509,
 'eval_spearman_dot': 0.8442242373481732}

: 

### Qualititive Analysis

In [15]:
example_sentences = ['한 남자가 음식을 먹는다.',
              '한 남자가 빵 한 조각을 먹는다.',
              '그 여자가 아이를 돌본다.',
              '한 남자가 말을 탄다.',
              '한 여자가 바이올린을 연주한다.',
              '두 남자가 수레를 숲 속으로 밀었다.',
              '한 남자가 담으로 싸인 땅에서 백마를 타고 있다.',
              '원숭이 한 마리가 드럼을 연주한다.',
              '치타 한 마리가 먹이 뒤에서 달리고 있다.']

example_queries = ['한 남자가 파스타를 먹는다.',
               '고릴라 의상을 입은 누군가가 드럼을 연주하고 있다.',
               '치타가 들판을 가로 질러 먹이를 쫓는다.']

In [21]:
print("\n=========Calculate cosine similarities between queries and sentences============\n")
similarities = diffcse.similarity(example_queries, example_sentences)
# print(similarities)
print("\n=========Naive brute force search============\n")
diffcse.build_index(example_sentences, use_faiss=False)
results = diffcse.search(example_queries)
for i, result in enumerate(results):
    print("Retrieval results for query: {}".format(example_queries[i]))
    for sentence, score in result:
        print("    {}  (cosine similarity: {:.4f})".format(sentence, score))
    print("")

100%|██████████| 1/1 [00:00<00:00, 75.05it/s]
100%|██████████| 1/1 [00:00<00:00, 76.08it/s]
07/27/2022 13:08:37 - INFO - __main__ -   Encoding embeddings for sentences...
100%|██████████| 1/1 [00:00<00:00, 76.10it/s]
07/27/2022 13:08:37 - INFO - __main__ -   Building index...
07/27/2022 13:08:37 - INFO - __main__ -   Finished
100%|██████████| 1/1 [00:00<00:00, 85.56it/s]
100%|██████████| 1/1 [00:00<00:00, 82.70it/s]
  0%|          | 0/1 [00:00<?, ?it/s]







100%|██████████| 1/1 [00:00<00:00, 79.80it/s]

Retrieval results for query: 한 남자가 파스타를 먹는다.
    한 남자가 음식을 먹는다.  (cosine similarity: 0.5752)
    한 남자가 빵 한 조각을 먹는다.  (cosine similarity: 0.5028)
    치타 한 마리가 먹이 뒤에서 달리고 있다.  (cosine similarity: 0.1983)
    한 여자가 바이올린을 연주한다.  (cosine similarity: 0.1840)
    한 남자가 담으로 싸인 땅에서 백마를 타고 있다.  (cosine similarity: 0.1567)

Retrieval results for query: 고릴라 의상을 입은 누군가가 드럼을 연주하고 있다.
    원숭이 한 마리가 드럼을 연주한다.  (cosine similarity: 0.7175)
    한 여자가 바이올린을 연주한다.  (cosine similarity: 0.3894)
    한 남자가 담으로 싸인 땅에서 백마를 타고 있다.  (cosine similarity: 0.3440)
    치타 한 마리가 먹이 뒤에서 달리고 있다.  (cosine similarity: 0.3329)
    한 남자가 음식을 먹는다.  (cosine similarity: 0.2224)

Retrieval results for query: 치타가 들판을 가로 질러 먹이를 쫓는다.
    치타 한 마리가 먹이 뒤에서 달리고 있다.  (cosine similarity: 0.8224)
    원숭이 한 마리가 드럼을 연주한다.  (cosine similarity: 0.3260)
    한 남자가 담으로 싸인 땅에서 백마를 타고 있다.  (cosine similarity: 0.2750)
    두 남자가 수레를 숲 속으로 밀었다.  (cosine similarity: 0.2340)
    한 남자가 음식을 먹는다.  (cosine similarity: 0.2312)






In [13]:
print("\n=========Search with Faiss backend============\n")
diffcse.build_index(example_sentences, use_faiss=True)
results = diffcse.search(example_queries)
for i, result in enumerate(results):
    print("Retrieval results for query: {}".format(example_queries[i]))
    for sentence, score in result:
        print("    {}  (cosine similarity: {:.4f})".format(sentence, score))
    print("")

07/27/2022 13:06:51 - INFO - __main__ -   Encoding embeddings for sentences...
100%|██████████| 1/1 [00:00<00:00, 68.96it/s]
07/27/2022 13:06:51 - INFO - __main__ -   Building index...
07/27/2022 13:06:51 - INFO - __main__ -   Finished
100%|██████████| 1/1 [00:00<00:00, 86.28it/s]
100%|██████████| 1/1 [00:00<00:00, 84.06it/s]
100%|██████████| 1/1 [00:00<00:00, 83.95it/s]



Retrieval results for query: 한 남자가 파스타를 먹는다.

Retrieval results for query: 고릴라 의상을 입은 누군가가 드럼을 연주하고 있다.
    원숭이 한 마리가 드럼을 연주한다.  (cosine similarity: 0.7175)

Retrieval results for query: 치타가 들판을 가로 질러 먹이를 쫓는다.
    치타 한 마리가 먹이 뒤에서 달리고 있다.  (cosine similarity: 0.8224)






# valid 수정

In [34]:
from transformers import AutoTokenizer, AutoModel, DataCollatorWithPadding
from transformers import AutoModelForSequenceClassification
from transformers import TrainingArguments
import torch
import pandas as pd
import numpy as np
from torch.utils.data import Dataset, DataLoader
from torch.optim import AdamW
from transformers import get_scheduler

In [58]:
class contentDataset(Dataset):
    def __init__(self, file, tok, max_len, pad_index=None):
        super().__init__()
        self.tok =tok
        self.max_len = max_len
        self.content = pd.read_csv(file)
        self.len = self.content.shape[0]
        self.pad_index = self.tok.pad_token
    
    def add_padding_data(self, inputs, max_len):
        if len(inputs) < max_len:
            # pad = np.array([self.pad_index] * (max_len - len(inputs)))
            pad = np.array([0] * (max_len - len(inputs)))
            inputs = np.concatenate([inputs, pad])
            return inputs
        else:
            inputs = inputs[:max_len]
            return inputs
    
    def __getitem__(self,idx):
        instance = self.content.iloc[idx]
        # text = "[CLS]" + instance['content'] + "[SEP]"
        text_1 = instance['sentence1']
        text_2 = instance['sentence2']

        text_1_input_ids = self.tok.encode(text_1)
        text_2_input_ids = self.tok.encode(text_2)
        
        text_1_input_ids = self.add_padding_data(text_1_input_ids, max_len=self.max_len)
        text_2_input_ids = self.add_padding_data(text_2_input_ids, max_len=self.max_len)

        label_ids = instance['score']
        # encoder_attention_mask = input_ids.ne(0).float()
        return {"text1_encoder_input_ids" : np.array(text_1_input_ids, dtype=np.int_),
                "text2_encoder_input_ids" : np.array(text_2_input_ids, dtype=np.int_),
                "score" : np.array(label_ids,dtype=np.int_)}
        
    def __len__(self):
        return self.len

In [36]:
from kobert_tokenizer import KoBertTokenizer

In [41]:
tokenizer = KoBertTokenizer.from_pretrained('monologg/kobert')
file_path = "/home/keonwoo/anaconda3/envs/KoDiffCSE/data/ko_sts_dev.txt"

In [59]:
valid_setup = contentDataset(file = file_path, tok = tokenizer, max_len = 512)

In [60]:
valid_dataloader = DataLoader(valid_setup, batch_size=32, shuffle=False)

In [61]:
device =torch.device("cuda:0")
for batch in valid_dataloader:
    print(batch)
    batch = {k: v.to(device) for k, v in batch.items()}
    print('\n')
    print(batch)
    break

{'text1_encoder_input_ids': tensor([[   2, 3139, 6213,  ...,    0,    0,    0],
        [   2, 3233, 6804,  ...,    0,    0,    0],
        [   2, 4955, 1423,  ...,    0,    0,    0],
        ...,
        [   2, 4955, 1423,  ...,    0,    0,    0],
        [   2, 1185, 3318,  ...,    0,    0,    0],
        [   2, 4955,  517,  ...,    0,    0,    0]]), 'text2_encoder_input_ids': tensor([[   2, 3139, 6213,  ...,    0,    0,    0],
        [   2, 3121, 5330,  ...,    0,    0,    0],
        [   2, 1423, 5330,  ...,    0,    0,    0],
        ...,
        [   2, 4955, 3318,  ...,    0,    0,    0],
        [   2, 1423, 5330,  ...,    0,    0,    0],
        [   2, 4955,  517,  ...,    0,    0,    0]]), 'score': tensor([5, 4, 5, 2, 2, 2, 5, 2, 3, 5, 3, 1, 5, 5, 4, 0, 2, 5, 4, 0, 3, 1, 3, 2,
        1, 1, 4, 3, 1, 0, 2, 5])}


{'text1_encoder_input_ids': tensor([[   2, 3139, 6213,  ...,    0,    0,    0],
        [   2, 3233, 6804,  ...,    0,    0,    0],
        [   2, 4955, 1423,  ...,  

In [None]:
sentence1 = sts_dev['sentence1'].tolist()
sentence2 = sts_dev['sentence2'].tolist()

sentence1_batch = tokenizer.batch_encode_plus(sentence1, return_tensors='pt', padding=True)
sentence2_batch = tokenizer.batch_encode_plus(sentence2, return_tensors='pt', padding=True)

In [51]:
tokenizer.batch_encode_plus(['안녕하세요','반갑습니다'])

{'input_ids': [[2, 3135, 5724, 7814, 3], [2, 2207, 5345, 6701, 3]], 'token_type_ids': [[0, 0, 0, 0, 0], [0, 0, 0, 0, 0]], 'attention_mask': [[1, 1, 1, 1, 1], [1, 1, 1, 1, 1]]}

In [53]:
tokenizer.encode('안녕하세요','반갑습니다')

[2, 3135, 5724, 7814, 3, 2207, 5345, 6701, 3]

# Original Paper code

In [None]:
import pdb
pdb.set_trace()

In [None]:
def evaluate(
    self,
    eval_dataset: Optional[Dataset] = None,
    ignore_keys: Optional[List[str]] = None,
    metric_key_prefix: str = "eval",
    eval_senteval_transfer: bool = False,
) -> Dict[str, float]:

    # SentEval prepare and batcher
    def prepare(params, samples):
        return

    def batcher(params, batch):
        sentences = [' '.join(s) for s in batch]
        batch = self.tokenizer.batch_encode_plus(
            sentences,
            return_tensors='pt',
            padding=True,
        )
        for k in batch:
            batch[k] = batch[k].to(self.args.device)
        with torch.no_grad():
            outputs = self.model(**batch, output_hidden_states=True, return_dict=True, sent_emb=True)
            pooler_output = outputs.pooler_output
        return pooler_output.cpu()

    # Set params for SentEval (fastmode)
    params = {'task_path': PATH_TO_DATA, 'usepytorch': True, 'kfold': 5}
    params['classifier'] = {'nhid': 0, 'optim': 'rmsprop', 'batch_size': 128,
                                        'tenacity': 3, 'epoch_size': 2}

    se = senteval.engine.SE(params, batcher, prepare)
    tasks = ['STSBenchmark', 'SICKRelatedness']
    if eval_senteval_transfer or self.args.eval_transfer:
        tasks = ['STSBenchmark', 'SICKRelatedness', 'MR', 'CR', 'SUBJ', 'MPQA', 'SST2', 'TREC', 'MRPC']
    self.model.eval()
    results = se.eval(tasks)
    
    stsb_spearman = results['STSBenchmark']['dev']['spearman'][0]
    sickr_spearman = results['SICKRelatedness']['dev']['spearman'][0]

    metrics = {"eval_stsb_spearman": stsb_spearman, "eval_sickr_spearman": sickr_spearman, "eval_avg_sts": (stsb_spearman + sickr_spearman) / 2} 
    if eval_senteval_transfer or self.args.eval_transfer:
        avg_transfer = 0
        for task in ['MR', 'CR', 'SUBJ', 'MPQA', 'SST2', 'TREC', 'MRPC']:
            avg_transfer += results[task]['devacc']
            metrics['eval_{}'.format(task)] = results[task]['devacc']
        avg_transfer /= 7
        metrics['eval_avg_transfer'] = avg_transfer

    self.log(metrics)
    return metrics