from transformers import BertModel, BertTokenizer
# Load the pretrained "bert-large-uncased" encoder and write it to a local directory.
model = BertModel.from_pretrained("bert-large-uncased")
model.save_pretrained("/home/amitgajbhiye/cardiff_work/100k_data_experiments/bert_large_uncased_pretrained/model/")
# Do the same for the matching tokenizer, saved in a sibling directory.
tok = BertTokenizer.from_pretrained("bert-large-uncased")
tok.save_pretrained("/home/amitgajbhiye/cardiff_work/100k_data_experiments/bert_large_uncased_pretrained/tokenizer/")

In [None]:
import numpy as np
import pandas as pd
import os

import torch
import nltk

from model.concept_property_model import ConceptPropertyModel
from utils.functions import create_model
from utils.functions import load_pretrained_model
from utils.functions import read_config
from utils.functions import mcrae_dataset_and_dataloader

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
# assert os.environ["CONDA_DEFAULT_ENV"] == "gvenv", "Activate 'gvenv' conda environment"

print (f"Device Name : {device}")
# CONDA_DEFAULT_ENV may be absent from the environment; use .get so that this
# purely informational print cannot abort the run with a KeyError.
print (f"Conda Environment Name : {os.environ.get('CONDA_DEFAULT_ENV', 'not set')}")

In [None]:
def pos_tagger(x):
    """Tokenize the string ``x`` with NLTK and return the POS-tagged tokens.

    The result is whatever ``nltk.pos_tag`` returns for the tokens produced by
    ``nltk.word_tokenize(x)``.
    """
    return nltk.pos_tag(nltk.word_tokenize(x))
    

In [None]:
def tag_adj(pos_tag_list):
    """Return True when every entry of ``pos_tag_list`` is tagged "JJ".

    Each entry is indexed at position 1 to read its tag. An empty list yields
    True, because there is no entry with a different tag.
    """
    return all(pair[1] == "JJ" for pair in pos_tag_list)


In [None]:
# Function to count properties

# Candidate inputs for read_con_prop_data, given as paths relative to the working directory.
local_files_list = ["data/train_data/500k_MSCG/mscg_prefix_adj_41k_train.tsv",
             "data/train_data/500k_MSCG/gkb_prop_500k_train.tsv"]

# Only the first of the two files above.
prefix_adj_local_files_list = ["data/train_data/500k_MSCG/mscg_prefix_adj_41k_train.tsv"]

# The same two file names under an absolute /scratch prefix
# (presumably for the "hawk" machine -- confirm).
hawk_files_list = ["/scratch/c.scmag3/biencoder_concept_property/data/train_data/500k_MSCG/mscg_prefix_adj_41k_train.tsv",
             "/scratch/c.scmag3/biencoder_concept_property/data/train_data/500k_MSCG/gkb_prop_500k_train.tsv"]

def read_con_prop_data (files_list, output_file="data/evaluation_data/nn_analysis/only_prefix_adj_with_prop_count.tsv"):
    """Merge concept/property TSV files and annotate every row with its property frequency.

    Each file in ``files_list`` is read as a header-less, tab-separated file
    whose columns are named ``concept`` and ``property``. Rows with missing
    values and duplicate (concept, property) pairs are dropped, then
    ``prop_count`` is set to the number of remaining rows that share the row's
    property. The result is written to ``output_file`` as a TSV with a header.

    Args:
        files_list: Paths of the input TSV files.
        output_file: Destination of the annotated TSV. Defaults to the path
            this function has always written to.

    Returns:
        None. The only outputs are the written file and progress prints.
    """
    df_list = [pd.read_csv(file, sep="\t", names=["concept", "property"]) for file in files_list]

    print (f"Shapes of the DF read : {[df.shape for df in df_list]}")

    df = pd.concat(df_list, axis=0, ignore_index=True)
    print (f"Columns in concatenated DF : {df.columns}")
    print (f"Concatenated DF shape : {df.shape}")

    df.dropna(axis=0, how="any", inplace=True)
    df.drop_duplicates(subset=['concept', 'property'], keep="first", inplace=True)

    unique_property = df["property"].unique()

    print ("num_unique_property :", unique_property.shape, flush=True)
    print ("unique_property :", unique_property, flush=True)

    # Count rows per property in a single pass. A label lookup such as
    # df.loc[prop].shape[0] is not usable for this: for a property that occurs
    # once it yields a Series, whose shape[0] is the number of columns, not 1.
    prop_counts = df["property"].value_counts()
    df = df.assign(prop_count=df["property"].map(prop_counts))

    df = df[["concept", "property", "prop_count"]]

    df.to_csv(output_file, sep='\t', index=None, header=True)

# read_con_prop_data(files_list=prefix_adj_local_files_list)

In [None]:
def get_top_k_properties(
    con_prop_file,
    pos_tag = False,
    cut_off = 5,
    tagged_output_file="data/evaluation_data/nn_analysis/df_with_tags.tsv",
    properties_output_file="data/evaluation_data/nn_analysis/adjs_prop_count_5_prefix_adj_plus_gkb_prop_with_prop_count.tsv",
):
    """Select frequent properties and write them out as dummy (concept, property, label) rows.

    Reads a TSV with a header that contains at least the ``property`` and
    ``prop_count`` columns, keeps the rows whose ``prop_count`` is at least
    ``cut_off`` and, when ``pos_tag`` is true, only the rows whose property is
    tagged entirely as "JJ" by ``pos_tagger`` / ``tag_adj``. The kept rows are
    written to ``tagged_output_file``. The distinct property names, with
    "(part)" and "." removed and surrounding whitespace stripped, are written
    to ``properties_output_file`` as header-less rows ``("dummy", property, 0)``.

    Args:
        con_prop_file: Path of the input TSV.
        pos_tag: Whether to apply the adjective-only filter.
        cut_off: Minimum ``prop_count`` for a row to be kept.
        tagged_output_file: Destination of the filtered rows.
        properties_output_file: Destination of the dummy property rows.

    Returns:
        None.
    """
    df = pd.read_csv(con_prop_file, sep="\t", header=0)

    # Work on an independent copy so the columns added below are not written
    # onto a filtered slice of ``df``.
    df_prop_count_cut_off = df[df["prop_count"] >= cut_off].copy()

    print (f"Dataframe with prop_count >= {cut_off} = {df_prop_count_cut_off.shape}")

    if pos_tag:
        df_prop_count_cut_off["pos_tag"] = df_prop_count_cut_off["property"].apply(pos_tagger)
        df_prop_count_cut_off["is_only_adj"] = df_prop_count_cut_off["pos_tag"].apply(tag_adj)
        # The elementwise comparison yields a boolean mask even for an empty frame.
        df_prop_count_cut_off = df_prop_count_cut_off[df_prop_count_cut_off["is_only_adj"] == True]

        print (f"adj_true_df shape {df_prop_count_cut_off.shape}")
        print (f"adj_true_df['property'].unique - len :", df_prop_count_cut_off['property'].unique().shape)

    df_prop_count_cut_off.to_csv(tagged_output_file, sep="\t", index=False)
    print (df_prop_count_cut_off)

    # Remove the markers first and strip last, so that e.g. "tail (part)" does
    # not keep a trailing space. Names that become identical after cleaning are
    # merged, keeping first-seen order.
    cleaned = (
        prop.replace("(part)", "").replace(".", "").strip()
        for prop in df_prop_count_cut_off["property"].unique()
    )
    unique_properties = list(dict.fromkeys(cleaned))

    print (f"Number of unique properties in cut_off DF : {len(unique_properties)}")

    df_list = [("dummy", prop, 0) for prop in unique_properties]

    df_prop = pd.DataFrame.from_records(df_list)

    df_prop.to_csv(properties_output_file, sep="\t", index=None, header=None)

# get_top_k_properties("data/evaluation_data/nn_analysis/hd_data/prefix_adj_plus_gkb_prop_with_prop_count.tsv", pos_tag=True, cut_off=15)

In [None]:
# Inputs for preprocess_hd_data: a vocabulary text file and a CSV of test concepts.
hd_vocab_file = "data/evaluation_data/nn_analysis/hd_data/1A.english.vocabulary.txt"
test_file = "data/evaluation_data/nn_analysis/hd_data/hd_concept_test.csv"

def preprocess_hd_data(
    vocab_file,
    test_concept_file,
    properties_output_file="data/evaluation_data/nn_analysis/hd_data/properties_hd_vocab_con_prop.tsv",
    concepts_output_file="data/evaluation_data/nn_analysis/hd_data/concepts_hd_test_con_prop.tsv",
):
    """Turn a vocabulary file and a test-concept CSV into two dummy con/prop TSV files.

    Every non-blank line of ``vocab_file`` becomes a row
    ``("con_dummy", <line>, 0)`` in ``properties_output_file``. Every distinct,
    non-missing value of the ``hypo`` column of ``test_concept_file`` becomes a
    row ``(<hypo>, "prop_dummy", 0)`` in ``concepts_output_file``. Both outputs
    are tab-separated and written without header or index.

    Args:
        vocab_file: Text file with one vocabulary entry per line.
        test_concept_file: Comma-separated file with a header containing ``hypo``.
        properties_output_file: Destination of the vocabulary rows.
        concepts_output_file: Destination of the test-concept rows.

    Returns:
        None.
    """
    with open(vocab_file, "r", encoding="utf-8") as f:
        # Blank lines are skipped so they do not turn into empty properties.
        lines = [("con_dummy", prop.strip(), 0) for prop in f if prop.strip()]

    con_prop_vocab_df = pd.DataFrame.from_records(lines)

    con_prop_vocab_df.to_csv(properties_output_file, sep="\t", index=None, header=None)

    test_concepts_df = pd.read_csv(test_concept_file, sep=",", header=0)
    print (f"Test Concepts DF shape : {test_concepts_df.shape}")

    # Missing values are dropped first: they are floats and have no .strip().
    test_cons_list = test_concepts_df["hypo"].dropna().unique()

    print (f"Num Test Concepts : {len(test_cons_list)}")

    test_con_prop_list = [(con.strip(), "prop_dummy", 0) for con in test_cons_list]

    test_con_prop_df  = pd.DataFrame.from_records(test_con_prop_list)

    test_con_prop_df.to_csv(concepts_output_file, sep="\t", index=None, header=None)
    
    
# preprocess_hd_data (vocab_file = hd_vocab_file, test_concept_file= test_file)

# Load the model that is used to generate the property embeddings.
# Before running, point the test file entry of the property config at the TSV file that lists the properties.

local_prop_config_file_path = "configs/nn_analysis/prop_nn_analysis_bert_large_fine_tune_mscg_adj_gkb_config.json"
hawk_prop_config_file_path = "configs/nn_analysis/hawk_prop_nn_analysis_bert_large_fine_tune_mscg_adj_gkb_config.json"

torch.cuda.empty_cache()

# The "hawk" config is the one actually read; the local path above is defined but not used here.
prop_config = read_config(hawk_prop_config_file_path)
prop_model = load_pretrained_model(prop_config)
# Call eval() and move the model to the selected device before it is used.
prop_model.eval()
prop_model.to(device)
print("Property Model Loaded")

In [None]:
# Get the embeddings for property and concepts

def get_embedding (model, config):
    """Run ``model`` over the test split described by ``config`` and collect its embeddings.

    The dataset and dataloader are built by ``mcrae_dataset_and_dataloader``
    from ``config["dataset_params"]``. For every batch, ``batch[0]`` is
    collected as the concept entries and ``batch[1]`` as the property entries,
    together with the first two outputs of ``model`` (concept and property
    embeddings). The third model output (logits) is ignored.

    Inputs are moved to the module-level ``device``. Each batch of embeddings
    is copied to the host as soon as it is produced, so device memory does not
    accumulate over the whole dataset.

    Args:
        model: Model called with the six keyword tensors produced by the
            dataset's ``tokenize`` method.
        config: Mapping with a ``dataset_params`` entry.

    Returns:
        Tuple ``(con_list, con_emb, prop_list, prop_emb)``; the ``*_emb``
        entries are lists with one numpy array per collected item.
    """
    print (f"Config in get_embedding function : {config}")

    test_dataset, test_dataloader = mcrae_dataset_and_dataloader(
        dataset_params=config.get("dataset_params"),
        dataset_type="test",
        data_df=None,
    )

    con_list, con_emb, prop_list, prop_emb = [], [], [], []

    for batch in test_dataloader:

        concepts_batch, property_batch = test_dataset.add_context(batch)

        ids_dict = test_dataset.tokenize(concepts_batch, property_batch)

        # Relies on the insertion order of ids_dict matching the six names below.
        (
            concept_inp_id,
            concept_attention_mask,
            concept_token_type_id,
            property_input_id,
            property_attention_mask,
            property_token_type_id,
        ) = [val.to(device) for val in ids_dict.values()]

        with torch.no_grad():

            concept_embedding, property_embedding, _ = model(
                concept_input_id=concept_inp_id,
                concept_attention_mask=concept_attention_mask,
                concept_token_type_id=concept_token_type_id,
                property_input_id=property_input_id,
                property_attention_mask=property_attention_mask,
                property_token_type_id=property_token_type_id,
            )

        print()
        print (f"Concepts Data :", len(batch[0]))
        print (f"Concepts Data :", batch[0])
        print (f"concept_embedding.shape : {concept_embedding.shape}")

        print (f"Property Data :", len(batch[1]))
        print (f"Property Data :", batch[1])
        print (f"property_embedding.shape : {property_embedding.shape}")

        # One device-to-host copy per batch; extend() then adds one row per item.
        con_list.extend(batch[0])
        con_emb.extend(concept_embedding.cpu().numpy())

        prop_list.extend(batch[1])
        prop_emb.extend(property_embedding.cpu().numpy())

    return con_list, con_emb, prop_list, prop_emb
            

# Only the property-side outputs are kept; the concept-side outputs are discarded.
_, _, prop_list, prop_emb = get_embedding(prop_model, prop_config)

print (f"prop_list len - {len(prop_list)}, Property Emb Len - {len(prop_emb)}")

In [None]:
def transform(vecs):
    """Prepend to every vector the component that lifts it to the largest norm in ``vecs``.

    With ``M`` the maximum Euclidean norm over ``vecs``, each vector ``v`` gets
    ``sqrt(M**2 - ||v||**2)`` inserted at index 0. Every returned vector is one
    element longer than its input and has norm ``M``.

    Args:
        vecs: Iterable of numpy vectors.

    Returns:
        List with one augmented numpy array per input vector, in input order.
        An empty input yields an empty list.
    """
    vecs = list(vecs)
    if not vecs:
        return []

    # Each norm is needed twice (for the maximum and for its own pad value).
    norms = [np.linalg.norm(v) for v in vecs]
    max_norm_sq = max(norms) ** 2

    return [
        np.insert(v, 0, np.sqrt(max_norm_sq - norm**2))
        for v, norm in zip(vecs, norms)
    ]

# pickle is used below but is only imported in a later cell of this file;
# import it here so this cell also works when the file is run top to bottom.
import pickle

# Augmented copies of the property embeddings (see transform).
prop_trans = transform(prop_emb)

# Names plus both embedding variants, kept index-aligned in three parallel lists.
prop_name_emb_dict = {"name_list_prop" : prop_list,
                      "untransformed_prop_emb":prop_emb,
                     "transformed_prop_emb" : prop_trans}

print (f"Pickling the transformed property name list and their embeddings.")

with open ("data/evaluation_data/nn_analysis/hd_data/hd_prop_name_emb.pickle", "wb") as f:
    pickle.dump(prop_name_emb_dict, f)


# Sanity check: the three lists should have the same length.
for key, value in prop_name_emb_dict.items():
    print (f"{key} : {len(value)}")

print ()
print ("*" * 50)
print (*prop_list, sep="\t")

In [None]:
# Load the model that is used to generate the concept embeddings.
# Before running, point the test file entry of the concept config at the file that holds the test (query) concepts.

torch.cuda.empty_cache()

local_con_conf_file_path = "configs/nn_analysis/con_nn_analysis_bert_large_fine_tune_mscg_adj_gkb_config.json"
hawk_con_conf_file_path = "configs/nn_analysis/hawk_con_nn_analysis_bert_large_fine_tune_mscg_adj_gkb_config.json"

# The "hawk" config is the one actually read; the local path above is defined but not used here.
con_config = read_config(hawk_con_conf_file_path)
con_model = load_pretrained_model(con_config)
# Call eval() and move the model to the selected device before it is used.
con_model.eval()
con_model.to(device)
print ("Concept Model Loaded")

In [None]:
# Only the concept-side outputs are kept; the property-side outputs are discarded.
con_list, con_emb, _, _ = get_embedding(con_model, con_config)

In [None]:
# Sanity check: names and embeddings should have the same length.
print (f"con_list len - {len(con_list)}, con_emb Len - {len(con_emb)}")

In [None]:
# Apply the same norm-equalising augmentation to the concept embeddings.
# NOTE(review): the pad value here is derived from the largest *concept* norm,
# independently of the property side. If the goal is the usual reduction of
# inner-product search to Euclidean nearest neighbours, the query vectors are
# normally padded with 0 instead -- confirm that this is intended.
con_trans = transform(con_emb)

In [None]:
# Names plus both embedding variants, kept index-aligned in three parallel lists.
con_name_emb_dict = {"name_list_con" : con_list,
                     "untransformed_con_emb": con_emb,
                    "transformed_con_emb" : con_trans}

In [None]:
# pickle is used below but is only imported in a later cell of this file;
# import it here so this cell also works when the file is run top to bottom.
import pickle

# Persist the concept names and embeddings for the nearest-neighbour analysis.
with open ("data/evaluation_data/nn_analysis/hd_data/hd_con_name_emb.pickle", "wb") as f:
    pickle.dump(con_name_emb_dict, f)

In [None]:
# Sanity check: the three lists should have the same length.
for key, value in con_name_emb_dict.items():
    print (f"{key} : {len(value)}")

print ()
print ("*" * 50)
# Dump all concept names on one tab-separated line.
print (*con_list, sep="\t")

In [1]:
import pickle
import numpy as np
import nltk
from sklearn.neighbors import NearestNeighbors
from collections import Counter
import pandas as pd
from collections import Counter


In [2]:

hd_con_emb_file = "/home/amitgajbhiye/cardiff_work/dot_product_model_nn_analysis/hd_con_name_emb.pickle"
hd_prop_emb_file = "/home/amitgajbhiye/cardiff_work/dot_product_model_nn_analysis/hd_prop_name_emb.pickle"

# NOTE: pickle.load can execute arbitrary code stored in the file; only load
# pickles that were produced by this pipeline.
# The file handles get their own names so that they do not rebind `con_emb` /
# `prop_emb`, which hold the embedding lists in the earlier cells.
with open(hd_con_emb_file, "rb") as con_file, open(hd_prop_emb_file, "rb") as prop_file:

    con_name_emb = pickle.load(con_file)
    prop_name_emb = pickle.load(prop_file)

print (con_name_emb.keys())
print (prop_name_emb.keys())

dict_keys(['name_list_con', 'untransformed_con_emb', 'transformed_con_emb'])
dict_keys(['name_list_prop', 'untransformed_prop_emb', 'transformed_prop_emb'])


In [3]:
# Sizes of the three parallel lists in the property pickle.
print (f'Number of Properties in the loaded prop pickel : {len(prop_name_emb.get("name_list_prop"))}', flush=True)
print (f'Number of Untransformed Properties Embedding in the loaded prop pickel : {len(prop_name_emb.get("untransformed_prop_emb"))}', flush=True)
print (f'Number of TRansformed Properties Embedding in the loaded prop pickel : {len(prop_name_emb.get("transformed_prop_emb"))}', flush=True)

print ()
# Sizes of the three parallel lists in the concept pickle. The last two
# messages used to name the property pickle although they report on the
# concept pickle.
print (f'Number of Concepts in the loaded con pickel : {len(con_name_emb.get("name_list_con"))}', flush=True)
print (f'Number of Untransformed Concepts Embedding in the loaded con pickel : {len(con_name_emb.get("untransformed_con_emb"))}', flush=True)
print (f'Number of Transformed Concepts Embedding in the loaded con pickel : {len(con_name_emb.get("transformed_con_emb"))}', flush=True)

Number of Properties in the loaded prop pickel : 218750
Number of Untransformed Properties Embedding in the loaded prop pickel : 218750
Number of TRansformed Properties Embedding in the loaded prop pickel : 218750

Number of Concepts in the loaded con pickel : 1057
Number of Untransformed Concepts Embedding in the loaded prop pickel : 1057
Number of Transformed Concepts Embedding in the loaded prop pickel : 1057


In [4]:
# Shape of the first transformed property embedding (displayed as the cell result).
prop_name_emb.get("transformed_prop_emb")[0].shape

(1025,)

In [5]:
# Number of properties retrieved per concept by the nearest-neighbour search below.
num_nearest_neighbours = 40

In [6]:
# Fit a brute-force nearest-neighbour index on the transformed property embeddings.
# No metric is passed, so the estimator's default distance is used.
nbrs = NearestNeighbors(n_neighbors=num_nearest_neighbours, algorithm='brute').fit(np.array(prop_name_emb.get("transformed_prop_emb")))

In [7]:
# Query the index with every transformed concept embedding; row i of `indices`
# holds positions into the property list for concept i.
distances, indices = nbrs.kneighbors(np.array(con_name_emb.get("transformed_con_emb")))

In [8]:
# Inspect the neighbour index matrix and its shape.
print (indices)
print (indices.shape)

[[111587  53846 206831 ...  91284 198873  90031]
 [142837 193586 145629 ...  68416  79256 178580]
 [178563 214337  25392 ...  26206  41861  82366]
 ...
 [ 66877 163981 123163 ...  34639 171636 195632]
 [162411 164091 162408 ... 172759 196704 156403]
 [  1666  93385 190048 ... 198278 206378  37175]]
(1057, 40)


In [9]:
# Show which entries each loaded dictionary provides.
print (con_name_emb.keys())
print (prop_name_emb.keys())

dict_keys(['name_list_con', 'untransformed_con_emb', 'transformed_con_emb'])
dict_keys(['name_list_prop', 'untransformed_prop_emb', 'transformed_prop_emb'])


In [10]:
# Number of untransformed property embeddings (displayed as the cell result).
len(prop_name_emb.get("untransformed_prop_emb"))

218750

# Print, for every concept, the names of its retrieved properties
# (each neighbour index is looked up in the property name list).
for idx, con in zip(indices, con_name_emb.get("name_list_con")):    
    print (f"{con} : {[prop_name_emb.get('name_list_prop') [prop_id] for prop_id in idx]}\n", flush=True)

In [11]:
# Same listing as for all concepts, restricted to the first five.
for idx, con in zip(indices[0:5], con_name_emb.get("name_list_con")[0:5]):    
    print (f"{con} : {[prop_name_emb.get('name_list_prop') [prop_id] for prop_id in idx]}\n", flush=True)

maliciousness : ['maliciousness', 'dodginess', 'vindictiveness', 'gaminess', 'intemperateness', 'infacility', 'meddlesomeness', 'obstinancy', 'unreadiness', 'irascibility', 'inoffensiveness', 'effeteness', 'constitutiveness', 'insolency', 'transgressiveness', 'acerbity', 'fallaciousness', 'pseudorapidity', 'unfeelingness', 'insolence', 'insouciance', 'unfancy', 'ergodicity', 'inexactitude', 'unseriousness', 'disingenuousness', 'inauthenticity', 'ingrateful', 'insensitiveness', 'nobodiness', 'vituperativeness', 'susceptance', 'impiousness', 'obtrusiveness', 'callousness', 'compulsivity', 'bogusness', 'impudence', 'unauthenticity', 'idiocy']

buckler : ['phossy', 'tooley', 'plier', 'louver', 'wounder', 'buckler', 'clamper', 'stitcher', 'cordwainer', 'shoer', 'pilferer', 'stuiver', 'whithersoever', 'hatpin', 'lockie', 'wrastle', 'fixer-upper', 'nailer', 'bobbin', 'strapper', 'bracer', 'threader', 'shoestring', 'fitt', 'lug wrench', 'meaney', 'girdler', 'notcher', 'dowsing rod', 'racker', 

# Map every concept to the stripped names of all its retrieved properties (unfiltered).
# NOTE: `d` is rebuilt from scratch, with filtering, further down in this file.
d = {}
for idx, con in zip(indices, con_name_emb.get("name_list_con")):
    d[con] = [prop_name_emb.get('name_list_prop') [prop_id].strip() for prop_id in idx]

In [12]:
def pos_tagger(x):
    """Tokenize the string ``x`` with NLTK and return the POS-tagged tokens.

    The result is whatever ``nltk.pos_tag`` returns for the tokens produced by
    ``nltk.word_tokenize(x)``, e.g. ``[('maliciousness', 'NN')]``.
    """
    return nltk.pos_tag(nltk.word_tokenize(x))
    

def filter_prop (con, prop_list, max_props=15):
    """Keep the noun-like properties of a concept that do not overlap with its name.

    ``con`` and every entry of ``prop_list`` are lower-cased and stripped. A
    property is kept when neither string contains the other and the POS tag of
    its last token (as given by ``pos_tagger``) is one of "NN", "NNS", "NNPS".
    The input order is preserved, and the kept list is printed before it is
    truncated.

    Args:
        con: Concept name.
        prop_list: Candidate property names, in ranking order.
        max_props: Maximum number of properties to return (default 15).

    Returns:
        List with at most ``max_props`` kept properties.
    """
    con = con.lower().strip()

    filtered_prop_list = []

    for prop in (raw.lower().strip() for raw in prop_list):
        # Skip properties that contain the concept or are contained in it.
        # An empty property is always skipped here, because "" is a substring
        # of every string, so pos_tagger never sees an empty input.
        if con in prop or prop in con:
            continue

        # NOTE(review): "NNP" is not accepted while "NNPS" is -- confirm this is intended.
        if pos_tagger(prop)[-1][1] in ("NN","NNS","NNPS"):
            filtered_prop_list.append(prop)

    print (len(filtered_prop_list))
    print (filtered_prop_list)
    print ()

    # Slicing already copes with lists shorter than max_props.
    return filtered_prop_list[:max_props]

    

# Rebuild `d`: map every concept to its filtered list of retrieved properties.
d = {}
for idx, con in zip(indices, con_name_emb.get("name_list_con")):
    
    # This initial value is overwritten by the filter_prop call below.
    filtered_prop_list = []
    
    # Names of the retrieved properties for this concept, in neighbour order.
    prop_for_con = [prop_name_emb.get('name_list_prop') [prop_id].strip() for prop_id in idx]
    
    print (f"concept : {con}")
    print (f"All properties : {prop_for_con}")
    print ([pos_tagger(prop) for prop in prop_for_con] )
    filtered_prop_list = filter_prop(con, prop_for_con)
    
    d[con] = filtered_prop_list
    


concept : maliciousness
All properties : ['maliciousness', 'dodginess', 'vindictiveness', 'gaminess', 'intemperateness', 'infacility', 'meddlesomeness', 'obstinancy', 'unreadiness', 'irascibility', 'inoffensiveness', 'effeteness', 'constitutiveness', 'insolency', 'transgressiveness', 'acerbity', 'fallaciousness', 'pseudorapidity', 'unfeelingness', 'insolence', 'insouciance', 'unfancy', 'ergodicity', 'inexactitude', 'unseriousness', 'disingenuousness', 'inauthenticity', 'ingrateful', 'insensitiveness', 'nobodiness', 'vituperativeness', 'susceptance', 'impiousness', 'obtrusiveness', 'callousness', 'compulsivity', 'bogusness', 'impudence', 'unauthenticity', 'idiocy']
[[('maliciousness', 'NN')], [('dodginess', 'NN')], [('vindictiveness', 'NN')], [('gaminess', 'NN')], [('intemperateness', 'NN')], [('infacility', 'NN')], [('meddlesomeness', 'NN')], [('obstinancy', 'NN')], [('unreadiness', 'NN')], [('irascibility', 'NN')], [('inoffensiveness', 'NN')], [('effeteness', 'NN')], [('constitutivene

In [13]:
# Collect how many properties each concept kept after filtering.
l = []
for key, value in d.items():
    print (f"{key} : {len(value)}")
    
    l.append(len(value))

# Histogram: list length -> number of concepts with that many properties.
counts = Counter(l)


maliciousness : 15
buckler : 15
spelunker : 15
quo warranto : 15
pragmatism : 15
liberty : 15
shoal : 15
miscreant : 15
pane : 15
apparition : 15
woofer : 15
lofepramine : 15
spot price : 15
homeostasis : 15
staphylococci : 15
arms : 15
respiratory disease : 15
footway : 15
longbow : 15
rookery : 15
engine : 15
bread : 15
styrofoam : 15
bomber : 15
ordinary : 15
wine : 15
relief : 15
policy : 15
hearsay rule : 15
judge : 15
authoritarian regime : 15
law : 15
reflex response : 15
kaon : 15
arbitrage : 15
tartar : 15
manslaughter : 15
luffa : 15
specialist : 15
sarcophagus : 15
collector : 15
microcontroller : 15
festivity : 15
shopping : 15
monoid : 15
catchment area : 15
moderator : 15
ghee : 15
tower : 15
diameter : 15
adventure : 15
director : 15
catsuit : 15
business concern : 15
warship : 15
letter : 15
balsa raft : 15
line of duty : 15
dwarf planet : 15
list : 15
couture : 15
blame : 15
accounting : 15
running : 15
waiver : 15
residence time : 15
residence hall : 15
macadam : 15
s

In [14]:
# Show the distribution of kept-property counts per concept.
print (counts)

Counter({15: 1039, 14: 5, 13: 4, 11: 3, 6: 2, 12: 1, 7: 1, 9: 1, 8: 1})


In [15]:
# Number of distinct concept keys in `d`.
print (len(d.keys()))

1057


In [16]:
# One row per concept (the dict keys become the index), one column per property rank.
df = pd.DataFrame.from_dict(d, orient="index")

In [17]:
# Column labels are the integer property ranks at this point.
print (list(df.columns))

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]


In [18]:
# Move the concept names from the index into a regular first column.
df.reset_index(inplace=True, drop=False)

In [19]:
# Display the frame as the cell result.
df

Unnamed: 0,index,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14
0,maliciousness,dodginess,vindictiveness,gaminess,intemperateness,infacility,meddlesomeness,obstinancy,unreadiness,irascibility,inoffensiveness,effeteness,constitutiveness,insolency,transgressiveness,acerbity
1,buckler,phossy,tooley,plier,louver,wounder,clamper,stitcher,cordwainer,shoer,pilferer,stuiver,whithersoever,hatpin,lockie,wrastle
2,spelunker,zanker,brunker,cornette,hurdy gurdy,leddy,reeler,helter-skelter,hurdy-gurdy,bager,peterlin,double reed,beater,chukker,fiddler,bassler
3,quo warranto,dispositio,amendatory,preadoption,ad libitum,acutance,jus soli,residuary clause,grandfather clause,prelaw,legal order,writ of right,embattlement,appro,countersue,waiver
4,pragmatism,philosophism,empiricism,philosophise,rationalism,tractarianism,intellectualism,idealism,moralism,nominalism,materialism,fabulism,epistemicism,occasionalism,stoicism,prosaicism
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1052,shoulder,areal,aread,terma,soft spot,weak spot,joint,posta,cruse,arean,thigh,scarlike,extremity,vulvic,near side,throat
1053,prayer,prays,spiritual practice,meditation,soul-searching,meditations,entreaty,intercession,spiritualisation,liturgic,edification,spiritualization,compline,penitence,apostolates,imparteth
1054,fess,ruff,muss,tummler,ruffly,tufty,shaggy,mowll,spiff,scobie,fee tail,wag,puss,minshull,chinn,dobbie
1055,roaring,bellowing,noise,burbling,clamor,clamour,belling,sonics,drung,hum,reverberation,sound,korotkoff sounds,rattlings,,


In [20]:
hypo_hyper_file_name = "/home/amitgajbhiye/cardiff_work/dot_product_model_nn_analysis/filtered_hd_test_results.csv"

# First column is the concept ("hypo"), followed by one "hyp=<rank>" column per
# property column that the frame actually has. With 15 property columns this
# gives "hyp=1" .. "hyp=15"; deriving the names from the frame width avoids a
# length mismatch when no concept kept 15 properties.
columns = ["hypo"] + [f"hyp={rank}" for rank in range(1, df.shape[1])]

df.columns = columns

df["hypo"] = df["hypo"].str.strip()

df.to_csv(hypo_hyper_file_name, sep = ",", index=False, header=True)

# Print, for every concept, the names of all its retrieved properties (unfiltered).
for idx, con in zip(indices, con_name_emb.get("name_list_con")):
    print (f"{con} : {[prop_name_emb.get('name_list_prop') [prop_id] for prop_id in idx]}\n", flush=True)
