In [1]:
import pandas as pd
from scripts.style_generation import get_style_genre
from scripts.first_n_words import get_first_n_words
from scripts.llm import ask_LLM
from scripts.kg_content import extract_kg_content
from scripts.minhash_vector import create_minhash_vector
from scripts.reconstruction_content import extract_reconstruction_content
from scripts.evaluate import evaluate_peformance
import scripts.prompts
import scripts.api_key

In [2]:
# Load the ML arXiv papers dataset from a local CSV file.
dataset = pd.read_csv("data/ML-Arxiv-Papers.csv")
rows, columns = dataset.shape

# Texts to process: only the 'abstract' column is used. An earlier, now
# disabled variant took the 'train' split of the dataset and concatenated each
# title with its abstract, which is where the variable name comes from.
concatenated_texts = dataset['abstract']

# The API key lives in the scripts.api_key module rather than in the notebook.
API_KEY = scripts.api_key.API_KEY


# Upper bound on the accumulated input length; it is compared against
# len(input_string_so_far) in KG_construction_and_reconstruction.
# A value of 5000 was used previously.
stop_len = 50000

# Model identifier handed to get_style_genre / ask_LLM.
model_name = "meta-llama/Llama-3-70b-chat-hf"
# Alternative model that was tried: "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO"
# NOTE(review): the prompt wording ("very intelligence") is sent verbatim to
# the model; it is left unchanged here so earlier results stay comparable.
system_prompt = "You are a very smart very intelligence assistant who is very helpful."

# Accumulators filled by the processing loop, one entry per processed text.
all_kg_results = []
all_reconstruction_results = []
input_string_so_far_list = []


In [3]:
# Show the number of rows in the loaded dataset (first element of dataset.shape).
rows

117592

In [4]:
def _ask_with_retry(model, prompt, extractor, max_tokens, attempts=2):
    """Query the LLM until ``extractor`` finds content in the reply or ``attempts`` run out.

    Returns a ``(raw_reply, extracted)`` pair for the last reply received;
    ``extracted`` is ``None`` when no attempt produced extractable content.
    """
    raw_reply = None
    extracted = None
    for _ in range(attempts):
        raw_reply = ask_LLM(model, system_prompt, prompt, API_KEY, temperature=0.5, top_p=0.95,
                            max_tokens=max_tokens, frequency_penalty=1.1, presence_penalty=1.1)
        extracted = extractor(raw_reply)
        if extracted is not None:
            break
    return raw_reply, extracted


def KG_construction_and_reconstruction(input_text, model):
    """Build a knowledge-graph segment for ``input_text`` and reconstruct text from it.

    The function first asks ``get_style_genre`` for a style analysis, which
    becomes the first entry of the knowledge graph. It then asks the LLM for a
    knowledge-graph segment describing the text and, from the graph built so
    far, for a textual reconstruction. Each LLM request is tried at most twice.

    Parameters
    ----------
    input_text : str
        Text to convert. It is processed as one unit.
    model : str
        Model identifier passed to ``get_style_genre`` and ``ask_LLM``.

    Returns
    -------
    tuple
        ``(input_string_so_far, current_kg, reconstruction_so_far)``: the input
        text that was consumed, the list of knowledge-graph entries (style
        analysis first, then one formatted segment per processed text) and the
        concatenated reconstruction text.

    Notes
    -----
    Failures while processing a segment are reported on stdout and do not
    raise; in that case the partial results collected so far are returned.
    Only ``Exception`` subclasses are caught, so interrupting the kernel still
    stops the run.
    """
    # n is the character count, which is never smaller than the word count, so
    # this presumably passes the whole text on -- TODO confirm against
    # get_first_n_words.
    writing_style = get_style_genre(get_first_n_words(input_text, len(input_text)), model, system_prompt)

    # The whole input is handled as a single "sentence"; the loop is kept so a
    # finer-grained splitter can be dropped in here.
    sentences = [input_text]
    current_kg = ["<style_analysis>" + writing_style + "</style_analysis>"]
    segment_nr = 1
    reconstruction_so_far = ""
    input_string_so_far = ""
    for sentence in sentences:
        input_string_so_far += sentence
        if len(input_string_so_far) > stop_len:
            break

        # Only the 50 most recent graph entries are given to the model as context.
        current_kg_context = ' '.join(current_kg[-50:])
        text = scripts.prompts.KG_format_example_prompt(current_kg_context, sentence)

        try:
            raw_kg, kg_content = _ask_with_retry(model, text, extract_kg_content, max_tokens=1000)
            # Fall back to the unparsed reply when no graph content could be extracted.
            kg_body = kg_content if isinstance(kg_content, str) else raw_kg
            kg_segment = ("<segment " + str(segment_nr) + ">\n" + kg_body
                          + "<source_sentence_min_hash: " + str(create_minhash_vector(sentence)) + " >\n"
                          + "</segment " + str(segment_nr) + ">\n")
            current_kg.append(kg_segment)
            print(kg_segment)

            prompt = scripts.prompts.KG_reconstruction_prompt(reconstruction_so_far, current_kg)
            _, next_reconstruction = _ask_with_retry(model, prompt, extract_reconstruction_content,
                                                     max_tokens=4000)
            if next_reconstruction is None:
                raise ValueError("no reconstruction content could be extracted from the LLM reply")

            reconstruction_so_far += next_reconstruction
            print(next_reconstruction)
            segment_nr += 1
        except Exception as exc:
            # Best effort: report the reason and keep whatever was collected so far.
            print("No Kg found:", repr(exc))

    return input_string_so_far, current_kg, reconstruction_so_far


In [5]:
def split_long_text(text, segment_length):
    """Cut ``text`` into chunks of ``segment_length`` words each.

    The text is tokenised with ``str.split()``, so any run of whitespace
    counts as one separator. Words are collected until the chunk holds
    ``segment_length`` of them, then joined with single spaces. Leftover
    words form a final, shorter chunk.

    Returns the list of chunk strings (empty when ``text`` has no words).
    """
    chunks = []
    pending = []
    for token in text.split():
        pending.append(token)
        if len(pending) >= segment_length:
            # Chunk is full: emit it and start a new one.
            chunks.append(' '.join(pending))
            pending = []
    if pending:
        chunks.append(' '.join(pending))
    return chunks

In [6]:
# Number of words per segment; passed to split_long_text() by the processing loop.
segment_length = 100

In [7]:
def flatten_list_of_lists(list_of_lists):
    """Concatenate every string found in the nested iterables into one string.

    Each element of ``list_of_lists`` is itself joined without a separator
    (a plain string therefore contributes itself unchanged), and the partial
    results are joined without a separator as well.
    """
    return ''.join(''.join(sublist) for sublist in list_of_lists)

In [8]:
# Start from empty accumulators so that re-running this cell does not append
# to results left over from an earlier run.
input_string_so_far_list = []
all_kg_results = []
all_reconstruction_results = []

for count, input_text in enumerate(concatenated_texts[0:3]):
    print(count)
    print(len(input_text))
    split_text = split_long_text(input_text, segment_length)

    # Per-text accumulators, one entry per segment.
    input_split_texts = []
    kg_for_input_split_texts = []
    reconstruction_results_for_input_split_texts = []

    for each_split_text in split_text:
        print(each_split_text)
        segment_input_text, kg_results, reconstruction_results = KG_construction_and_reconstruction(
            each_split_text, model_name)
        input_split_texts.append(segment_input_text)
        kg_for_input_split_texts.append(kg_results)
        reconstruction_results_for_input_split_texts.append(reconstruction_results)

    # The segments were produced by splitting on whitespace, so they must be
    # re-joined with a space; joining them with '' would fuse the last word of
    # one segment with the first word of the next.
    input_string_so_far_list.append(' '.join(input_split_texts))
    all_kg_results.append(flatten_list_of_lists(kg_for_input_split_texts))
    all_reconstruction_results.append(flatten_list_of_lists(reconstruction_results_for_input_split_texts))
            
        


0
985
The problem of statistical learning is to construct a predictor of a random variable $Y$ as a function of a related random variable $X$ on the basis of an i.i.d. training sample from the joint distribution of $(X,Y)$. Allowable predictors are drawn from some specified class, and the goal is to approach asymptotically the performance (expected loss) of the best predictor in the class. We consider the setting in which one has perfect observation of the $X$-part of the sample, while the $Y$-part has to be communicated at some finite bit rate. The encoding of the $Y$-values is allowed to
<segment 1>


'Statistical Learning Problem': {
  'relations': {
    'defined_by': 'Constructing a predictor of a random variable',
    'involves': 'Random variables X and Y',
    'based_on': 'i.i.d. training sample'
  },
  'attributes': {
    'description': 'Constructing a predictor of a random variable Y as a function of X',
    'goal': 'Approach asymptotically the performance of the best predictor



In a sensor network, communication errors or failures can occur randomly and unpredictably, affecting the communication among sensors. These errors or failures are a characteristic of the sensor network, which is prone to errors or failures, costly, and constrained. The sensor network operates under scarce resources, including power, data rate, and communication, which limits its performance.

The communication in the sensor network is influenced by the signal-to-noise ratio (SNR), which is a main factor in determining the probability of error or communication failure. The SNR determines the link quality, which is a measure of communication reliability. A higher SNR results in a lower probability of error or communication failure, thereby improving the link quality.

In the context of sensor networks, topology design is a critical problem that involves assigning probabilities of reliable communication. The goal of topology design is to maximize the rate of communication while ensurin

<segment 1>


'On-line Shortest Path Problem': {
  'relations': {
    'considered_under': 'Partial Monitoring Models',
    'involves': 'Weighted Directed Acyclic Graph'
  },
  'attributes': {
    'description': 'Optimizing path choice with minimal loss',
    'context': 'Arbitrary edge weight changes'
  }
},

'Partial Monitoring Models': {
  'relations': {
    'generalize': 'Multi-armed Bandit Problem'
  },
  'attributes': {
    'description': 'Learning from partial feedback'
  }
},

'Weighted Directed Acyclic Graph': {
  'relations': {
    'has': 'Edges with Changing Weights',
    'features': 'Two Distinguished Vertices'
  },
  'attributes': {
    'description': 'Graph with dynamic edge weights'
  }
},

'Edges with Changing Weights': {
  'relations': {
    'part_of': 'Weighted Directed Acyclic Graph'
  },
  'attributes': {
    'description': 'Arbitrary weight changes'
  }
},

'Decision Maker': {
  'relations': {
    'plays': 'Game',
    'learns_from': 'Partial Feedback'
  },
  'attribu

In [9]:
# len(input_split_texts)

In [10]:
# input_split_texts

In [11]:
# kg_for_input_split_texts

In [12]:
# reconstruction_results_for_input_split_texts

In [13]:
# Assemble the per-text results into one table: the consumed input text, the
# generated knowledge graph and the text reconstructed from that graph.
result_columns = {
    'Input_Texts': input_string_so_far_list,
    'Output_Graphs': all_kg_results,
    'Output_Reconstructions': all_reconstruction_results,
}
df = pd.DataFrame(data=result_columns)

print(df)

# Evaluation step, currently disabled:
# print("500 word sample evalution:", "\n")
# base_cap_500, original_cap_500, knowledgegraph_cap_500, reconstruction_cap_500,QA_df = evaluate_peformance(df, 2,
#                                                                                                      "q_a_kg.parquet")

# print("No context correct answer percentage:", base_cap_500, "\n")
# print("Original context correct answer percentage:", original_cap_500, "\n")
# print("Knowledgegraph context correct answer percentage:", knowledgegraph_cap_500, "\n")
# print("Reconstruckted text context correct answer percentage:", reconstruction_cap_500, "\n")





                                         Input_Texts  \
0  The problem of statistical learning is to cons...   
1  In a sensor network, in practice, the communic...   
2  The on-line shortest path problem is considere...   

                                       Output_Graphs  \
0  <style_analysis>This text exemplifies a formal...   
1  <style_analysis>This text exemplifies a techni...   
2  <style_analysis>This text is a prime example o...   

                              Output_Reconstructions  
0  \n\nIn the context of statistical learning pro...  
1  \n\nIn a sensor network, communication errors ...  
2  \n\nIn the context of the On-line Shortest Pat...  


In [14]:
# Write the results table to data/test.csv as UTF-8, without the index column.
df.to_csv("data/test.csv", encoding='utf-8', index=False)
# Export of the question/answer frame from the (disabled) evaluation step:
#QA_df.to_csv("data/questions_answer_save_200.csv", encoding='utf-8', index=False)

In [None]:
# i = 0
# for input_text in concatenated_texts[2451:2500]:
#     print(i)
    
#     i = i+1
#     print(len(input_text))
    
#     #writing_style = get_style_genre(model_name, system_prompt, get_first_n_words(input_text, len(input_text))) #len(input_text) 1000
#     writing_style = get_style_genre(get_first_n_words(input_text, len(input_text)), model_name, system_prompt) #len(input_text) 1000
    
#     sentences = [input_text]
#     current_kg = []
#     current_kg.append("<style_analysis>" + writing_style + "</style_analysis>")
#     segment_nr = 1
#     reconstruction_so_far = ""
#     input_string_so_far = ""
#     for sentence in sentences:
#         input_string_so_far += sentence
#         if len(input_string_so_far) > stop_len:
#             break
#         current_kg_context = current_kg[-50:] if len(current_kg) > 50 else current_kg

#             # Concatenate the elements into a single string
#         current_kg_context = ' '.join(current_kg_context)

#         text = scripts.prompts.KG_format_example_prompt(current_kg_context, sentence)
        
        
#         try:
#             for i in range(2):
#                 knowledge_graph_segment = ask_LLM(model_name,
#                                                     "You are a very smart very intelligence assistant who is very helpful.",
#                                                     text, API_KEY, temperature=0.5, top_p=0.95, max_tokens=1000,
#                                                     frequency_penalty=1.1, presence_penalty=1.1)
#                 if not (extract_kg_content(knowledge_graph_segment) == None):
#                     break
#             try:
#                 current_kg.append("<segment " + str(segment_nr) + ">\n" + extract_kg_content(
#                         knowledge_graph_segment) + "<source_sentence_min_hash: " + str(
#                         create_minhash_vector(sentence)) + " >\n" + "</segment " + str(segment_nr) + ">\n")
#                 print("<segment " + str(segment_nr) + ">\n" + extract_kg_content(
#                         knowledge_graph_segment) + "<source_sentence_min_hash: " + str(
#                         create_minhash_vector(sentence)) + " >\n" + "</segment " + str(segment_nr) + ">\n")
#             except:
#                 current_kg.append(
#                         "<segment " + str(segment_nr) + ">\n" + knowledge_graph_segment + "<source_sentence_min_hash: " + str(
#                             create_minhash_vector(sentence)) + " >\n" + "</segment " + str(segment_nr) + ">\n")
#                 print("<segment " + str(segment_nr) + ">\n" + knowledge_graph_segment + "<source_sentence_min_hash: " + str(
#                         create_minhash_vector(sentence)) + " >\n" + "</segment " + str(segment_nr) + ">\n")

#             prompt = scripts.prompts.KG_reconstruction_prompt(reconstruction_so_far, current_kg)
#             for i in range(2):
#                 next_reconstruction = ask_LLM(model_name,
#                                                 "You are a very smart very intelligence assistant who is very helpful.",
#                                                 prompt, API_KEY, temperature=0.5, top_p=0.95, max_tokens=4000,
#                                                 frequency_penalty=1.1, presence_penalty=1.1)
#                 if not (extract_reconstruction_content(next_reconstruction) == None):
#                     break

#             reconstruction_so_far += extract_reconstruction_content(next_reconstruction)
#                 #print(reconstruction_so_far)

#             print(extract_reconstruction_content(next_reconstruction))
#             segment_nr += 1

#             #all_kg_results.append(current_kg)
#             #print("....................start....................................")
#             #print(current_kg.split("<source_sentence_min_hash:"))

#             #print("...............current kg........................")
#             #print(current_kg)

#             #kg_String = ''.join(current_kg)
        
#         except:
#             print("No Kg found")
            
#         try:
#             all_kg_results.append(current_kg)

#                 #print(".....................current kg end.........................")
#                 #all_reconstruction_results.append(reconstruction_so_far)
#                 #print(reconstruction_so_far.split("<source_sentence_min_hash:")[0])
#                 #reconstruction_String = ''.join(reconstruction_so_far)
#             all_reconstruction_results.append(reconstruction_so_far)
#                 #print("....................end....................................")

#             input_string_so_far_list.append(input_string_so_far)
        
#         except:
#             print("Pass because of no Kg found")


In [None]:
# i = 0

# for input_text in concatenated_texts[101:200]:
#     try:
#         print(i)

#         i = i+1
#         print(len(input_text))
#         writing_style = get_style_genre(get_first_n_words(input_text, len(input_text))) #len(input_text) 1000
#         sentences = [input_text]
#         current_kg = []
#         current_kg.append("<style_analysis>" + writing_style + "</style_analysis>")
#         segment_nr = 1
#         reconstruction_so_far = ""
#         input_string_so_far = ""
#         for sentence in sentences:
#             input_string_so_far += sentence
#             if len(input_string_so_far) > stop_len:
#                 break
#             current_kg_context = current_kg[-50:] if len(current_kg) > 50 else current_kg

#                 # Concatenate the elements into a single string
#             current_kg_context = ' '.join(current_kg_context)

#             text = scripts.prompts.KG_format_example_prompt(current_kg_context, sentence)

#             for i in range(2):
#                 knowledge_graph_segment = ask_LLM(model_name,
#                                                     "You are a very smart very intelligence assistant who is very helpful.",
#                                                     text, API_KEY, temperature=0.1, top_p=0.95, max_tokens=1000,
#                                                     frequency_penalty=1.1, presence_penalty=1.1)
#                 if not (extract_kg_content(knowledge_graph_segment) == None):
#                     break
#             try:
#                 current_kg.append("<segment " + str(segment_nr) + ">\n" + extract_kg_content(
#                         knowledge_graph_segment) + "<source_sentence_min_hash: " + str(
#                         create_minhash_vector(sentence)) + " >\n" + "</segment " + str(segment_nr) + ">\n")
#                 print("<segment " + str(segment_nr) + ">\n" + extract_kg_content(
#                         knowledge_graph_segment) + "<source_sentence_min_hash: " + str(
#                         create_minhash_vector(sentence)) + " >\n" + "</segment " + str(segment_nr) + ">\n")
#             except:
#                 current_kg.append(
#                         "<segment " + str(segment_nr) + ">\n" + knowledge_graph_segment + "<source_sentence_min_hash: " + str(
#                             create_minhash_vector(sentence)) + " >\n" + "</segment " + str(segment_nr) + ">\n")
#                 print("<segment " + str(segment_nr) + ">\n" + knowledge_graph_segment + "<source_sentence_min_hash: " + str(
#                         create_minhash_vector(sentence)) + " >\n" + "</segment " + str(segment_nr) + ">\n")

#             prompt = scripts.prompts.KG_reconstruction_prompt(reconstruction_so_far, current_kg)
#             for i in range(2):
#                 next_reconstruction = ask_LLM(model_name,
#                                                 "You are a very smart very intelligence assistant who is very helpful.",
#                                                 prompt, API_KEY, temperature=0.5, top_p=0.95, max_tokens=4000,
#                                                 frequency_penalty=1.1, presence_penalty=1.1)
#                 if not (extract_reconstruction_content(next_reconstruction) == None):
#                     break

#             reconstruction_so_far += extract_reconstruction_content(next_reconstruction)
#                 #print(reconstruction_so_far)

#             print(extract_reconstruction_content(next_reconstruction))
#             segment_nr += 1

#             #all_kg_results.append(current_kg)
#             #print("....................start....................................")
#             #print(current_kg.split("<source_sentence_min_hash:"))

#             #print("...............current kg........................")
#             #print(current_kg)

#             #kg_String = ''.join(current_kg)
#         all_kg_results.append(current_kg)

#             #print(".....................current kg end.........................")
#             #all_reconstruction_results.append(reconstruction_so_far)
#             #print(reconstruction_so_far.split("<source_sentence_min_hash:")[0])
#             #reconstruction_String = ''.join(reconstruction_so_far)
#         all_reconstruction_results.append(reconstruction_so_far)
#             #print("....................end....................................")

#         input_string_so_far_list.append(input_string_so_far)

#     except:
#         pass


In [None]:
# i = 0
# for input_text in concatenated_texts[0:1000]:
#     print(i)
    
#     i = i+1
#     print(len(input_text))
#     #print(input_text)
#     try:

#         writing_style = get_style_genre(get_first_n_words(input_text, len(input_text))) #len(input_text) 1000

#         # sentences= text_to_sentences(input_text)
#         # sentences =sentences_to_large_strings(sentences)
#         sentences = [input_text]
#         # print(sentences)
#         # continue
#         current_kg = []
#         current_kg.append("<style_analysis>" + writing_style + "</style_analysis>")
#         #print("<style_analysis>" + writing_style + "</style_analysis>")
#         segment_nr = 1
#         reconstruction_so_far = ""
#         input_string_so_far = ""
#         for sentence in sentences:
#             input_string_so_far += sentence
#             if len(input_string_so_far) > stop_len:
#                 break
#             #print("INPUT:", sentence)
#             # print("-----")
#             # '''
#             # prompt="""INPUT_TEXT:
#             # """+sentence+"""
#             # INSTRUCTION:
#             # Paraphrase the given input text so that every statement is rephrased into sentences that contain only three to ten words each.
#             #   Use a simple structure and make sure to retain all information, names, numbers, and dates from the original text, without losing
#             #     any information. The output text should consist exclusively of factual, neutrally phrased sentences that are three to ten words
#             #       long. All information must be preserved, but without any artistic nuances. Direct speech in the source text should not be
#             #         replicated as such, but it should be laid out in short sentences who said or did what in which order, ensuring a neutral,
#             #           information-rich text."""
    
#             # reply = ask_LLM ('NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO',
#             #   "You are a very smart very intelligence assistant who is very helpful.",
#             #     input_text , API_KEY ,temperature=0.5,top_p=0.95,max_tokens=1000, frequency_penalty=1.1,presence_penalty=1.1)
#             # '''

#             # Determine the slice of the last 50 elements (if the list has more than 50 elements)
#             current_kg_context = current_kg[-50:] if len(current_kg) > 50 else current_kg

#             # Concatenate the elements into a single string
#             current_kg_context = ' '.join(current_kg_context)

#             #print(".....................KG_format_example_prompt start.......................")
#             text = scripts.prompts.KG_format_example_prompt(current_kg_context, sentence)
#             #print(text)
#             #print(".....................KG_format_example_prompt end.......................")

#             for i in range(2):
#                 knowledge_graph_segment = ask_LLM(model_name,
#                                                 "You are a very smart very intelligence assistant who is very helpful.",
#                                                 text, API_KEY, temperature=0.1, top_p=0.95, max_tokens=1000,
#                                                 frequency_penalty=1.1, presence_penalty=1.1)
#                 if not (extract_kg_content(knowledge_graph_segment) == None):
#                     break
#             try:
#                 current_kg.append("<segment " + str(segment_nr) + ">\n" + extract_kg_content(
#                     knowledge_graph_segment) + "<source_sentence_min_hash: " + str(
#                     create_minhash_vector(sentence)) + " >\n" + "</segment " + str(segment_nr) + ">\n")
#                 print("<segment " + str(segment_nr) + ">\n" + extract_kg_content(
#                     knowledge_graph_segment) + "<source_sentence_min_hash: " + str(
#                     create_minhash_vector(sentence)) + " >\n" + "</segment " + str(segment_nr) + ">\n")
#             except:
#                 current_kg.append(
#                     "<segment " + str(segment_nr) + ">\n" + knowledge_graph_segment + "<source_sentence_min_hash: " + str(
#                         create_minhash_vector(sentence)) + " >\n" + "</segment " + str(segment_nr) + ">\n")
#                 print("<segment " + str(segment_nr) + ">\n" + knowledge_graph_segment + "<source_sentence_min_hash: " + str(
#                     create_minhash_vector(sentence)) + " >\n" + "</segment " + str(segment_nr) + ">\n")

#             prompt = scripts.prompts.KG_reconstruction_prompt(reconstruction_so_far, current_kg)
#             for i in range(2):
#                 next_reconstruction = ask_LLM(model_name,
#                                             "You are a very smart very intelligence assistant who is very helpful.",
#                                             prompt, API_KEY, temperature=0.5, top_p=0.95, max_tokens=4000,
#                                             frequency_penalty=1.1, presence_penalty=1.1)
#                 if not (extract_reconstruction_content(next_reconstruction) == None):
#                     break

#             reconstruction_so_far += extract_reconstruction_content(next_reconstruction)
#             #print(reconstruction_so_far)
            
#             print(extract_reconstruction_content(next_reconstruction))
#             segment_nr += 1
            
#         #all_kg_results.append(current_kg)
#         #print("....................start....................................")
#         #print(current_kg.split("<source_sentence_min_hash:"))
        
#         #print("...............current kg........................")
#         #print(current_kg)
        
#         #kg_String = ''.join(current_kg)
#         all_kg_results.append(current_kg)
        
#         #print(".....................current kg end.........................")
#         #all_reconstruction_results.append(reconstruction_so_far)
#         #print(reconstruction_so_far.split("<source_sentence_min_hash:")[0])
#         #reconstruction_String = ''.join(reconstruction_so_far)
#         all_reconstruction_results.append(reconstruction_so_far)
#         #print("....................end....................................")
        
#         input_string_so_far_list.append(input_string_so_far)
        
        
# #         print("\n")
# #         print("......all_kg_results............")
# #         print("\n")
# #         print(all_kg_results)
        
# #         print("\n")
# #         print("......reconstruction text............")
# #         print("\n")
# #         print(all_reconstruction_results)
#     except:
#         print(i)
#         pass
        

In [None]:
# df = pd.DataFrame({
#     'Input_Texts': input_string_so_far_list,
#     'Output_Graphs': all_kg_results,
#     'Output_Reconstructions': all_reconstruction_results, })


# print(df)

# # print("500 word sample evalution:", "\n")
# # base_cap_500, original_cap_500, knowledgegraph_cap_500, reconstruction_cap_500,QA_df = evaluate_peformance(df, 2,
# #                                                                                                      "q_a_kg.parquet")

# # print("No context correct answer percentage:", base_cap_500, "\n")
# # print("Original context correct answer percentage:", original_cap_500, "\n")
# # print("Knowledgegraph context correct answer percentage:", knowledgegraph_cap_500, "\n")
# # print("Reconstruckted text context correct answer percentage:", reconstruction_cap_500, "\n")





In [None]:
# df.to_csv("data/df_save_2500.csv", encoding='utf-8', index=False)
# #QA_df.to_csv("data/questions_answer_save_200.csv", encoding='utf-8', index=False)

In [None]:
# 1000 papers for CoT:

# No context correct answer percentage: 44.19784400760939 

# Original context correct answer percentage: 84.14647730437204 

# Knowledgegraph context correct answer percentage: 77.47158824081902 

# Reconstructed text context correct answer percentage: 77.27733804656881 

