# Learning Simplified Natural Grammar (MIL+GPT)

### Import Packages

In [None]:
from gpt_api import *
import sys
sys.path.insert(0, '../../../')
from PyGol_Final import *
import numpy as np
import matplotlib.pyplot as plt
import pylab as plb

### Background knowledge and example generation - PyGol

In [None]:
# Call PyGol's natural-language helper with the facts file, the examples file
# and the target background-knowledge file name. It returns four values that
# the later cells use: the BK file handle/name, the positive examples, the
# negative examples, and a lookup from example to sentence
# (NOTE(review): exact return types are defined in PyGol_Final - confirm there).
(
    BK_file,
    pos_example,
    neg_example,
    senetence_list,
) = generate_bk_for_natural_language(
    "facts.pl",
    "ex.pl",
    "BK_file.pl",
)

### Training-set sizes to try and number of repetitions per size

In [None]:
# How many times the experiment is repeated for every training-set size.
number_of_tests = 10

# Training-set sizes: the learning loop samples this many positive and this
# many negative examples per run.
list_1 = [
    1,
    2,
    5,
    10,
    15,
    20,
]

### Lists to save the metrics from all the runs

In [None]:
# Accumulators filled by the later cells. Each one ends up holding one inner
# list of per-run accuracies for every entry of the training-size list.
# Four distinct list objects are created (no aliasing between them).
(
    final_accuracy_pygol,
    final_accuracy_GPT,
    final_accuracy_hybrid,
    default_accuracy,
) = ([], [], [], [])

### Learning Phase (PyGol and GPT)

In [None]:
# `random.sample` is used below; import the module explicitly instead of
# relying on one of the star imports happening to re-export it.
import random

# For every training-set size in `list_1`, repeat the experiment
# `number_of_tests` times. Each repetition:
#   1. samples `i` positive and `i` negative training examples; everything
#      not sampled becomes the test set,
#   2. learns a hypothesis with PyGol and evaluates it,
#   3. evaluates GPT alone on the same split,
#   4. evaluates GPT given the PyGol hypothesis (hybrid).
# The per-run accuracies of each size are appended as one inner list to
# `final_accuracy_pygol`, `final_accuracy_GPT` and `final_accuracy_hybrid`.
for i in list_1:
    print("Number of pos. examples:", i)
    acc_pygol = []
    acc_chat = []
    acc_hybrid = []
    for j in range(number_of_tests):
        # Train/test split of the positive examples - PyGol
        pos_train = random.sample(pos_example, i)
        pos_test = set(pos_example).difference(pos_train)
        # Train/test split of the negative examples - PyGol
        neg_train = random.sample(neg_example, i)
        neg_test = set(neg_example).difference(neg_train)

        # Same splits expressed as sentences for GPT. `senetence_list` is
        # indexed by example; the comprehension variable is deliberately not
        # named `i` so it cannot be confused with the training-set size.
        pos_train_list = [senetence_list[ex] for ex in pos_train]
        pos_test_list = [senetence_list[ex] for ex in pos_test]
        neg_train_list = [senetence_list[ex] for ex in neg_train]
        neg_test_list = [senetence_list[ex] for ex in neg_test]

        # Bottom clause generation - PyGol
        P, N = bottom_clause_generation(constant_set=[], container="memory", file=BK_file,
                                        positive_example=pos_train,
                                        negative_example=neg_train,
                                        tqdm_disable=True)
        # Hypothesis from PyGol
        H = pygol_learn_natural_language(P, N)
        # Propositional form of the hypothesis, passed to GPT in the hybrid run
        H_11 = hypo_prepositional(H)

        # --- PyGol hypothesis evaluated on the held-out examples ---
        model = evaluate_theory_prolog(H, BK_file, pos_test, neg_test, verbose=True)
        accuracy_pygol = model.accuracy
        acc_pygol.append(accuracy_pygol)

        # --- GPT alone ---
        result_1 = evaluate_model_with_chatgpt(pos_train_list, neg_train_list,
                                               pos_test_list, neg_test_list)
        acc_GPT = calc_acc_gpt(result_1)
        acc_chat.append(acc_GPT)

        # --- GPT + MIL (hybrid): same call plus the PyGol hypothesis ---
        result_2 = evaluate_model_with_chatgpt_hybrid(pos_train_list, neg_train_list,
                                                      pos_test_list, neg_test_list, H_11)
        acc_GPT_hybrid = calc_acc_gpt(result_2)
        acc_hybrid.append(acc_GPT_hybrid)

    print("\tPyGol", np.mean(acc_pygol))
    final_accuracy_pygol.append(acc_pygol)
    print("\tGPT", np.mean(acc_chat))
    final_accuracy_GPT.append(acc_chat)
    # Fix: this line reports the hybrid result; it used to be printed under
    # the same "GPT" label as the line above, making the two indistinguishable.
    print("\tGPT+PyGol", np.mean(acc_hybrid))
    final_accuracy_hybrid.append(acc_hybrid)

### Build the default-accuracy baseline list, one entry per item in 'list_1'

In [None]:
# Constant 0.5 baseline: one two-element entry per training-set size, so the
# baseline can be summarised and plotted the same way as the other series.
# Every entry is the very same `sample_list` object (it is never mutated).
sample_list = [0.5, 0.5]
default_accuracy.extend([sample_list] * len(list_1))

In [None]:

def _mean_and_sem(runs):
    """Summarise each inner list of accuracies.

    Args:
        runs: iterable of lists; each inner list holds the accuracies of the
            repeated runs for one training-set size.

    Returns:
        A pair ``(means, errors)`` of lists with one entry per inner list:
        the mean, and the standard error computed as ``np.std(values) /
        sqrt(n)`` (``np.std`` with its default ``ddof=0``, as before).
    """
    means = []
    errors = []
    for group in runs:
        values = np.array(group)
        means.append(np.mean(values))
        errors.append(np.std(values) / np.sqrt(len(group)))
    return means, errors


plb.rcParams['font.size'] = 12

means_default, errors_default = _mean_and_sem(default_accuracy)
means_pygol, errors_pygol = _mean_and_sem(final_accuracy_pygol)
# Fix: the two GPT series were swapped - `means_chat` was computed from the
# hybrid results and `means_chat_hybrid` from the plain-GPT results, so the
# 'GPT' and 'GPT+PyGol' curves were drawn under each other's legend label.
means_chat, errors_chat = _mean_and_sem(final_accuracy_GPT)
means_chat_hybrid, errors_chat_hybrid = _mean_and_sem(final_accuracy_hybrid)

# Creating the plot: one dashed error-bar curve per method.
plt.figure(figsize=(10, 6))
series = [
    (means_pygol, errors_pygol, 'red', 'PyGol'),
    (means_chat, errors_chat, 'blue', 'GPT'),
    (means_chat_hybrid, errors_chat_hybrid, 'green', 'GPT+PyGol'),
    # Legend label typo fixed ('Deafult' -> 'Default').
    (means_default, errors_default, 'black', 'Default'),
]
for series_means, series_errors, colour, label in series:
    plt.errorbar(list_1, series_means, yerr=series_errors, fmt='--', capsize=5,
                 ecolor=colour, color=colour, markersize=1, label=label)

custom_xtick_positions = [0, 4, 8, 12, 16, 20]
custom_xtick_labels = ['0', '4', '8', '12', '16', '20']
plt.xticks(ticks=custom_xtick_positions, labels=custom_xtick_labels)
plt.xlabel('Number of examples')
plt.ylabel('Accuracy')
plt.xlim(0.0, 20.1)  # Set x-axis range
plt.ylim(0.4, 1.1)
# Single legend call: the earlier bare plt.legend() was immediately replaced
# by this one, so it has been dropped.
plt.legend(bbox_to_anchor=[1.1, 0.5],
           loc='center', ncol=1)
# The legend is anchored beyond the right edge of the axes; a tight bounding
# box keeps it inside the saved image.
plt.savefig('plot.png', bbox_inches='tight')
# Show the plot
plt.show()
