# Title

In [1]:
from openai import OpenAI
import nltk
from nltk.tokenize.treebank import TreebankWordDetokenizer
import random
import pandas as pd
from sklearn.model_selection import train_test_split

In [2]:
client = OpenAI()
detokenize = TreebankWordDetokenizer().detokenize

In [51]:
corpus = pd.concat([pd.Series(nltk.corpus.udhr.sents(f), name = f[:-7]) for f in nltk.corpus.udhr.fileids() if 'Latin1' in f], axis = 1)
min_num_sentences = min([len(nltk.corpus.udhr.sents(f)) for f in nltk.corpus.udhr.fileids() if 'Latin1' in f])
corpus = corpus.head(min_num_sentences)

In [52]:
corpus.iloc[:3, :3]

Unnamed: 0,Achehnese,Achuar-Shiwiar,Afaan_Oromo_Oromiffa
0,"[PEUNYATAANUMUM, TEUNTANG, HAK, -, HAK, ASASI,...","[Mash, Nungkanam, Pujuinau, Angkan, Pengker, P...","[Labsii, Walii, -, gala, Mirgoota, Namummaa, M..."
1,"[Tiep, ureung, lahee, dengon, hak, -, hak, dro...","[Aints, ainauti, mash, metek, ainaji, .]","[SEENSA, Ulfinni, fi, wal, -, qixxummaan, ilmo..."
2,"[Pane, "", komitmen, "", bak, awai, phon, ,, put...","[Tu, nintimsar, pujakrikia, chikich, ainaun, w...","[Qabxii, 1, Namooti, hundinuu, birmaduu, ta, '..."


In [276]:
languages = pd.Series([lang for lang in corpus.columns])

Now that we have a dataset (and a suite of tools for adding to the dataset), let's focus on getting the LLM to learn the condition in context and evaluate the LLM's learning. 

In [182]:
def build_training_sentences_true(corpus, lang, train_k = 3, seed = None):
    training_sentences_true = corpus[lang].sample(train_k, random_state = seed)
    return training_sentences_true

def build_training_sentences_false(corpus, lang, train_k = 3, seed = None):
    sample_space_false = corpus.loc[:, corpus.columns != lang]
    training_sentences_false = pd.Series([sample_space_false.sample(axis = 0, random_state = seed).sample(axis = 1, random_state = seed).iloc[0, 0] for _ in range(train_k)])
    return training_sentences_false

In [232]:
def build_test_sentences_true(corpus, lang, training_sentences_true, test_k = 10, seed = None):
    sample_space_true = corpus[~corpus.index.isin(training_sentences_true.index)][lang]
    test_sentences_true = sample_space_true.sample(test_k, random_state = seed)
    return test_sentences_true
    
def build_test_sentences_false(corpus, lang, training_sentences_false, test_k = 10, seed = None):
    sample_space_false = corpus.loc[:, corpus.columns != lang]
    sample_space_false = sample_space_false[~sample_space_false.index.isin(training_sentences_false.index)]
    test_sentences_false = pd.Series([sample_space_false.sample(axis = 0, random_state = seed).sample(axis = 1, random_state = seed).iloc[0, 0] for _ in range(test_k)])
    return test_sentences_false
    

In [215]:
def build_train_test_data(corpus, lang, train_k = 3, test_k = 10, seed = None):
    
    train_true = build_training_sentences_true(corpus, lang, train_k, seed)
    train_false = build_training_sentences_false(corpus, lang, train_k, seed)

    test_true = build_test_sentences_true(corpus, lang, train_true, test_k, seed)
    test_false = build_test_sentences_false(corpus, lang, train_false, test_k, seed)

    return train_true, train_false, test_true, test_false

In [184]:
example_language = 'English'

In [200]:
train_true = build_training_sentences_true(corpus, example_language)
train_false = build_training_sentences_false(corpus, example_language)
test_true = build_test_sentences_true(corpus, example_language, train_true)
test_false = build_test_sentences_false(corpus, example_language, train_false)

In [186]:
test_true

26    [Article, 12, No, one, shall, be, subjected, t...
6     [Whereas, a, common, understanding, of, these,...
24    [No, one, shall, be, held, guilty, of, any, pe...
30    [Article, 14, Everyone, has, the, right, to, s...
15    [Article, 4, No, one, shall, be, held, in, sla...
28    [Article, 13, Everyone, has, the, right, to, f...
43    [No, one, may, be, compelled, to, belong, to, ...
11    [They, are, endowed, with, reason, and, consci...
29    [Everyone, has, the, right, to, leave, any, co...
27    [Everyone, has, the, right, to, the, protectio...
Name: English, dtype: object

In [267]:
def prompt(train_sentences_true, train_sentences_false, test_s):
    p = "Each of the following sentences is labeled 'Yes' if it follows a secret rule and labeled 'No' otherwise:\n\n"
    
    for s in train_sentences_true:
        p += "\"" + detokenize(s)[:50] + "\": Yes;\n"

    for s in train_sentences_false:
        p += "\"" + detokenize(s)[:50] + "\": No;\n"

#    p += "\nFirst, state the condition as briefly as possible. Then, state 'Label: True' if the following sentence satisfies the condition and 'Label: False' otherwise.\n\n"
#    p += "\nReply 'Condition: <describe the secret condition briefly>' and on a new line write 'Condition satisfied: <True or False>' where the condition is satisfied if the next sentence satisfies the condition:\n\n"
    p += "\nYour job is to learn the secret rule, thinking carefully about the previous examples. Does the sentence:\n\n"
    p += "\"" + detokenize(test_s)[:50] + "\""
    p += "\n\nfollow the rule? Give a 'Yes' or 'No' answer, with no explanation."
    return p

In [211]:
def get_response(p):
    response = client.completions.create(
                model = "gpt-3.5-turbo-instruct",
                prompt = p
                )
    return response.choices[0].text

In [376]:
example_language = 'Norwegian'

In [377]:
train_true = build_training_sentences_true(corpus, example_language)
train_false = build_training_sentences_false(corpus, example_language)
test_true = build_test_sentences_true(corpus, example_language, train_true)
test_false = build_test_sentences_false(corpus, example_language, train_false)

In [378]:
print(prompt(train_true, train_false, test_true.iloc[0]))

Each of the following sentences is labeled 'Yes' if it follows a secret rule and labeled 'No' otherwise:

"Heller ikke skal det kunne idømmes strengere straf": Yes;
"Ekteskap må bare inngås etter fritt og fullt samty": Yes;
"Enhver har rett til et statsborgerskap.": Yes;
"INKCAZO - JIKELELE NGEEMFANELO ZOLUNTU ISINGENISO ": No;
"Artigo 19 ° Todo o indivíduo tem direito à liberda": No;
"Mundu akasajimbikwa magambo pa ulemwa wuwatesile m": No;

Your job is to learn the secret rule, thinking carefully about the previous examples. Does the sentence:

"Enhver har krav på alle de rettigheter og friheter"

follow the rule? Give a 'Yes' or 'No' answer, with no explanation.


In [320]:
train_true, train_false, test_true, test_false = build_train_test_data(corpus, 'English')

In [321]:
p = prompt(train_true, train_false,test_true.iloc[0] )

In [373]:
p = prompt(train_true, train_false,test_true.iloc[0] )

In [374]:
print(p)

Each of the following sentences is labeled 'Yes' if it follows a secret rule and labeled 'No' otherwise:

"Mataupu 9 E leai se tagata e tatau ona saisaitia f": Yes;
"Ua faaeeina atu i a latou le mafaufau lelei ma le ": Yes;
"Mataupu 20 1.": Yes;
"E agavaa uma tagata i puipuig tutusa e faasaga aga": Yes;
"Mataupu 11 1.": Yes;
"O tagata taitoatasi uma e i ai lana aia tatau e tu": Yes;
"E leai se tagata e tatau ona aveeseina lona tofi t": Yes;
"O le a tatau ona faaipoipo le alii ma tamaitai i l": Yes;
"2.": Yes;
"O tagata taitoatasi uma e i ai lana aia tatau i se": Yes;
"Mataupu 16 1.": Yes;
"2.": Yes;
"O tagata taitoatasi uma e i ai lana aia tatau e sa": Yes;
"Mataupu 17 1.": Yes;
"2.": Yes;
"E i le tagata taitoatasi uma le aia tatau e umia a": Yes;
"O le aiga o se vaega faale - natura masani ma maut": Yes;
"E leai se tagata e tatau ona nofosala i so o se so": Yes;
"Mataupu 19 O tagata taitoatasi uma e i ai lana aia": Yes;
"Mataupu 4 E leai se tagata e tatau ona taofia faam": Yes;
"Ie ji

In [265]:
response = get_response(p)

In [266]:
print(response)



Yes


In [344]:
def evaluate_icl(corpus, lang, train_true = None, train_false = None, k_train = 3, k_test = 10, seed = None, verbose = True):

    test_true = build_test_sentences_true(corpus, lang, train_true, test_k = k_test, seed = seed)
    test_false = build_test_sentences_false(corpus, lang, train_false, test_k = k_test, seed = seed)
    
    num_successes = 0
    #test on True
    for s in test_true:
        p = prompt(train_true, train_false, s)
        response = get_response(p)
        if 'Yes' in response:
            num_successes += 1
        else:
            if verbose:
                print('On test sentence: '+ detokenize(s) + ' -- the LLM incorrectly evaluates as No')

    #test on False
    for s in test_false:
        p = prompt(train_true, train_false, s)
        response = get_response(p)
        if 'No' in response:
            num_successes += 1
        else:
            if verbose:
                print('On test sentence: '+ detokenize(s) + ' -- the LLM incorrectly evaluates as Yes')

    accuracy = num_successes/(len(test_true) + len(test_false))

#    print('The LLM\'s accuracy is {}'.format(accuracy))
    return accuracy
        
    

In [338]:
train_true, train_false, test_true, test_false = build_train_test_data(corpus, 'English')

In [345]:
train_false

0     [Ca, chiquítimoi, sipuacë, ', itima, ', icën, .]
1    [Heg, jalankeun, eta, hak, -, hak, teh, boh, k...
2    [Aretikele, 6, Mongwe, le, mongwe, o, na, le, ...
dtype: object

In [350]:
evaluate_icl(corpus, 'English', train_true = train_true, train_false = train_false)

On test sentence: The General Assembly, -- the LLM incorrectly evaluates as No
On test sentence: Now, therefore, -- the LLM incorrectly evaluates as No
On test sentence: 2. -- the LLM incorrectly evaluates as Yes
On test sentence: DIUZ DAIHCIBBET Boux boux miz swhsiengj, liengzsim caeuq cunghgyau dih genzli; gij genzli neix baugva de cwyouz gaijbienq cunghgyau roxnaeuz sinqnyangj, danhduz roxnaeuz cizdij, gunghgaih roxnaeuz mimiz yungh gyauyi, sizcenj, lijbai caeuq gailiz biujsi cunghgyau roxnaeuz sinqnyangj de. -- the LLM incorrectly evaluates as Yes
On test sentence: 3. -- the LLM incorrectly evaluates as Yes
On test sentence: Ebenso darf keine schwerere Strafe als die zum Zeitpunkt der Begehung der strafbaren Handlung angedrohte Strafe verhängt werden. -- the LLM incorrectly evaluates as Yes
On test sentence: Mongwe le mongwe o na le tshwanelo ya kgololosego ya motsamao le bonno me melelwaneng yo naga nngwe le nngwe. -- the LLM incorrectly evaluates as Yes
On test sentence: did oub 

0.6

In [351]:
test_set = languages.sample(30)

In [352]:
test_set

52                        German_Deutsch
124                     Oshiwambo_Ndonga
166                         Tongan_Tonga
160                       Tenek_Huasteco
173                              Urarina
113                NigerianPidginEnglish
60     Hmong_Miao-Sichuan-Guizhou-Yunnan
140         Runyankore-rukiga_Nkore-kiga
22                      Campa_Pajonalino
175                                Vlach
112                      Ngangela_Nyemba
116                            Norwegian
14                        Basque_Euskara
133                              Quechua
104                       Mayan_Yucateco
189                                 Zulu
170                              Tzotzil
69                         Iloko_Ilocano
33                             Chickasaw
118              Norwegian_Norsk-Nynorsk
25                     Cashibo-Cacataibo
97                              Malagasy
21                            Cakchiquel
8                                Arabela
163             

In [355]:
results = {}
for lang in test_set:
    train_true = build_test_sentences_true(corpus, lang, train_true, test_k = 20, seed = None)
    train_false = build_test_sentences_false(corpus, lang, train_false, test_k = 20, seed = None)
    accuracy = evaluate_icl(corpus, lang, train_true, train_false, k_test = 20, verbose = False)
    print('The LLM achieves accuracy {} when learning the rule \"The sentence is in the language {}\".'.format(accuracy, lang))
    results[lang] = accuracy

The LLM achieves accuracy 0.6 when learning the rule "The sentence is in the language German_Deutsch".
The LLM achieves accuracy 0.55 when learning the rule "The sentence is in the language Oshiwambo_Ndonga".
The LLM achieves accuracy 0.55 when learning the rule "The sentence is in the language Tongan_Tonga".
The LLM achieves accuracy 0.6 when learning the rule "The sentence is in the language Tenek_Huasteco".
The LLM achieves accuracy 0.55 when learning the rule "The sentence is in the language Urarina".
The LLM achieves accuracy 0.775 when learning the rule "The sentence is in the language NigerianPidginEnglish".
The LLM achieves accuracy 0.625 when learning the rule "The sentence is in the language Hmong_Miao-Sichuan-Guizhou-Yunnan".
The LLM achieves accuracy 0.575 when learning the rule "The sentence is in the language Runyankore-rukiga_Nkore-kiga".
The LLM achieves accuracy 0.725 when learning the rule "The sentence is in the language Campa_Pajonalino".
The LLM achieves accuracy 0

In [365]:
accuracy_table = pd.DataFrame.from_dict(results, orient = 'index', columns = ['Accuracy'])

In [369]:
accuracy_table.sort_values('Accuracy', ascending = False, inplace=True)

In [370]:
accuracy_table

Unnamed: 0,Accuracy
Norwegian,0.8
NigerianPidginEnglish,0.775
Vlach,0.75
Campa_Pajonalino,0.725
Cakchiquel,0.675
Tojol-abal,0.65
Quechua,0.625
Tzotzil,0.625
Hmong_Miao-Sichuan-Guizhou-Yunnan,0.625
IrishGaelic_Gaeilge,0.625
