# Training Data

In [1]:
import os,random,math
from gensim.models import Word2Vec

TRAINING_DIR="C:/Users/chris/ANLE CW/sentence-completion/Holmes_Training_Data"

def get_training_testing(training_dir=TRAINING_DIR,split=1):
    filenames=os.listdir(training_dir)
    print("There are {} files in the training directory: {}".format(len(filenames),training_dir))
    random.shuffle(filenames)
    return(filenames[:int(len(filenames)*split)],filenames[int(len(filenames)*split):])

trainingfiles,heldoutfiles=get_training_testing()

There are 522 files in the training directory: C:/Users/chris/ANLE CW/sentence-completion/Holmes_Training_Data


# MODELS

In [None]:
from nltk import word_tokenize as tokenize
import operator

#Language Model Class
class language_model():
    def __init__(self,trainingdir=TRAINING_DIR,files=[]):
        self.training_dir=trainingdir
        self.files=files
        self.train()
    
    #TRAIN EACH OF THE MODELS
    #Create each models dictionary and then process the files
    def train(self):
        self.unigram={}
        self.bigram={}
        self.trigram={}
        self.word2vecpairs=[]
        self.word2vecsentences=[]
        
        self._processfiles()
        self.word2vecmodel = Word2Vec(self.word2vecsentences, min_count=0)
        self.word2vecwords = (self.word2vecmodel.wv.key_to_index)
        print(self.word2vecpairs[0:3])
        self._discount()
        self._kneser_ney()
        self._convert_to_probs()

    #Function to Process each file ready for training
    def _processfiles(self):
        for afile in self.files:
            print("Processing {}".format(afile))
            try:
                with open(os.path.join(self.training_dir,afile)) as instream:
                    for line in instream:
                        line=line.rstrip()
                        if len(line)>0:
                            self._processline(line)
            except UnicodeDecodeError:
                print("UnicodeDecodeError processing {}: ignoring file".format(afile))

                
                
####Functions about probabs
    #Function tp convert counts into probabs
    def _convert_to_probs(self):
        #convert unigram prob
        self.unigram={k:v/sum(self.unigram.values()) for (k,v) in self.unigram.items()}
        #conert bigram probs
        self.bigram={key:{k:v/sum(adict.values()) for (k,v) in adict.items()} for (key,adict) in self.bigram.items()}
        #convert trigram probs
        self.trigram={key:{k:v/sum(adict.values()) for (k,v) in adict.items()} for (key,adict) in self.trigram.items()}
        
        
    #Function  to retrieve probabs from a model
    def get_prob(self,token,context="",methodparams={}):
        unidist=self.unigram
        
        #for given method return probability
        if methodparams.get("method","unigram")=="unigram":
            return self.unigram.get(context[0],self.unigram.get("__UNK",0))
        elif methodparams.get("method","bigram")=="bigram":
            #check if optional smoothing is enabled
            if methodparams.get("smoothing","kneser-ney")=="kneser-ney":
                unidist=self.knb
            bigram=self.bigram.get(token,self.bigram.get("__UNK",{}))
            big_probab=bigram.get(context[0],self.bigram.get("__UNK",0))
            lmbda=bigram.get("__DISCOUNT",bigram.get("__UNK",0))
        elif methodparams.get("method","trigram")=="trigram":
            #check if optional smoothing is enabled
            if methodparams.get("smoothing","kneser-ney")=="kneser-ney":
                unidist=self.knt
            trigram=self.trigram.get(token,self.trigram.get("__UNK",{}))
            big_probab=trigram.get(context[0],trigram.get("__UNK",0))
            lmbda=trigram.get("__DISCOUNT",trigram.get("__UNK",0))
        else:
            print("Not implemented: {}".format(method))
            return 0
        
        uni_probab=unidist.get(token,unidist.get("__UNK",0))
        print(token, context[0], big_probab+lmbda*uni_probab)
        return (big_probab+lmbda*uni_probab )

    #Store a probab mass in each dictionary (bigram and trigram)
    def _discount(self,discount=0.75):
        #discount each n-gram count by a small fixed amount
        self.bigram={k:{kk:value-discount for (kk,value) in adict.items()}for (k,adict) in self.bigram.items()}
        self.trigram={k:{kk:value-discount for (kk,value) in adict.items()}for (k,adict) in self.trigram.items()}
        
        #for each word, store the total amount of the discount so that the total is the same reserving as probab mass
        for k in self.bigram.keys():
            lamb=len(self.bigram[k])
            self.bigram[k]["__DISCOUNT"]=lamb*discount
        for k in self.trigram.keys():
            lamb=len(self.trigram[k])
            self.trigram[k]["__DISCOUNT"]=lamb*discount

    #function used for optional smoothing in bigrams and trigrams       
    def _kneser_ney(self):
        #work out kneser-ney unigram probabilities
        #count the number of contexts each word has been seen in
        self.knb={}
        self.knt={}
        for (k,adict) in self.bigram.items():
            for kk in adict.keys():
                self.knb[kk]=self.knb.get(kk,0)+1
        for (k,adict) in self.trigram.items():
            for kk in adict.keys():
                self.knt[kk]=self.knt.get(kk,0)+1
    
####Functions for processing
    
    #Function for processing each line of a file
    ##Tokenises each line, adds start and end to end of line, 
    ##counts No. tokens and adds it to dictionary with token as key
    #also stores whole sentences and every two words as a pair for word2vec
    def _processline(self,line):
        tokens=["_START"]+tokenize(line)+["_END"]
        w1="_END"
        w2=""
        self.word2vecsentences.append(tokens)
        for token in tokens:
            #add to unigram
            self.unigram[token]=self.unigram.get(token,0)+1
            
            #add to bigram
            #get current words associated words dictionary
            current=self.bigram.get((w1),{})
            #adds 1 to tokens frequency in dictionary
            current[token]=current.get(token,0)+1
            #make previous word's dictionary = current dictionary
            self.bigram[w1]=current            
            
            #add to trigram
            #get phrases associated words dictionary
            current=self.trigram.get((w2+" "+w1),{})
            #print((w2+" "+w1))
            #adds 1 to tokens frequency in dictionary of prev
            current[token]=current.get(token,0)+1
            #make previous word's dictionary = current dictionary
            self.trigram[(w2+" "+w1)]=current
            
            #add to word2vec
            self.word2vecpairs.append([w1,token])
            
            #move words along
            w2=w1
            w1=token

#train language model on all data
mylm=language_model(files=trainingfiles)

Processing LESMS10.TXT
Processing SNOWI10.TXT
Processing SKYGM10.TXT
Processing CNTMI10.TXT
Processing WUTHR10.TXT
Processing TLFNS10.TXT
Processing LLIRM10.TXT
Processing MARIA10.TXT
Processing 1DINA10.TXT
Processing PETER16.TXT
Processing SCARR10.TXT
Processing INTEP10.TXT
Processing AZNIT10.TXT
Processing THEAM10.TXT
Processing BBETC10.TXT
Processing LDORT10.TXT
Processing HHOHG10.TXT
Processing PALIN10.TXT
Processing CONFI10.TXT
Processing LAZYA10.TXT
Processing JUSDV10.TXT
Processing WHTCO10.TXT
Processing 7GABL10.TXT
Processing TARZ310.TXT
Processing SIOUX10.TXT
Processing HNTMN10.TXT
Processing UNCTR10.TXT
Processing KIDS110.TXT
Processing PORAP10.TXT
Processing FRANK13.TXT
Processing RWFRS10.TXT
Processing BMINE10.TXT
Processing LFSTA10.TXT
Processing SBRUN10.TXT
Processing JBALL10.TXT
Processing TARZ510.TXT
Processing GSILX10.TXT
Processing BLIXN10.TXT
Processing SISTR10.TXT
Processing TCONF10.TXT
Processing ACHOE10.TXT
Processing CHOUR10.TXT
Processing VIFRY10.TXT
Processing 

# GET MRSCC DATA

In [3]:
import pandas as pd, csv
from nltk import word_tokenize as tokenize

#gets sentence completion data and stores each question in a question class
#adds the answers of each question to this class too
parentdir="C:/Users/chris/ANLE CW/sentence-completion"
questions=os.path.join(parentdir,"testing_data.csv")
answers=os.path.join(parentdir,"test_answer.csv")
with open(questions) as instream:
    lines=list(csv.reader(instream))
qs_df=pd.DataFrame(lines[1:],columns=lines[0])
qs_df.head()

tokens=[tokenize(q) for q in qs_df['question']]

# SCC Reader

In [4]:
#class for reading each question to complete the MRSCC
class scc_reader:
    
    def __init__(self,qs=questions,ans=answers):
        self.qs=qs
        self.ans=ans
        self.read_files()
        
    def read_files(self):
        
        #read in the question file
        with open(self.qs) as instream:
            csvreader=csv.reader(instream)
            qlines=list(csvreader)
        
        #store the column names as a reverse index so they can be used to reference parts of the question
        question.colnames={item:i for i,item in enumerate(qlines[0])}
        
        #create a question instance for each line of the file (other than heading line)
        self.questions=[question(qline) for qline in qlines[1:]]
        
        #read in the answer file
        with open(self.ans) as instream:
            csvreader=csv.reader(instream)
            alines=list(csvreader)
            
        #add answers to questions so predictions can be checked    
        for q,aline in zip(self.questions,alines[1:]):
            q.add_answer(aline)
        
    def get_field(self,field):
        return [q.get_field(field) for q in self.questions] 
    
    def predict(self,method="chooseA"):
        return [q.predict(method=method) for q in self.questions]
    
    def predict_and_score(self,method="chooseA"):
        scores=[q.predict_and_score(method=method) for q in self.questions]
        return sum(scores)/len(scores)

# Question Class

In [5]:
import numpy as np
import random as rand

#class to store the information of each question as well predict answers and score the accuracy of each model
class question:
   
    def __init__(self,aline):
        self.fields=aline
    
    def get_field(self,field):
        return self.fields[question.colnames[field]]
    
    def add_answer(self,fields):
        self.answer=fields[1]
   
    def chooseA(self):
        return("a")
    
    def predict(self,method="chooseA",model=mylm):
        #eventually there will be lots of methods to choose from
        if method=="random":
            return self.random_choice()
        elif method=="unigram":
            return self.choose_unigram(lm=mylm)
        elif method=="bigram":
            return self.choose(mylm,method=method)
        elif method=="trigram":
            return self.choose(mylm,method=method)
        elif method=="word2vec":
            return self.choose(mylm,method=method)
        else:
            return self.chooseA()

    def predict_and_score(self,method="chooseA"):
        #compare prediction according to method with the correct answer
        #return 1 or 0 accordingly
        prediction=self.predict(method=method)
        print("predict",prediction,"answer",self.answer)
        print("")
        if prediction ==self.answer:
            return 1
        else:
            return 0
    
    #gets line with start and end added
    def get_tokens(self):
        return ["__START"]+tokenize(self.fields[question.colnames["question"]])+["__END"]
    
    #calculates the cosine of two values
    def cosine(self,a,b):
        num=(a*b)
        adota=(a*a)
        bdotb=(b*b)
        if adota == 0 or bdotb==0 or num ==0:
            return 0
        else:
            print(adota,"*",bdotb,"=",adota*bdotb)
            return num/math.sqrt(sum(adota)*sum(bdotb))
    
######RANDOM CODE
    def random_choice(self):
        choices=["a","b","c","d","e"]
        choice=np.random.choice(choices)
        return choice
    
######UNIGRAM CODE
    def choose_unigram(self,lm):
        choices=["a","b","c","d","e"]      
        probabs=[lm.unigram.get(self.get_field(choice+")"),0) for choice in choices]
        maxprob=max(probabs)
        bestchoices=[choice for choice,probab in zip(choices,probabs) if probab == maxprob]
        return np.random.choice(bestchoices)
    
######BIGRAM CODE
    #gets word to the left (Context)
    def get_context_bigram(self,window=1,target="_____",method="bigram"):        
        found=-1
        sent_tokens=self.get_tokens()
        #go through line
        for i,token in enumerate(sent_tokens):
            #once word has been found i=its pointer
            if token==target:
                found=i
                break  
        #once it has been found return the one previous to it if left or the one after if right   
        if found>-1:
            print(i)
            return sent_tokens[i-window:i]
        else:
            return []
    
######TRIGRAM CODE
    #gets word to the left (Context)
    def get_context_trigram(self,window=2,target="_____",method="trigram"):
        found=-1
        sent_tokens=self.get_tokens()
        #go through line
        for i,token in enumerate(sent_tokens):
            #once word has been found i=its pointer
            if token==target:
                found=i
                break  
        #once it has been found return the two previous to it
        if found>-1:
            words=''
            for i in sent_tokens[i-window:i]:
                words=words+i+" "
            return words
        else:
            return []
        
#######Word2Vec Code
    def get_context_word2vec(self,window=2,target="_____",method="word2vec"):
        found=-1
        sent_tokens=self.get_tokens()
        #go through line
        for i,token in enumerate(sent_tokens):
            #once word has been found i=its pointer
            if token==target:
                found=i
                break  
        #once it has been found return the one previous to it if left or the one after if right   
        if found>-1:
            tokens=sent_tokens#[1:len(sent_tokens)-1]
            sentence=[]
            for token in tokens:
                if token!="_____":
                    sentence.append(token)
            return sentence
        else:
            return []
        
    #function for predicting with word2vec  
    #given pair for training, context and current choice
    def predict_word2vec(self, tp, context, choice):
        wvmodel=mylm.word2vecmodel

        #for each question return list of most possible to least possible choices
        #try:
        #print(context)
        answers=(wvmodel.predict_output_word(context,topn=250000))
        #except:
            #return 0
        try:
            for answer in answers:
                #print("answer",answer[0])
                if answer[0]==choice[0]:
                    return answer[1]
            return 0
        except:
            return 0
        
    
#Choose best based on context from Bigram or Trigram
    def choose(self,lm,method="Bigram",choices=[]):
        #reset choices if empty
        if choices==[]:
            choices=["a","b","c","d","e"]
        #get right word (context)
        if method=="bigram":
            context=self.get_context_bigram(window=1,method=method)
            probabs=[lm.get_prob(context[0],[self.get_field(choice+")")],methodparams={"method":method.split("_")[0]}) for choice in choices]
        elif method=="trigram":
            context=self.get_context_trigram(window=2,method=method)
            probabs=[lm.get_prob(context[:len(context)-1],[self.get_field(choice+")")],methodparams={"method":method.split("_")[0]}) for choice in choices]   
        elif method=="word2vec":
            i=rand.randrange(len(mylm.word2vecpairs))
            context=self.get_context_word2vec(window=2,method=method)
            probabs=[(self.predict_word2vec(mylm.word2vecpairs[i], context, [self.get_field(c+")")])) for c in choices]
            
            
        maxprobab=max(probabs)
        print(probabs, maxprobab)
        bestchoices=[choice for choice,probab in zip(choices,probabs) if probab == maxprobab]
        print("choice",bestchoices)
        return np.random.choice(bestchoices) 
    
SCC = scc_reader()

In [10]:
SCC.predict_and_score(method=("trigram"))

and are crying 0.0
and are instantaneously 0.0
and are residing 0.0
and are matched 0.0
and are walking 0.0008267195767195767
[0.0, 0.0, 0.0, 0.0, 0.0008267195767195767] 0.0008267195767195767
choice ['e']
predict e answer c

flowers arranged daintily 0.0
flowers arranged privately 0.0
flowers arranged inadvertently 0.0
flowers arranged miserably 0.0
flowers arranged comfortably 0.0
[0.0, 0.0, 0.0, 0.0, 0.0] 0.0
choice ['a', 'b', 'c', 'd', 'e']
predict a answer a

, the gods 0.00018242234421131804
, the moon 0.0011254055389036698
, the panther 0.0003844901716453934
, the guard 0.00012629239214629711
, the country-dance 0.0
[0.00018242234421131804, 0.0011254055389036698, 0.0003844901716453934, 0.00012629239214629711, 0.0] 0.0011254055389036698
choice ['b']
predict b answer d

off , rubbing 0.0
off , doubling 0.0
off , paid 0.0
off , naming 0.0
off , carrying 0.0003370180641682394
[0.0, 0.0, 0.0, 0.0, 0.0003370180641682394] 0.0003370180641682394
choice ['e']
predict e answer c

hand a sup

dark , rosy 0.0
dark , childish 0.0
dark , fearsome 0.0
dark , colorless 0.0
dark , yellow 0.0
[0.0, 0.0, 0.0, 0.0, 0.0] 0.0
choice ['a', 'b', 'c', 'd', 'e']
predict c answer c

fat man cast 0.0
fat man folded 0.0
fat man hastened 0.0
fat man jingled 0.0
fat man winked 0.0
[0.0, 0.0, 0.0, 0.0, 0.0] 0.0
choice ['a', 'b', 'c', 'd', 'e']
predict a answer a

stair , devoured 0.0
stair , translated 0.0
stair , unlocked 0.0
stair , ascended 0.0
stair , occupied 0.0
[0.0, 0.0, 0.0, 0.0, 0.0] 0.0
choice ['a', 'b', 'c', 'd', 'e']
predict e answer c

moment as startled 0.0
moment as tired 0.0
moment as thin 0.0
moment as wise 0.0
moment as clever 0.0
[0.0, 0.0, 0.0, 0.0, 0.0] 0.0
choice ['a', 'b', 'c', 'd', 'e']
predict c answer a

but is torn 0.0
but is softened 0.0
but is higher 0.0
but is barred 0.0
but is constantly 0.0
[0.0, 0.0, 0.0, 0.0, 0.0] 0.0
choice ['a', 'b', 'c', 'd', 'e']
predict a answer d

his hat closed 0.0
his hat slouched 0.0006688068485821295
his hat wheeled 0.0
his hat pulle

a royal concubine 0.0
[0.0, 0.0012690355329949238, 0.021573604060913704, 0.0012690355329949238, 0.0] 0.021573604060913704
choice ['c']
predict c answer d

You must contradict 0.0
You must persuade 0.0
You must cover 0.0
You must lock 0.0001086484137331595
You must judge 0.0005432420686657974
[0.0, 0.0, 0.0, 0.0001086484137331595, 0.0005432420686657974] 0.0005432420686657974
choice ['e']
predict e answer d

brought a variety 0.0
brought a gush 0.0
brought a bunch 0.0005208333333333333
brought a pair 0.0026041666666666665
brought a branch 0.0
[0.0, 0.0, 0.0005208333333333333, 0.0026041666666666665, 0.0] 0.0026041666666666665
choice ['d']
predict d answer b

Mortimer had stayed 0.0
Mortimer had submitted 0.0
Mortimer had sunk 0.0
Mortimer had risen 0.0
Mortimer had spoken 0.0
[0.0, 0.0, 0.0, 0.0, 0.0] 0.0
choice ['a', 'b', 'c', 'd', 'e']
predict e answer a

long thin hedge 0.0
long thin cane 0.0
long thin dilemma 0.0
long thin nose 0.0
long thin pickle 0.0
[0.0, 0.0, 0.0, 0.0, 0.0] 0.0
ch

and two pairs 0.004600253807106599
and two books 0.0
and two bottles 0.004600253807106599
and two drops 0.0
and two tins 0.0007931472081218274
[0.004600253807106599, 0.0, 0.004600253807106599, 0.0, 0.0007931472081218274] 0.004600253807106599
choice ['a', 'c']
predict c answer e

bottle was painted 0.0
bottle was quartered 0.0
bottle was downstairs 0.0
bottle was published 0.0
bottle was interested 0.0
[0.0, 0.0, 0.0, 0.0, 0.0] 0.0
choice ['a', 'b', 'c', 'd', 'e']
predict c answer c

you would slip 0.0
you would increase 0.0
you would pull 0.0002593899149201079
you would enlarge 0.0
you would appreciate 0.00046690184685619425
[0.0, 0.0, 0.0002593899149201079, 0.0, 0.00046690184685619425] 0.00046690184685619425
choice ['e']
predict e answer a

had no difficulty 0.012096215167059476
had no hesitation 0.0018369610425620407
had no faith 0.001143768196312214
had no scruple 0.0008664910578122833
had no luck 0.001143768196312214
[0.012096215167059476, 0.0018369610425620407, 0.00114376819631221

of the majesty 1.8105633438518442e-05
of the commencement 7.759557187936475e-06
[0.0007319848947286742, 0.0, 5.6041246357318986e-05, 1.8105633438518442e-05, 7.759557187936475e-06] 0.0007319848947286742
choice ['a']
predict a answer a

have already shouted 0.0
have already sneered 0.0
have already arranged 0.00031928480204342275
have already bought 0.0
have already consulted 0.0
[0.0, 0.0, 0.00031928480204342275, 0.0, 0.0] 0.00031928480204342275
choice ['c']
predict c answer c

but his feelings 0.0025951557093425604
but his circumstances 0.0006178942165101335
but his impulses 0.0
but his looks 0.00012357884330202668
but his boots 0.0
[0.0025951557093425604, 0.0006178942165101335, 0.0, 0.00012357884330202668, 0.0] 0.0025951557093425604
choice ['a']
predict a answer e

returned he deserted 0.0
returned he invoked 0.0
returned he joined 0.0
returned he loosened 0.0
returned he pressed 0.0
[0.0, 0.0, 0.0, 0.0, 0.0] 0.0
choice ['a', 'b', 'c', 'd', 'e']
predict b answer c

Together we rolled 

round and examined 0.0
round and relieved 0.0
round and embraced 0.00024826216484607745
round and promised 0.0
round and liked 0.0
[0.0, 0.0, 0.00024826216484607745, 0.0, 0.0] 0.00024826216484607745
choice ['c']
predict c answer a

rather , returns 0.0
rather , stirred 0.0
rather , leaned 0.0
rather , glared 0.0
rather , shot 0.0
[0.0, 0.0, 0.0, 0.0, 0.0] 0.0
choice ['a', 'b', 'c', 'd', 'e']
predict d answer a

against every taste 0.0
against every job 0.0
against every admission 0.0
against every coast 0.0
against every experiment 0.0
[0.0, 0.0, 0.0, 0.0, 0.0] 0.0
choice ['a', 'b', 'c', 'd', 'e']
predict c answer c

through my knobs 0.0
through my compass 0.0
through my doubts 0.0
through my fingers 0.017553191489361703
through my telescope 0.0
[0.0, 0.0, 0.0, 0.017553191489361703, 0.0] 0.017553191489361703
choice ['d']
predict d answer e

The servants remembered 0.0026595744680851063
The servants liked 0.0
The servants grinned 0.0
The servants recollected 0.0
The servants deny 0.0
[0

along the workmanship 0.0
along the shores 0.0027935905545013705
along the lapse 0.0
[0.014178789795488088, 0.0, 0.0, 0.0027935905545013705, 0.0] 0.014178789795488088
choice ['a']
predict a answer a

a size higher 0.0
a size sadder 0.0
a size harder 0.0
a size sooner 0.0
a size larger 0.0
[0.0, 0.0, 0.0, 0.0, 0.0] 0.0
choice ['a', 'b', 'c', 'd', 'e']
predict c answer e

beating and splashing 0.0
beating and splitting 0.0
beating and leaping 0.0
beating and staggering 0.0
beating and growing 0.0
[0.0, 0.0, 0.0, 0.0, 0.0] 0.0
choice ['a', 'b', 'c', 'd', 'e']
predict a answer a

observed a vision 0.0
observed a picture 0.0
observed a nerve 0.0
observed a hound 0.0
observed a carriage 0.0
[0.0, 0.0, 0.0, 0.0, 0.0] 0.0
choice ['a', 'b', 'c', 'd', 'e']
predict a answer e

easy to exaggerate 0.0009593246354566385
easy to contemplate 0.0
easy to improve 0.0
easy to trace 0.004796623177283192
easy to guess 0.010168841135840368
[0.0009593246354566385, 0.0, 0.0, 0.004796623177283192, 0.0101688411

set and mischief 0.0
[0.0, 0.0, 0.0, 0.0, 0.0] 0.0
choice ['a', 'b', 'c', 'd', 'e']
predict d answer d

in the privacy 0.00010337574235768606
in the shape 0.0016794812635212474
in the history 0.0010022952411201736
in the crowd 0.0012479999041152534
in the claims 0.0
[0.00010337574235768606, 0.0016794812635212474, 0.0010022952411201736, 0.0012479999041152534, 0.0] 0.0016794812635212474
choice ['b']
predict b answer d

an hour matters 0.0
an hour kings 0.0
an hour creatures 0.0
an hour volume 0.0
an hour Lucy 0.0
[0.0, 0.0, 0.0, 0.0, 0.0] 0.0
choice ['a', 'b', 'c', 'd', 'e']
predict c answer a

Besides this incoherent 0.0
Besides this collective 0.0
Besides this practical 0.0
Besides this unspeakable 0.0
Besides this preliminary 0.0
[0.0, 0.0, 0.0, 0.0, 0.0] 0.0
choice ['a', 'b', 'c', 'd', 'e']
predict b answer e

example and holding 0.0
example and sinking 0.0
example and jostling 0.0
example and slipping 0.0
example and prompting 0.0
[0.0, 0.0, 0.0, 0.0, 0.0] 0.0
choice ['a', 'b', 'c',

predict a answer c

am an acre 0.0
am an interpreter 0.0
am an indemnity 0.0
am an undertone 0.0
am an improvisation 0.0
[0.0, 0.0, 0.0, 0.0, 0.0] 0.0
choice ['a', 'b', 'c', 'd', 'e']
predict b answer b

' He dashed 0.0
' He appraised 0.0
' He gathered 0.0
' He bade 0.0
' He begged 0.0003255208333333333
[0.0, 0.0, 0.0, 0.0, 0.0003255208333333333] 0.0003255208333333333
choice ['e']
predict e answer a

's voice stared 0.0
's voice sank 0.002484709480122324
's voice hummed 0.0
's voice limped 0.0
's voice shot 0.0
[0.0, 0.002484709480122324, 0.0, 0.0, 0.0] 0.002484709480122324
choice ['b']
predict b answer b

near the memory 0.0
near the smell 0.0
near the balance 0.0
near the dexterity 0.0
near the borders 0.0014300134589502018
[0.0, 0.0, 0.0, 0.0, 0.0014300134589502018] 0.0014300134589502018
choice ['e']
predict e answer e

in his food 7.67035866597122e-06
in his trial 7.67035866597122e-06
in his service 0.0012962906145491363
in his pyjamas 9.971466265762588e-05
in his tracks 0.00105083

to several footprints 0.0
to several truths 0.0
to several trenches 0.0
to several accidents 0.0
[0.0, 0.0, 0.0, 0.0, 0.0] 0.0
choice ['a', 'b', 'c', 'd', 'e']
predict a answer b

Here we dismissed 0.0
Here we grasped 0.0
Here we cudgeled 0.0
Here we nursed 0.0
Here we penetrated 0.0
[0.0, 0.0, 0.0, 0.0, 0.0] 0.0
choice ['a', 'b', 'c', 'd', 'e']
predict e answer a

forward , scraped 0.0
forward , wrung 0.0
forward , remembering 0.0
forward , ate 0.0
forward , poked 0.0
[0.0, 0.0, 0.0, 0.0, 0.0] 0.0
choice ['a', 'b', 'c', 'd', 'e']
predict b answer b

and singular proposal 0.0
and singular history 0.0
and singular difficulty 0.0
and singular applicant 0.0
and singular sauce 0.0
[0.0, 0.0, 0.0, 0.0, 0.0] 0.0
choice ['a', 'b', 'c', 'd', 'e']
predict c answer c

Both cases asleep 0.0
Both cases decided 0.0
Both cases outdoors 0.0
Both cases rapt 0.0
Both cases sentimental 0.0
[0.0, 0.0, 0.0, 0.0, 0.0] 0.0
choice ['a', 'b', 'c', 'd', 'e']
predict a answer b

she had knelt 0.0001542843579397

us very apprehensive 0.0
[0.0, 0.0, 0.0, 0.0, 0.0] 0.0
choice ['a', 'b', 'c', 'd', 'e']
predict d answer d

England a morose 0.0
England a shrewd 0.0
England a brave 0.0
England a surprising 0.0
England a nobler 0.0
[0.0, 0.0, 0.0, 0.0, 0.0] 0.0
choice ['a', 'b', 'c', 'd', 'e']
predict d answer a

of those useful 0.00020871597929537485
of those admirable 4.1743195859074974e-05
of those hysterical 4.1743195859074974e-05
of those ambitious 0.00020871597929537485
of those vague 4.1743195859074974e-05
[0.00020871597929537485, 4.1743195859074974e-05, 4.1743195859074974e-05, 0.00020871597929537485, 4.1743195859074974e-05] 0.00020871597929537485
choice ['a', 'd']
predict a answer c

, and tied 0.0001478845511493942
, and encircled 2.3325638982554287e-06
, and kicked 4.338568850755098e-05
, and waited 0.0005192287237516585
, and watered 6.064666135464115e-06
[0.0001478845511493942, 2.3325638982554287e-06, 4.338568850755098e-05, 0.0005192287237516585, 6.064666135464115e-06] 0.000519228723751658

predict d answer a

must be prompt 0.00013039849780930525
must be wonderful 0.0008606300855414146
must be convicted 0.0
must be described 0.00013039849780930525
must be hungry 0.0017994992697684122
[0.00013039849780930525, 0.0008606300855414146, 0.0, 0.00013039849780930525, 0.0017994992697684122] 0.0017994992697684122
choice ['e']
predict e answer a

about the province 0.0
about the size 0.00843189798558492
about the generosity 0.0
about the tendency 0.0
about the medium 0.00011550545185732766
[0.0, 0.00843189798558492, 0.0, 0.0, 0.00011550545185732766] 0.00843189798558492
choice ['b']
predict b answer b

life was irregular 0.0
life was empty 0.0
life was bewitched 0.0
life was ended 0.0003180661577608143
life was unavoidable 0.0
[0.0, 0.0, 0.0, 0.0003180661577608143, 0.0] 0.0003180661577608143
choice ['d']
predict d answer a

who came floating 0.0
who came crawling 0.0
who came sketching 0.0
who came leaping 0.0
who came bouncing 0.0
[0.0, 0.0, 0.0, 0.0, 0.0] 0.0
choice ['a', 'b', 'c'

did not cease 0.0014898921045456315
did not rejoice 8.764071203209597e-05
did not repent 0.00012659213960191642
did not confess 0.00020449499474155727
[0.0023857749386515014, 0.0014898921045456315, 8.764071203209597e-05, 0.00012659213960191642, 0.00020449499474155727] 0.0023857749386515014
choice ['a']
predict a answer a

man and delivered 0.0
man and withered 0.0
man and hurried 0.0
man and rode 0.0
man and reduced 0.0
[0.0, 0.0, 0.0, 0.0, 0.0] 0.0
choice ['a', 'b', 'c', 'd', 'e']
predict c answer c

his eyes shining 0.0022232246423508186
his eyes wandering 0.0009343987627271555
his eyes kindling 0.00016110323495295787
his eyes knocking 0.0
his eyes growing 0.000676633586802423
[0.0022232246423508186, 0.0009343987627271555, 0.00016110323495295787, 0.0, 0.000676633586802423] 0.0022232246423508186
choice ['a']
predict a answer a

we must disgrace 0.0
we must choose 0.000991626267078008
we must destroy 0.00011018069634200089
we must aggravate 0.0
we must familiarize 0.0
[0.0, 0.000991626

very much safer 0.0
very much offended 0.001390728476821192
[0.0, 0.019403973509933774, 0.028410596026490067, 0.0, 0.001390728476821192] 0.028410596026490067
choice ['c']
predict c answer b

she had stated 5.9340137669119395e-05
she had lunched 1.1868027533823878e-05
she had fainted 0.00020175646807500593
she had vanished 0.000391644908616188
she had anticipated 0.0004391170187514835
[5.9340137669119395e-05, 1.1868027533823878e-05, 0.00020175646807500593, 0.000391644908616188, 0.0004391170187514835] 0.0004391170187514835
choice ['e']
predict e answer c

so the connection 0.0
so the devil 0.0005516328331862312
so the villain 0.0
so the jug 0.0
so the baby 0.0
[0.0, 0.0005516328331862312, 0.0, 0.0, 0.0] 0.0005516328331862312
choice ['b']
predict b answer c

upon a shelf 0.003744168917260005
upon a hill 0.0030076110974711517
upon a nail 0.0007979376381045912
upon a field 0.0017800147311563957
upon a stone 0.005462803830100663
[0.003744168917260005, 0.0030076110974711517, 0.000797937638104

0.2826923076923077

In [11]:
SCC.predict_and_score(method=("bigram"))

19
are crying 434.917845253603
are instantaneously 434.91767725327213
are residing 434.91767725327213
are matched 434.9176798378926
are walking 434.91798999234953
[434.917845253603, 434.91767725327213, 434.91767725327213, 434.9176798378926, 434.91798999234953] 434.91798999234953
choice ['e']
predict e answer c

17
arranged daintily 20.498133256748993
arranged privately 20.498276852383686
arranged inadvertently 20.498133256748993
arranged miserably 20.498133256748993
arranged comfortably 20.498276852383686
[20.498133256748993, 20.498276852383686, 20.498133256748993, 20.498133256748993, 20.498276852383686] 20.498276852383686
choice ['b', 'e']
predict e answer a

10
the gods 567.6039501511009
the moon 567.6045612809211
the panther 567.6037793356696
the guard 567.6039063409255
the country-dance 567.6037395082375
[567.6039501511009, 567.6045612809211, 567.6037793356696, 567.6039063409255, 567.6037395082375] 567.6045612809211
choice ['b']
predict b answer d

5
, rubbing 1613.8567647411626
, 

the devil 567.6043510805847
the snow 567.6042351385045
the challenge 567.6037753529265
the illusion 567.6037691575481
[567.604005466979, 567.6043510805847, 567.6042351385045, 567.6037753529265, 567.6037691575481] 567.6043510805847
choice ['b']
predict b answer a

3
he vanished 127.6849584476581
he bent 127.68535387041996
he leant 127.68495185727875
he wept 127.68501336748614
he hovered 127.68491890538192
[127.6849584476581, 127.68535387041996, 127.68495185727875, 127.68501336748614, 127.68491890538192] 127.68535387041996
choice ['b']
predict b answer a

3
a scoundrel 486.629730686528
a muddle 486.62968060075605
a soldier 486.6302713852017
a whim 486.6296862923211
a whisper 486.6302861832707
[486.629730686528, 486.62968060075605, 486.6302713852017, 486.6296862923211, 486.6302861832707] 486.6302861832707
choice ['e']
predict e answer e

26
to pawn 450.32264068240653
to contradict 450.3227197924007
to invoke 450.3226324034536
to store 450.3226489613594
to subdue 450.3227105935641
[450.322

3
was educated 523.6572469935246
was thin 523.6573061979649
was calm 523.6573617021277
was mumbling 523.657174838113
was clever 523.6572617946347
[523.6572469935246, 523.6573061979649, 523.6573617021277, 523.657174838113, 523.6572617946347] 523.6573617021277
choice ['c']
predict c answer b

7
a candle 486.630143894146
a knife 486.63010974475606
a apron 486.6296407598012
a bulldog 486.62965328124415
a motor-car 486.6296623877481
[486.630143894146, 486.63010974475606, 486.6296407598012, 486.62965328124415, 486.6296623877481] 486.630143894146
choice ['a']
predict a answer d

16
be charmed 107.99063071084923
be safe 107.99180085219858
be forced 107.9912414220714
be explained 107.99107359303325
be defeated 107.99060273934288
[107.99063071084923, 107.99180085219858, 107.9912414220714, 107.99107359303325, 107.99060273934288] 107.99180085219858
choice ['b']
predict b answer d

5
been misinformed 65.02656758662661
been abroad 65.02680908818671
been overheard 65.0265482665018
been asleep 65.0274

the aeroplane 567.6038120826695
the comfort 567.6038315538585
the energy 567.6038014620209
the preference 567.6037634046968
[567.603823145845, 567.6038120826695, 567.6038315538585, 567.6038014620209, 567.6037634046968] 567.6038315538585
choice ['c']
predict c answer d

5
so bewildered 270.6956584150954
so dumfounded 270.6955794641383
so prearranged 270.69555613771917
so astonished 270.6959455094849
so intense 270.69603881516144
[270.6956584150954, 270.6955794641383, 270.69555613771917, 270.6959455094849, 270.69603881516144] 270.69603881516144
choice ['e']
predict e answer c

9
his civility 371.9389206882411
his bravery 371.9389673436614
his weakness 371.93913677650335
his kindness 371.9391441431486
his benevolence 371.93892559933795
[371.9389206882411, 371.9389673436614, 371.93913677650335, 371.9391441431486, 371.93892559933795] 371.9391441431486
choice ['d']
predict d answer c

4
to remonstrate 450.3226590800796
to weep 450.32283569774097
to harmonize 450.32262504438444
to perish 450.

choice ['a']
predict a answer a

8
were absent 651.1870605116121
were propagated 651.186959152128
were falling 651.1872885704513
were pouring 651.1871175263219
were distinguished 651.1872315557415
[651.1870605116121, 651.186959152128, 651.1872885704513, 651.1871175263219, 651.1872315557415] 651.1872885704513
choice ['c']
predict c answer a

3
had soaked 281.53845949730965
had performed 281.53865113879266
had spotted 281.53843885899613
had imbibed 281.5385538438859
had released 281.5385007739368
[281.53845949730965, 281.53865113879266, 281.53843885899613, 281.5385538438859, 281.5385007739368] 281.53865113879266
choice ['b']
predict b answer c

4
a moon 486.629705643642
a quarter 486.6310647893609
a prolonged 486.6297591443529
a mere 486.6310886939338
a century 486.62998339201334
[486.629705643642, 486.6310647893609, 486.6297591443529, 486.6310886939338, 486.62998339201334] 486.6310886939338
choice ['d']
predict d answer b

3
may flourish 116.05484929140108
may kiss 116.05524620018211
ma

predict a answer b

14
is examined 488.30014310294536
is passing 488.30034179832
is charmed 488.30014733050655
is shouted 488.3001515580677
is preached 488.3001557856289
[488.30014310294536, 488.30034179832, 488.30014733050655, 488.3001515580677, 488.3001557856289] 488.30034179832
choice ['b']
predict b answer b

12
exercising enormous 9.331858407079645
exercising actual 9.331858407079645
exercising original 9.331858407079645
exercising hardware 9.331858407079645
exercising average 9.331858407079645
[9.331858407079645, 9.331858407079645, 9.331858407079645, 9.331858407079645, 9.331858407079645] 9.331858407079645
choice ['a', 'b', 'c', 'd', 'e']
predict b answer a

10
the honour 567.6040846793162
the architecture 567.6037505714131
the rays 567.6038275711153
the clump 567.6037572093185
the sons 567.6038992604932
[567.6040846793162, 567.6037505714131, 567.6038275711153, 567.6037572093185, 567.6038992604932] 567.6040846793162
choice ['a']
predict a answer b

6
the box 567.6042453166259
the 

neatly whitewashed 39.86349693251533
neatly dressed 39.890081799591
neatly mended 39.85940695296523
neatly soft 39.85889570552147
neatly neglected 39.85889570552147
[39.86349693251533, 39.890081799591, 39.85940695296523, 39.85889570552147, 39.85889570552147] 39.890081799591
choice ['b']
predict b answer b

6
to commit 450.3228200597189
to carry 450.324325909259
to offer 450.32337750921266
to scold 450.32269495554203
to grant 450.32277866495446
[450.3228200597189, 450.324325909259, 450.32337750921266, 450.32269495554203, 450.32277866495446] 450.324325909259
choice ['b']
predict b answer a

13
frequently depressed 94.34011044176707
frequently motionless 94.33998493975903
frequently shaved 94.33998493975903
frequently intoxicated 94.33998493975903
frequently obscure 94.34011044176707
[94.34011044176707, 94.33998493975903, 94.33998493975903, 94.33998493975903, 94.34011044176707] 94.34011044176707
choice ['a', 'e']
predict a answer e

6
the moon 567.6045612809211
the sky 567.6047170504336
t

31
and gathered 1396.2820183493645
and vanished 1396.28205505103
and peering 1396.2819757754328
and crackled 1396.281930999401
and cast 1396.2821783686256
[1396.2820183493645, 1396.28205505103, 1396.2819757754328, 1396.281930999401, 1396.2821783686256] 1396.2821783686256
choice ['e']
predict e answer b

3
important incident 32.25343500961803
important revolution 32.25370981038747
important complication 32.253091508656226
important incongruity 32.253091508656226
important addition 32.25370981038747
[32.25343500961803, 32.25370981038747, 32.253091508656226, 32.253091508656226, 32.25370981038747] 32.25370981038747
choice ['b', 'e']
predict b answer e

17
his sanctuary 371.9389206882411
his conscience 371.9394535422514
his host 371.9394559977998
his editorial 371.93890349940204
his wickedness 371.9389501548223
[371.9389206882411, 371.9394535422514, 371.9394559977998, 371.93890349940204, 371.9389501548223] 371.9394559977998
choice ['c']
predict c answer b

15
quite gentle 146.8377598152425


predict a answer d

4
this train 335.81310984900057
this apparition 335.8129412794429
this stranger 335.81310984900057
this temporary 335.8128634781086
this frantic 335.8128634781086
[335.81310984900057, 335.8129412794429, 335.81310984900057, 335.8128634781086, 335.8128634781086] 335.81310984900057
choice ['a', 'c']
predict c answer c

13
was addressed 523.6574449583719
was intolerable 523.6572284921369
was disinclined 523.6571840888067
was removed 523.6574653098983
was disposed 523.657382053654
[523.6574449583719, 523.6572284921369, 523.6571840888067, 523.6574653098983, 523.657382053654] 523.6574653098983
choice ['d']
predict d answer d

21
was pale 523.6574856614246
was jolting 523.6571711378353
was scant 523.6571766882516
was breaking 523.6573450508789
was furious 523.6573432007401
[523.6574856614246, 523.6571711378353, 523.6571766882516, 523.6573450508789, 523.6573432007401] 523.6574856614246
choice ['a']
predict a answer d

13
to luxuriate 450.3226213648498
to wiggle 450.322623204

[486.63116268427865, 486.6296692176261, 486.6298149216898, 486.62971930339796, 486.6298160600028] 486.63116268427865
choice ['a']
predict a answer a

10
other idler 74.14547318839419
other emperor 74.14547719371326
other barber 74.14547719371326
other aunt 74.14550923626577
other minister 74.14547719371326
[74.14547318839419, 74.14547719371326, 74.14547719371326, 74.14550923626577, 74.14547719371326] 74.14550923626577
choice ['d']
predict d answer a

12
leading complaints 63.549035699630686
leading results 63.549035699630686
leading officers 63.549035699630686
leading spirits 63.55036930652441
leading voices 63.549138284776355
[63.549035699630686, 63.549035699630686, 63.549035699630686, 63.55036930652441, 63.549138284776355] 63.55036930652441
choice ['d']
predict d answer b

6
between heaven 290.8812324389841
between Bombay 290.88073659853455
between fifty 290.8812324389841
between seven-thirty 290.88066773180543
between nine 290.8813426257507
[290.8812324389841, 290.88073659853455, 29

it embraced 162.29572124965284
it whispered 162.29574073893616
it widened 162.29572124965284
[162.29573343045493, 162.29572124965284, 162.29572124965284, 162.29574073893616, 162.29572124965284] 162.29574073893616
choice ['d']
predict d answer e

5
very difficult 174.99651496376907
very attentive 174.99363265361094
very impatient 174.9935249036985
very inaccessible 174.99318481803732
very anxious 174.99570683942568
[174.99651496376907, 174.99363265361094, 174.9935249036985, 174.99318481803732, 174.99570683942568] 174.99651496376907
choice ['a']
predict a answer b

16
the labourers 567.6037505714131
the rivers 567.6037992493857
the fittings 567.6037439335078
the singers 567.6037576518455
the paving-stones 567.6037448185618
[567.6037505714131, 567.6037992493857, 567.6037439335078, 567.6037576518455, 567.6037448185618] 567.6037992493857
choice ['b']
predict b answer c

13
ever overlooked 143.10921852489548
ever apportioned 143.10921852489548
ever examined 143.10922730946277
ever facilitate

been banished 65.02658690675142
been seduced 65.0265482665018
[65.02646132594016, 65.02662554700103, 65.02648064606497, 65.02658690675142, 65.0265482665018] 65.02662554700103
choice ['b']
predict b answer a

11
was beautifully 523.6572377428308
was purple 523.6571951896393
was positively 523.6572784458835
was glossy 523.6571692876967
was deadly 523.6572321924145
[523.6572377428308, 523.6571951896393, 523.6572784458835, 523.6571692876967, 523.6572321924145] 523.6572784458835
choice ['c']
predict c answer e

15
his horrified 371.93890288551495
his nerveless 371.93893296598327
his cunning 371.9390189101785
his claw-like 371.93890349940204
his sympathetic 371.9389206882411
[371.93890288551495, 371.93893296598327, 371.9390189101785, 371.93890349940204, 371.9389206882411] 371.9390189101785
choice ['c']
predict c answer e

12
so strangely 270.69626131331324
so nobly 270.6957588981317
so foolishly 270.69568712453435
so lingeringly 270.69555613771917
so accurately 270.6957158339733
[270.6962613

little basket 299.41121716901
little dog 299.4125228494069
[299.41040283676244, 299.410447504776, 299.4104062727635, 299.41121716901, 299.4125228494069] 299.4125228494069
choice ['e']
predict e answer b

7
my mistress 318.35676894569036
my assistant 318.355880589694
my estate 318.3559950404665
my fate 318.35676894569036
my grandmother 318.3563329427473
[318.35676894569036, 318.355880589694, 318.3559950404665, 318.35676894569036, 318.3563329427473] 318.35676894569036
choice ['a', 'd']
predict d answer b

4
a copy 486.63058214464996
a map 486.6297591443529
a meeting 486.6299640406924
a box 486.63023495918577
a plantation 486.62969198388606
[486.63058214464996, 486.6297591443529, 486.6299640406924, 486.63023495918577, 486.62969198388606] 486.63058214464996
choice ['a']
predict a answer a

5
and unalterable 1396.2819243931012
and inexorable 1396.2819273292346
and agreeable 1396.282019083398
and intimate 1396.281947882167
and destructive 1396.281942743934
[1396.2819243931012, 1396.281927329

the jug 567.6037740253454
the baby 567.6041138860999
[567.6037771230345, 567.6043510805847, 567.6037784506157, 567.6037740253454, 567.6041138860999] 567.6043510805847
choice ['b']
predict b answer c

14
a shelf 486.6297750807349
a hill 486.6300471375412
a nail 486.62976597423085
a field 486.6299890835784
a stone 486.6305240906871
[486.6297750807349, 486.6300471375412, 486.62976597423085, 486.6299890835784, 486.6305240906871] 486.6305240906871
choice ['e']
predict e answer e

7
the purple 567.6038811168852
the herring 567.6037426059266
the immense 567.603859433061
the lying 567.6037470311969
the hot 567.6039806854656
[567.6038811168852, 567.6037426059266, 567.603859433061, 567.6037470311969, 567.6039806854656] 567.6039806854656
choice ['e']
predict e answer c

5
nocturnal corridor 27.95886075949367
nocturnal barracks 27.95886075949367
nocturnal aspect 27.959915611814345
nocturnal expedition 27.964135021097047
nocturnal law 27.95886075949367
[27.95886075949367, 27.95886075949367, 27.9599

0.2759615384615385

In [12]:
SCC.predict_and_score(method=("unigram"))

predict e answer c

predict e answer a

predict b answer d

predict c answer c

predict c answer d

predict e answer b

predict c answer b

predict c answer c

predict e answer c

predict a answer a

predict c answer b

predict b answer b

predict e answer d

predict c answer e

predict d answer c

predict c answer c

predict c answer e

predict a answer a

predict c answer e

predict e answer c

predict c answer d

predict d answer a

predict a answer d

predict e answer c

predict a answer d

predict c answer b

predict c answer d

predict c answer c

predict e answer d

predict d answer d

predict c answer d

predict c answer d

predict b answer c

predict e answer a

predict e answer e

predict e answer c

predict a answer d

predict d answer c

predict d answer e

predict a answer a

predict c answer d

predict d answer c

predict e answer e

predict d answer a

predict d answer a

predict b answer c

predict d answer d

predict e answer e

predict a answer c

predict b answer d




predict c answer c

predict e answer e

predict d answer b

predict b answer a

predict b answer e

predict e answer d

predict a answer d

predict b answer b

predict e answer a

predict d answer e

predict e answer c

predict a answer a

predict c answer b

predict d answer b

predict a answer d

predict d answer d

predict a answer a

predict c answer e

predict a answer d

predict e answer b

predict c answer d

predict c answer b

predict c answer a

predict b answer c

predict b answer e

predict e answer b

predict d answer d

predict a answer d

predict e answer d

predict a answer b

predict e answer c

predict d answer d

predict d answer d

predict a answer d

predict c answer d

predict d answer b

predict d answer d

predict a answer e

predict a answer d

predict a answer b

predict b answer d

predict b answer a

predict a answer a

predict c answer c

predict d answer d

predict e answer c

predict e answer d

predict e answer c

predict c answer c

predict d answer b



predict b answer d

predict c answer b

predict e answer a

predict d answer c

predict c answer a

predict c answer e

predict d answer b

predict d answer a

predict c answer c

predict c answer a

predict d answer b

predict e answer e

predict c answer a

predict b answer b

predict a answer d

predict a answer d

predict a answer a

predict e answer d

predict c answer b

predict b answer e

predict d answer d

predict e answer b

predict d answer d

predict c answer a

predict d answer a

predict a answer a

predict b answer c

predict e answer e

predict d answer c

predict e answer d

predict c answer a

predict b answer e

predict c answer a

predict a answer c

predict d answer e

predict b answer e

predict e answer d

predict b answer e

predict a answer d

predict c answer e

predict b answer e

predict c answer d

predict c answer d

predict b answer d

predict c answer e

predict e answer d

predict d answer b

predict c answer d

predict b answer e

predict b answer a


0.24903846153846154

In [9]:
SCC.predict_and_score(method=("word2vec"))

[0, 0, 2.9638175e-06, 0, 2.9161658e-06] 2.9638175e-06
choice ['c']
predict c answer c

[6.144552e-06, 0, 0, 0, 7.5495755e-06] 7.5495755e-06
choice ['e']
predict e answer a

[0, 2.1567043e-06, 0, 8.569099e-06, 0] 8.569099e-06
choice ['d']
predict d answer d

[1.8308937e-06, 9.204821e-06, 3.5002827e-06, 0, 1.1937703e-05] 1.1937703e-05
choice ['e']
predict e answer c

[1.7637776e-06, 1.8057366e-05, 2.989693e-06, 3.9582374e-05, 1.4113737e-06] 3.9582374e-05
choice ['d']
predict d answer d

[0, 6.685566e-06, 1.5432664e-06, 1.5793842e-06, 1.9659606e-06] 6.685566e-06
choice ['b']
predict b answer b

[4.2566594e-06, 3.0566732e-06, 1.50533115e-05, 0, 2.4790018e-06] 1.50533115e-05
choice ['c']
predict c answer b

[1.7902008e-06, 0, 2.4279498e-06, 2.0144e-06, 0] 2.4279498e-06
choice ['c']
predict c answer c

[0, 0, 5.0963786e-06, 0, 1.5867125e-06] 5.0963786e-06
choice ['c']
predict c answer c

[5.98982e-06, 3.1302052e-06, 1.9930335e-06, 0, 4.182933e-06] 5.98982e-06
choice ['a']
predict a answer a


[0, 2.7426054e-06, 1.7536564e-06, 0, 2.3885889e-06] 2.7426054e-06
choice ['b']
predict b answer e

[2.0319721e-05, 2.831942e-06, 5.032463e-06, 1.0866429e-05, 0] 2.0319721e-05
choice ['a']
predict a answer d

[0, 0, 1.141487e-06, 0, 1.522779e-06] 1.522779e-06
choice ['e']
predict e answer c

[2.6461507e-06, 6.435238e-06, 0, 7.124958e-06, 7.477031e-06] 7.477031e-06
choice ['e']
predict e answer e

[0, 2.159371e-06, 1.5691405e-06, 2.5252534e-06, 0] 2.5252534e-06
choice ['d']
predict d answer c

[1.7075863e-06, 3.5923422e-06, 6.3549855e-06, 5.478875e-06, 8.375429e-06] 8.375429e-06
choice ['e']
predict e answer d

[4.9751525e-06, 3.2132077e-05, 0, 7.163666e-06, 3.0710205e-06] 3.2132077e-05
choice ['b']
predict b answer e

[2.8400173e-06, 3.617107e-06, 2.2806344e-06, 0, 2.6592054e-06] 3.617107e-06
choice ['b']
predict b answer a

[3.6088388e-06, 2.0236025e-06, 3.3596243e-06, 3.3320594e-05, 5.555077e-06] 3.3320594e-05
choice ['d']
predict d answer d

[7.773047e-06, 4.515489e-06, 2.7247827e-06

[2.816361e-06, 0, 7.4748946e-06, 4.255578e-06, 5.4565544e-06] 7.4748946e-06
choice ['c']
predict c answer c

[2.548607e-06, 0, 2.8742943e-06, 0, 2.7597441e-06] 2.8742943e-06
choice ['c']
predict c answer c

[1.8931396e-06, 0, 2.2074648e-06, 0, 0] 2.2074648e-06
choice ['c']
predict c answer d

[2.1475155e-06, 0, 2.503235e-06, 3.7542038e-06, 0] 3.7542038e-06
choice ['d']
predict d answer d

[1.7793154e-06, 3.9683396e-06, 3.1870381e-06, 3.671355e-06, 0] 3.9683396e-06
choice ['b']
predict b answer c

[0, 2.372617e-06, 0, 0, 0] 2.372617e-06
choice ['b']
predict b answer b

[4.2471347e-06, 6.120506e-06, 2.6602218e-06, 0, 9.0943795e-06] 9.0943795e-06
choice ['e']
predict e answer e

[3.441288e-06, 8.58399e-06, 2.8964973e-06, 6.806191e-06, 8.987881e-06] 8.987881e-06
choice ['e']
predict e answer b

[1.5105013e-06, 0, 1.5873434e-06, 3.316392e-06, 2.5010663e-06] 3.316392e-06
choice ['d']
predict d answer b

[0, 5.2185267e-05, 1.3819503e-06, 3.838379e-06, 0] 5.2185267e-05
choice ['b']
predict b a

[8.624238e-06, 0, 0, 4.509108e-06, 2.783174e-06] 8.624238e-06
choice ['a']
predict a answer a

[0, 0, 5.4614748e-06, 2.7045232e-06, 7.981967e-06] 7.981967e-06
choice ['e']
predict e answer c

[6.5881245e-06, 1.7272281e-06, 0, 3.3199897e-06, 1.2977179e-05] 1.2977179e-05
choice ['e']
predict e answer e

[4.566681e-06, 1.7690401e-06, 7.879891e-06, 1.6256154e-06, 2.1596331e-06] 7.879891e-06
choice ['c']
predict c answer c

[1.5681486e-05, 5.027831e-06, 3.403475e-06, 5.0842922e-05, 2.0617075e-05] 5.0842922e-05
choice ['d']
predict d answer d

[0, 0, 4.230376e-06, 1.9234344e-06, 0] 4.230376e-06
choice ['c']
predict c answer c

[0, 2.7448182e-06, 0, 0, 0] 2.7448182e-06
choice ['b']
predict b answer b

[4.733172e-06, 0, 6.2254453e-06, 1.9621552e-06, 0] 6.2254453e-06
choice ['c']
predict c answer d

[4.0625773e-06, 4.2795177e-06, 1.1262488e-05, 1.0914008e-05, 0] 1.1262488e-05
choice ['c']
predict c answer b

[2.5920456e-06, 6.5704717e-06, 6.194153e-06, 4.4468484e-06, 3.7332434e-06] 6.5704717e-0

[3.2621856e-06, 2.092364e-06, 0, 3.0650588e-06, 2.6792327e-06] 3.2621856e-06
choice ['a']
predict a answer c

[0, 2.0358864e-06, 2.161548e-06, 3.8498156e-06, 1.8876485e-06] 3.8498156e-06
choice ['d']
predict d answer b

[1.8968769e-06, 0, 5.021794e-06, 2.1107815e-06, 3.009735e-06] 5.021794e-06
choice ['c']
predict c answer c

[1.6061962e-06, 0, 1.8805082e-06, 5.9294525e-06, 0] 5.9294525e-06
choice ['d']
predict d answer d

[0, 5.389076e-06, 1.9552747e-06, 0, 1.8606897e-06] 5.389076e-06
choice ['b']
predict b answer b

[4.629169e-06, 3.0669546e-06, 0, 1.745595e-06, 3.6234385e-06] 4.629169e-06
choice ['a']
predict a answer b

[2.3652394e-06, 3.2627504e-06, 1.3617803e-05, 3.4054447e-05, 0] 3.4054447e-05
choice ['d']
predict d answer c

[5.889237e-06, 6.454265e-06, 1.2679166e-05, 1.3278357e-06, 1.7243962e-06] 1.2679166e-05
choice ['c']
predict c answer a

[0, 1.6176645e-06, 0, 1.9604875e-06, 3.2327662e-06] 3.2327662e-06
choice ['e']
predict e answer b

[1.8735066e-06, 1.8498854e-06, 1.6961

[3.1127302e-06, 1.5185041e-06, 0, 0, 3.7109637e-06] 3.7109637e-06
choice ['e']
predict e answer c

[4.325553e-06, 1.730064e-06, 0, 1.6462456e-06, 2.2170477e-06] 4.325553e-06
choice ['a']
predict a answer a

[7.957991e-06, 1.6644855e-06, 5.4282086e-06, 8.396195e-06, 1.8801196e-05] 1.8801196e-05
choice ['e']
predict e answer e

[7.826422e-06, 4.7236e-06, 6.4561127e-06, 2.5505096e-06, 2.075107e-06] 7.826422e-06
choice ['a']
predict a answer c

[8.2982415e-06, 1.9200895e-06, 2.4978747e-06, 0, 5.160194e-06] 8.2982415e-06
choice ['a']
predict a answer a

[6.1991122e-06, 7.0221276e-06, 0, 2.6157363e-06, 3.3868841e-06] 7.0221276e-06
choice ['b']
predict b answer d

[9.088287e-06, 0, 1.2350943e-06, 4.3372947e-06, 2.9967177e-05] 2.9967177e-05
choice ['e']
predict e answer a

[3.5933824e-06, 7.4324557e-06, 2.1813262e-06, 1.1124064e-05, 1.3752785e-06] 1.1124064e-05
choice ['d']
predict d answer d

[2.1465485e-06, 5.8559795e-06, 0, 0, 4.0496234e-06] 5.8559795e-06
choice ['b']
predict b answer e

[2

[0, 0, 0, 2.163771e-06, 1.8858595e-06] 2.163771e-06
choice ['d']
predict d answer b

[3.3413428e-06, 1.7320599e-06, 0, 3.1262132e-06, 1.8702367e-06] 3.3413428e-06
choice ['a']
predict a answer d

[0, 1.5680195e-06, 3.691669e-06, 1.838339e-06, 0] 3.691669e-06
choice ['c']
predict c answer b

[2.6356022e-06, 1.9972658e-06, 4.4245085e-06, 2.4296971e-06, 3.1478178e-06] 4.4245085e-06
choice ['c']
predict c answer a

[0, 3.6400531e-06, 1.0182876e-05, 1.6651423e-06, 1.5519992e-06] 1.0182876e-05
choice ['c']
predict c answer c

[1.4484089e-06, 2.3226989e-05, 2.4855606e-05, 3.8354497e-06, 1.1490149e-05] 2.4855606e-05
choice ['c']
predict c answer e

[1.8712954e-06, 3.1183872e-06, 2.6186733e-06, 0, 5.6991953e-06] 5.6991953e-06
choice ['e']
predict e answer b

[3.4505151e-06, 0, 2.4741755e-06, 6.034018e-06, 0] 6.034018e-06
choice ['d']
predict d answer d

[5.8432056e-06, 4.6849655e-06, 3.8067806e-06, 3.5278772e-06, 5.0474596e-06] 5.8432056e-06
choice ['a']
predict a answer d

[0, 0, 1.5364384e-06

[2.8050692e-06, 0, 0, 2.436883e-06, 5.8951236e-06] 5.8951236e-06
choice ['e']
predict e answer a

[3.749687e-06, 5.3806816e-06, 1.6066358e-06, 2.4956692e-06, 2.5882425e-06] 5.3806816e-06
choice ['b']
predict b answer b

[4.5664624e-06, 2.6883113e-06, 3.7351695e-06, 0, 7.94815e-06] 7.94815e-06
choice ['e']
predict e answer e

[3.4832574e-06, 7.732174e-06, 5.9244717e-06, 1.7355833e-06, 1.7485322e-06] 7.732174e-06
choice ['b']
predict b answer c

[6.2155223e-06, 4.3552523e-06, 2.8481386e-06, 6.9599255e-06, 5.7053044e-06] 6.9599255e-06
choice ['d']
predict d answer d

[2.531483e-06, 1.4915119e-06, 4.776791e-06, 1.5494227e-06, 3.3882982e-06] 4.776791e-06
choice ['c']
predict c answer d

[0, 1.2431922e-05, 2.8506508e-06, 2.9324397e-06, 1.6782523e-06] 1.2431922e-05
choice ['b']
predict b answer b

[0, 1.985458e-06, 0, 2.2935362e-06, 1.799262e-06] 2.2935362e-06
choice ['d']
predict d answer e

[0, 6.3561e-06, 4.6413297e-06, 2.0490047e-06, 1.4279244e-06] 6.3561e-06
choice ['b']
predict b answer

[2.173264e-06, 0, 0, 1.8722628e-06, 1.4475813e-06] 2.173264e-06
choice ['a']
predict a answer d

[2.4602582e-06, 2.9192574e-06, 0, 2.1323715e-06, 0] 2.9192574e-06
choice ['b']
predict b answer a

[3.4358932e-06, 5.269651e-06, 3.7391756e-06, 2.7414758e-06, 7.697786e-06] 7.697786e-06
choice ['e']
predict e answer a

[3.2829382e-06, 2.648975e-06, 1.8005815e-06, 2.4275787e-06, 5.4962698e-06] 5.4962698e-06
choice ['e']
predict e answer d

[0, 0, 1.0914727e-05, 4.0385407e-06, 1.5137188e-06] 1.0914727e-05
choice ['c']
predict c answer c

[0, 0, 0, 2.7353224e-06, 0] 2.7353224e-06
choice ['d']
predict d answer e

[1.4020042e-06, 1.5220722e-06, 0, 0, 0] 1.5220722e-06
choice ['b']
predict b answer b

[0, 0, 3.323676e-06, 0, 0] 3.323676e-06
choice ['c']
predict c answer a

[1.9688453e-06, 4.3367695e-06, 3.6639885e-06, 2.018134e-06, 3.19986e-06] 4.3367695e-06
choice ['b']
predict b answer b

[4.0159416e-06, 2.2430988e-06, 3.14999e-06, 4.631339e-06, 0] 4.631339e-06
choice ['d']
predict d answer c

[

[4.887376e-06, 3.8865255e-06, 0, 0, 1.7562178e-06] 4.887376e-06
choice ['a']
predict a answer b

[4.234689e-06, 2.807874e-06, 2.1511034e-06, 3.151205e-06, 7.351286e-06] 7.351286e-06
choice ['e']
predict e answer e

[1.790193e-06, 1.944252e-06, 1.963911e-06, 6.1888636e-06, 0] 6.1888636e-06
choice ['d']
predict d answer b

[5.8189135e-06, 0, 2.429542e-06, 6.236341e-06, 6.919263e-06] 6.919263e-06
choice ['e']
predict e answer e

[1.02773065e-05, 8.66055e-06, 2.8186105e-06, 4.7697854e-06, 6.9266134e-06] 1.02773065e-05
choice ['a']
predict a answer b

[4.387865e-06, 0, 0, 3.256789e-06, 0] 4.387865e-06
choice ['a']
predict a answer d

[2.3475086e-06, 2.7582828e-06, 0, 0, 4.4091807e-06] 4.4091807e-06
choice ['e']
predict e answer e

[1.3349241e-06, 0, 0, 0, 3.1063646e-06] 3.1063646e-06
choice ['e']
predict e answer e

[6.2839918e-06, 3.8131743e-06, 0, 4.942905e-06, 1.8532293e-06] 6.2839918e-06
choice ['a']
predict a answer d

[0, 0, 4.5877714e-06, 2.550488e-06, 1.5703581e-06] 4.5877714e-06
ch

[3.0702142e-06, 0, 2.5820223e-06, 3.4170819e-06, 2.1785534e-06] 3.4170819e-06
choice ['d']
predict d answer b

[0, 3.3636713e-06, 1.8733405e-06, 1.5633171e-06, 3.343479e-06] 3.3636713e-06
choice ['b']
predict b answer a

[1.76855e-06, 3.7385694e-06, 0, 0, 4.3869964e-06] 4.3869964e-06
choice ['e']
predict e answer e

[1.6670093e-06, 2.10208e-06, 2.229209e-05, 1.5018349e-05, 6.4744277e-06] 2.229209e-05
choice ['c']
predict c answer d

[0, 0, 0, 5.3997837e-06, 1.6997235e-06] 5.3997837e-06
choice ['d']
predict d answer d

[3.2291437e-06, 0, 2.0769692e-06, 7.1187346e-06, 0] 7.1187346e-06
choice ['d']
predict d answer d

[5.1773e-06, 2.2651395e-05, 0, 8.105234e-06, 0] 2.2651395e-05
choice ['b']
predict b answer b

[4.316873e-06, 3.8677126e-06, 5.519125e-06, 2.70148e-06, 6.407027e-06] 6.407027e-06
choice ['e']
predict e answer c

[1.6848222e-06, 2.7751937e-06, 0, 3.0605856e-06, 0] 3.0605856e-06
choice ['d']
predict d answer b

[8.120534e-06, 1.7700821e-06, 2.3548725e-06, 3.6519439e-06, 2.0120

[0, 3.7332622e-06, 3.8255203e-06, 4.7967505e-06, 3.2401056e-06] 4.7967505e-06
choice ['d']
predict d answer c

[2.6326527e-06, 0, 0, 3.74238e-06, 5.9595127e-06] 5.9595127e-06
choice ['e']
predict e answer c

[2.544576e-06, 5.2054706e-06, 2.8953748e-06, 3.3727533e-06, 2.1974665e-06] 5.2054706e-06
choice ['b']
predict b answer c

[0, 0, 1.9116658e-06, 0, 0] 1.9116658e-06
choice ['c']
predict c answer c

[0, 0, 0, 1.4455578e-06, 2.1107617e-06] 2.1107617e-06
choice ['e']
predict e answer d

[4.9544888e-06, 0, 1.9011435e-06, 1.0234496e-05, 1.7111508e-06] 1.0234496e-05
choice ['d']
predict d answer d

[3.151592e-06, 3.221724e-06, 3.0280248e-06, 1.7182231e-06, 1.8908007e-06] 3.221724e-06
choice ['b']
predict b answer b

[4.631312e-06, 4.3477366e-06, 7.3916017e-06, 2.6049127e-06, 1.26209825e-05] 1.26209825e-05
choice ['e']
predict e answer c

[6.1457104e-06, 3.5474075e-06, 3.1771533e-06, 7.3758365e-06, 6.034829e-06] 7.3758365e-06
choice ['d']
predict d answer d

[0, 1.6811967e-06, 0, 0, 4.6434

[4.3891505e-06, 2.2453264e-06, 3.2910423e-06, 1.578909e-06, 6.485092e-06] 6.485092e-06
choice ['e']
predict e answer c

[2.8851514e-06, 1.963446e-06, 0, 2.3708803e-06, 6.668413e-06] 6.668413e-06
choice ['e']
predict e answer e

[1.7324065e-06, 1.9499366e-06, 3.2415107e-06, 3.26778e-06, 0] 3.26778e-06
choice ['d']
predict d answer c

[2.7910664e-06, 3.8129062e-06, 2.9110593e-06, 0, 3.4297907e-06] 3.8129062e-06
choice ['b']
predict b answer d

[6.2803406e-06, 5.329006e-06, 2.9752207e-06, 4.654423e-06, 2.3656212e-06] 6.2803406e-06
choice ['a']
predict a answer a

[0, 1.939164e-06, 0, 0, 2.3218238e-06] 2.3218238e-06
choice ['e']
predict e answer e

[0, 0, 1.323479e-06, 0, 2.3576845e-06] 2.3576845e-06
choice ['e']
predict e answer a

[3.1279726e-06, 2.1531566e-06, 7.3323827e-06, 4.5628212e-06, 1.8715225e-06] 7.3323827e-06
choice ['c']
predict c answer c

[2.7128403e-06, 2.4441633e-06, 2.045533e-06, 0, 0] 2.7128403e-06
choice ['a']
predict a answer e

[0, 1.8625689e-06, 1.6636524e-06, 3.7636

[1.8429438e-06, 2.4797675e-06, 4.475022e-06, 1.5500373e-06, 0] 4.475022e-06
choice ['c']
predict c answer a

[0, 4.815589e-06, 0, 2.5330025e-06, 2.9661558e-06] 4.815589e-06
choice ['b']
predict b answer b

[3.3290646e-06, 0, 3.1845834e-06, 4.7374688e-06, 7.4836958e-06] 7.4836958e-06
choice ['e']
predict e answer e

[0, 7.884552e-06, 1.5504969e-06, 5.9623885e-06, 0] 7.884552e-06
choice ['b']
predict b answer b

[0, 2.4807014e-06, 1.6032249e-06, 0, 1.4733416e-06] 2.4807014e-06
choice ['b']
predict b answer e

[5.1794145e-06, 7.0240053e-06, 2.1567594e-06, 1.641719e-06, 3.8044316e-06] 7.0240053e-06
choice ['b']
predict b answer a

[5.5879495e-06, 4.938359e-06, 1.7829752e-06, 8.671661e-06, 1.4677418e-05] 1.4677418e-05
choice ['e']
predict e answer a

[0, 0, 0, 6.1919673e-06, 3.4430757e-06] 6.1919673e-06
choice ['d']
predict d answer d

[3.5752914e-06, 0, 4.0098917e-06, 2.435967e-06, 4.6846035e-06] 4.6846035e-06
choice ['e']
predict e answer c

[3.3350946e-06, 0, 0, 4.1062e-06, 2.7951216e-06

0.46153846153846156