In [1]:
from nltk.parse import DependencyGraph, DependencyEvaluator
from nltk.parse.transitionparser import TransitionParser, Configuration, Transition
from sklearn.datasets import load_svmlight_file
from sklearn import svm, linear_model,neural_network


import pickle
import tempfile
import os
from numpy import array
from scipy import sparse

In [2]:
# Exploratory check on the raw training treebank: load it as a sequence of
# dependency graphs and build a parser Configuration from the first one.
# `conf` is not referenced by any later cell visible in this notebook.
f = DependencyGraph.load("./UD_Hindi/hi-ud-train.conllu")
conf = Configuration(f[0])

  "The graph doesn't contain a node "


## Creating files for train & test (with & without the morphological feature)

In [None]:
#Train Data
# Derive two variants of the training treebank, row by row:
#   with_mor_train.conllu    - column 6 (index 5, the morphological-feature
#                              column) gets "|" + the contents of column 10 appended
#   without_mor_train.conllu - column 6 is blanked to "_"
# NOTE(review): column 10 (index 9) is presumably the CoNLL-U MISC field - confirm
# for this treebank release.
#
# All three files are opened as UTF-8 inside one `with`, so the text is written
# in the same encoding it was read in and every handle is closed even if a
# malformed row raises part-way through. The source is opened read-only.
with open("./UD_Hindi/hi-ud-train.conllu", "r", encoding='utf-8') as treebank, \
     open('./UD_Hindi/with_mor_train.conllu', "w", encoding='utf-8') as with_mor_out, \
     open('./UD_Hindi/without_mor_train.conllu', "w", encoding='utf-8') as without_mor_out:
    for row in treebank:
        if row == '\n':
            # Blank line separates sentences; copy it through to both outputs.
            with_mor_out.write(row)
            without_mor_out.write(row)
            continue

        plain_cols = row.split('\t')
        enriched_cols = list(plain_cols)
        # Strip only the line terminator from the last column (not "whatever the
        # last character is"), so a final row without a trailing newline keeps
        # its last character.
        enriched_cols[5] = enriched_cols[5] + "|" + enriched_cols[9].rstrip('\n')
        plain_cols[5] = "_"

        with_mor_out.write('\t'.join(enriched_cols))
        without_mor_out.write('\t'.join(plain_cols))

In [3]:
#Test Data
# Derive two variants of the test treebank, row by row:
#   with_mor_test.conllu    - column 6 (index 5, the morphological-feature
#                             column) gets "|" + the contents of column 10 appended
#   without_mor_test.conllu - column 6 is blanked to "_"
# NOTE(review): column 10 (index 9) is presumably the CoNLL-U MISC field - confirm
# for this treebank release.
#
# All three files are opened as UTF-8 inside one `with`, so the text is written
# in the same encoding it was read in and every handle is closed even if a
# malformed row raises part-way through. The source is opened read-only.
with open("./UD_Hindi/hi-ud-test.conllu", "r", encoding='utf-8') as treebank, \
     open('./UD_Hindi/with_mor_test.conllu', "w", encoding='utf-8') as with_mor_out, \
     open('./UD_Hindi/without_mor_test.conllu', "w", encoding='utf-8') as without_mor_out:
    for row in treebank:
        if row == '\n':
            # Blank line separates sentences; copy it through to both outputs.
            with_mor_out.write(row)
            without_mor_out.write(row)
            continue

        plain_cols = row.split('\t')
        enriched_cols = list(plain_cols)
        # Strip only the line terminator from the last column (not "whatever the
        # last character is"), so a final row without a trailing newline keeps
        # its last character.
        enriched_cols[5] = enriched_cols[5] + "|" + enriched_cols[9].rstrip('\n')
        plain_cols[5] = "_"

        with_mor_out.write('\t'.join(enriched_cols))
        without_mor_out.write('\t'.join(plain_cols))

## Transition Parser

In [20]:
class MainTransitionParser(TransitionParser):
    """TransitionParser whose ``train`` can fit one of several scikit-learn classifiers."""

    def _make_classifier(self, classifier, verbose, random_state):
        """
        Build the unfitted estimator selected by ``classifier``.

        :param classifier : One of "svm", "logistic" or "mlp"
        :param verbose : Forwarded to the estimator's ``verbose`` option
        :param random_state : Forwarded to the estimator's ``random_state`` option
        :return : An unfitted scikit-learn estimator
        :raises ValueError : If ``classifier`` is not one of the supported keys
        """
        if classifier == "svm":
            # NOTE(review): probability=True is presumably needed because the
            # inherited parse() ranks transitions by predicted probability -
            # confirm against the NLTK TransitionParser source.
            return svm.SVC(kernel='poly', degree=2, coef0=0, gamma=0.2, C=0.5,
                           verbose=verbose, probability=True,
                           random_state=random_state)
        if classifier == "logistic":
            return linear_model.LogisticRegression(C=0.7, solver='lbfgs',
                                                   verbose=verbose,
                                                   random_state=random_state)
        if classifier == "mlp":
            return neural_network.MLPClassifier(hidden_layer_sizes=(150, 75,),
                                                learning_rate='adaptive',
                                                max_iter=1000,
                                                verbose=verbose,
                                                random_state=random_state)
        raise ValueError(
            "Unknown classifier %r; expected one of 'svm', 'logistic', 'mlp'"
            % (classifier,)
        )

    def train(self, dgraphs, modelfile, classifier="None", verbose=True, random_state=None):
        """
        Fit the selected classifier on transitions derived from ``dgraphs`` and
        pickle the fitted model to ``modelfile``.

        :param dgraphs : List of training DependencyGraph
        :param modelfile : Saves the trained model in the given file
        :param classifier : Which estimator to fit: "svm", "logistic" or "mlp".
            The default "None" selects nothing and is rejected.
        :param verbose : Forwarded to the estimator's ``verbose`` option
        :param random_state : Optional seed forwarded to the estimator; the
            default None keeps scikit-learn's unseeded behaviour
        :raises ValueError : If ``classifier`` is not a supported key
        """
        # Validate the classifier choice up front so a typo fails immediately
        # instead of after the training examples have been generated.
        model = self._make_classifier(classifier, verbose, random_state)

        # Create the temp file outside the try block: if creation itself fails
        # there is nothing to remove, and the cleanup must not raise NameError.
        input_file = tempfile.NamedTemporaryFile(prefix='transition_parse.train',
                                                 dir=tempfile.gettempdir(), delete=False)
        try:
            # The context manager closes the handle on success and on error;
            # with delete=False the file stays on disk for load_svmlight_file.
            with input_file:
                if self._algorithm == self.ARC_STANDARD:
                    self._create_training_examples_arc_std(dgraphs, input_file)
                else:
                    self._create_training_examples_arc_eager(dgraphs, input_file)

            x_train, y_train = load_svmlight_file(input_file.name)
            model.fit(x_train, y_train)

            # Write the model through a context manager so the handle is
            # flushed and closed deterministically.
            with open(modelfile, 'wb') as model_out:
                pickle.dump(model, model_out)
        finally:
            os.remove(input_file.name)


## With Morphological features

In [6]:
# Load the train/test treebank variants whose feature column carries the extra
# morphological information (the with_mor_* files written by the
# data-preparation cells earlier in this notebook).
graph_mor_train = DependencyGraph.load("./UD_Hindi/with_mor_train.conllu")
graph_mor_test = DependencyGraph.load("./UD_Hindi/with_mor_test.conllu")

  "The graph doesn't contain a node "


### Arc-Standard

##### SVM Classifier

In [13]:
#Training
# Arc-standard transitions + SVM, on the treebank variant with morphological features.
model_path_mor_std_svm = './UD_Hindi/temp.arcstd_mor_svm.model'
parser_mor_std_svm = MainTransitionParser('arc-standard')
parser_mor_std_svm.train(
    graph_mor_train,
    model_path_mor_std_svm,
    classifier="svm",
    verbose=False,
)

#Testing
# Parse the test graphs with the model saved above and score the output against them.
result_mor_std_svm = parser_mor_std_svm.parse(graph_mor_test, model_path_mor_std_svm)
eval_mor_std_svm = DependencyEvaluator(result_mor_std_svm, graph_mor_test)
scores_mor_std_svm = eval_mor_std_svm.eval()
print(scores_mor_std_svm)

 Number of training examples : 501
 Number of valid (projective) examples : 477
(0.9130763416477702, 0.8329554043839759)


##### Logistic Regression

In [21]:
#Training
# Arc-standard transitions + logistic regression, on the treebank variant with morphological features.
model_path_mor_std_log = './UD_Hindi/temp.arcstd_mor_log.model'
parser_mor_std_log = MainTransitionParser('arc-standard')
parser_mor_std_log.train(
    graph_mor_train,
    model_path_mor_std_log,
    classifier="logistic",
    verbose=False,
)

#Testing
# Parse the test graphs with the model saved above and score the output against them.
result_mor_std_log = parser_mor_std_log.parse(graph_mor_test, model_path_mor_std_log)
eval_mor_std_log = DependencyEvaluator(result_mor_std_log, graph_mor_test)
scores_mor_std_log = eval_mor_std_log.eval()
print(scores_mor_std_log)

 Number of training examples : 501
 Number of valid (projective) examples : 477
(0.8684807256235828, 0.7709750566893424)


##### MLP Classifier

In [22]:
#Training
# Arc-standard transitions + MLP, on the treebank variant with morphological features.
model_path_mor_std_mlp = './UD_Hindi/temp.arcstd_mor_mlp.model'
parser_mor_std_mlp = MainTransitionParser('arc-standard')
parser_mor_std_mlp.train(
    graph_mor_train,
    model_path_mor_std_mlp,
    classifier="mlp",
    verbose=False,
)

#Testing
# Parse the test graphs with the model saved above and score the output against them.
result_mor_std_mlp = parser_mor_std_mlp.parse(graph_mor_test, model_path_mor_std_mlp)
eval_mor_std_mlp = DependencyEvaluator(result_mor_std_mlp, graph_mor_test)
scores_mor_std_mlp = eval_mor_std_mlp.eval()
print(scores_mor_std_mlp)

 Number of training examples : 501
 Number of valid (projective) examples : 477
(0.8594104308390023, 0.7619047619047619)


### Arc-Eager

##### SVM

In [25]:
#Training
# Arc-eager transitions + SVM, on the treebank variant with morphological features.
model_path_mor_eag_svm = './UD_Hindi/temp.arceag_mor_svm.model'
parser_mor_eag_svm = MainTransitionParser('arc-eager')
parser_mor_eag_svm.train(
    graph_mor_train,
    model_path_mor_eag_svm,
    classifier="svm",
    verbose=False,
)

#Testing
# Parse the test graphs with the model saved above and score the output against them.
result_mor_eag_svm = parser_mor_eag_svm.parse(graph_mor_test, model_path_mor_eag_svm)
eval_mor_eag_svm = DependencyEvaluator(result_mor_eag_svm, graph_mor_test)
scores_mor_eag_svm = eval_mor_eag_svm.eval()
print(scores_mor_eag_svm)

 Number of training examples : 501
 Number of valid (projective) examples : 477
(0.9115646258503401, 0.8253968253968254)


##### Logistic Regression

In [27]:
#Training
# Arc-eager transitions + logistic regression, on the treebank variant with morphological features.
model_path_mor_eag_log = './UD_Hindi/temp.arceag_mor_log.model'
parser_mor_eag_log = MainTransitionParser('arc-eager')
parser_mor_eag_log.train(
    graph_mor_train,
    model_path_mor_eag_log,
    classifier="logistic",
    verbose=False,
)

#Testing
# Parse the test graphs with the model saved above and score the output against them.
result_mor_eag_log = parser_mor_eag_log.parse(graph_mor_test, model_path_mor_eag_log)
eval_mor_eag_log = DependencyEvaluator(result_mor_eag_log, graph_mor_test)
scores_mor_eag_log = eval_mor_eag_log.eval()
print(scores_mor_eag_log)

 Number of training examples : 501
 Number of valid (projective) examples : 477
(0.9040060468631897, 0.8057445200302343)


##### MLP Classifier

In [28]:
#Training
# Arc-eager transitions + MLP, on the treebank variant with morphological features.
model_path_mor_eag_mlp = './UD_Hindi/temp.arceag_mor_mlp.model'
parser_mor_eag_mlp = MainTransitionParser('arc-eager')
parser_mor_eag_mlp.train(
    graph_mor_train,
    model_path_mor_eag_mlp,
    classifier="mlp",
    verbose=False,
)

#Testing
# Parse the test graphs with the model saved above and score the output against them.
result_mor_eag_mlp = parser_mor_eag_mlp.parse(graph_mor_test, model_path_mor_eag_mlp)
eval_mor_eag_mlp = DependencyEvaluator(result_mor_eag_mlp, graph_mor_test)
scores_mor_eag_mlp = eval_mor_eag_mlp.eval()
print(scores_mor_eag_mlp)

 Number of training examples : 501
 Number of valid (projective) examples : 477
(0.8684807256235828, 0.764928193499622)


## Without Morphological features

In [29]:
# Load the train/test treebank variants whose feature column was blanked to "_"
# (the without_mor_* files written by the data-preparation cells earlier in
# this notebook).
graph_train = DependencyGraph.load("./UD_Hindi/without_mor_train.conllu")
graph_test = DependencyGraph.load("./UD_Hindi/without_mor_test.conllu")

  "The graph doesn't contain a node "


### Arc-Standard

##### SVM Classifier

In [31]:
#Training
# Arc-standard transitions + SVM, on the treebank variant without morphological features.
model_path_std_svm = './UD_Hindi/temp.arcstd_svm.model'
parser_std_svm = MainTransitionParser('arc-standard')
parser_std_svm.train(
    graph_train,
    model_path_std_svm,
    classifier="svm",
    verbose=False,
)

#Testing
# Parse the test graphs with the model saved above and score the output against them.
result_std_svm = parser_std_svm.parse(graph_test, model_path_std_svm)
eval_std_svm = DependencyEvaluator(result_std_svm, graph_test)
scores_std_svm = eval_std_svm.eval()
print(scores_std_svm)

 Number of training examples : 501
 Number of valid (projective) examples : 477
(0.8465608465608465, 0.762660619803477)


##### Logistic Regression

In [34]:
#Training
# Arc-standard transitions + logistic regression, on the treebank variant without morphological features.
model_path_std_log = './UD_Hindi/temp.arcstd_log.model'
parser_std_log = MainTransitionParser('arc-standard')
parser_std_log.train(
    graph_train,
    model_path_std_log,
    classifier="logistic",
    verbose=False,
)

#Testing
# Parse the test graphs with the model saved above and score the output against them.
result_std_log = parser_std_log.parse(graph_test, model_path_std_log)
eval_std_log = DependencyEvaluator(result_std_log, graph_test)
scores_std_log = eval_std_log.eval()
print(scores_std_log)

 Number of training examples : 501
 Number of valid (projective) examples : 477
(0.7974300831443688, 0.6863189720332578)


##### MLP Classifier

In [33]:
#Training
# Arc-standard transitions + MLP, on the treebank variant without morphological features.
model_path_std_mlp = './UD_Hindi/temp.arcstd_mlp.model'
parser_std_mlp = MainTransitionParser('arc-standard')
parser_std_mlp.train(
    graph_train,
    model_path_std_mlp,
    classifier="mlp",
    verbose=False,
)

#Testing
# Parse the test graphs with the model saved above and score the output against them.
result_std_mlp = parser_std_mlp.parse(graph_test, model_path_std_mlp)
eval_std_mlp = DependencyEvaluator(result_std_mlp, graph_test)
scores_std_mlp = eval_std_mlp.eval()
print(scores_std_mlp)

 Number of training examples : 501
 Number of valid (projective) examples : 477
(0.8019652305366591, 0.6878306878306878)


### Arc-Eager

##### SVM

In [36]:
#Training
# Arc-eager transitions + SVM, on the treebank variant without morphological features.
model_path_eag_svm = './UD_Hindi/temp.arceag_svm.model'
parser_eag_svm = MainTransitionParser('arc-eager')
parser_eag_svm.train(
    graph_train,
    model_path_eag_svm,
    classifier="svm",
    verbose=False,
)

#Testing
# Parse the test graphs with the model saved above and score the output against them.
result_eag_svm = parser_eag_svm.parse(graph_test, model_path_eag_svm)
eval_eag_svm = DependencyEvaluator(result_eag_svm, graph_test)
scores_eag_svm = eval_eag_svm.eval()
print(scores_eag_svm)

 Number of training examples : 501
 Number of valid (projective) examples : 477
(0.871504157218443, 0.7732426303854876)


##### Logistic Regression

In [37]:
#Training
# Arc-eager transitions + logistic regression, on the treebank variant without morphological features.
model_path_eag_log = './UD_Hindi/temp.arceag_log.model'
parser_eag_log = MainTransitionParser('arc-eager')
parser_eag_log.train(
    graph_train,
    model_path_eag_log,
    classifier="logistic",
    verbose=False,
)

#Testing
# Parse the test graphs with the model saved above and score the output against them.
result_eag_log = parser_eag_log.parse(graph_test, model_path_eag_log)
eval_eag_log = DependencyEvaluator(result_eag_log, graph_test)
scores_eag_log = eval_eag_log.eval()
print(scores_eag_log)

 Number of training examples : 501
 Number of valid (projective) examples : 477
(0.8473167044595616, 0.7309145880574452)


##### MLP Classifier

In [39]:
#Training
# Arc-eager transitions + MLP, on the treebank variant without morphological features.
model_path_eag_mlp = './UD_Hindi/temp.arceag_mlp.model'
parser_eag_mlp = MainTransitionParser('arc-eager')
parser_eag_mlp.train(
    graph_train,
    model_path_eag_mlp,
    classifier="mlp",
    verbose=False,
)

#Testing
# Parse the test graphs with the model saved above and score the output against them.
result_eag_mlp = parser_eag_mlp.parse(graph_test, model_path_eag_mlp)
eval_eag_mlp = DependencyEvaluator(result_eag_mlp, graph_test)
scores_eag_mlp = eval_eag_mlp.eval()
print(scores_eag_mlp)

 Number of training examples : 501
 Number of valid (projective) examples : 477
(0.8291761148904006, 0.7052154195011338)
