In [1]:
from nltk.parse import DependencyGraph, DependencyEvaluator
from nltk.parse.transitionparser import TransitionParser, Configuration, Transition
import tempfile
import os
from numpy import array
from scipy import sparse
from sklearn.datasets import load_svmlight_file
from sklearn import svm
from sklearn import linear_model
from sklearn import neural_network
import pickle

In [2]:
# Load the training treebank and show which features the parser extracts
# from the initial configuration built on its first sentence.
f = DependencyGraph.load("hi-ud-train.conllu")
conf = Configuration(f[0])
print(*conf.extract_features(), sep=', ')

FileNotFoundError: [Errno 2] No such file or directory: 'hi-ud-train.conllu'

## Creating two files - One with the morphological features and one without the morphological features

In [26]:
def split_morpho_variants(source_path, with_morpho_path, without_morpho_path):
    """Write two variants of a tab-separated, 10-column treebank file.

    For every non-blank line of ``source_path``:

    * the "with morphology" copy gets column 6 extended with ``"|"`` plus the
      content of column 10 (all other columns untouched);
    * the "without morphology" copy gets column 6 replaced by ``"_"``.

    Blank lines (sentence separators) are copied unchanged to both outputs.

    :param source_path: path of the treebank file to read (UTF-8)
    :param with_morpho_path: output path for the feature-enriched copy
    :param without_morpho_path: output path for the feature-stripped copy
    :raises IndexError: if a non-blank line has fewer than 10 columns
    """
    # The input is only read, so plain "r" is enough ("r+" needs write access).
    # All three handles use UTF-8 and are closed even if a line is malformed.
    with open(source_path, "r", encoding='utf-8') as source, \
            open(with_morpho_path, "w", encoding='utf-8') as with_morpho, \
            open(without_morpho_path, "w", encoding='utf-8') as without_morpho:
        for line in source:
            if line == '\n':
                with_morpho.write(line)
                without_morpho.write(line)
                continue

            # Strip only the line terminator, so a final line without a
            # trailing newline does not lose its last real character.
            columns = line.rstrip('\n').split('\t')
            enriched = list(columns)
            enriched[5] = enriched[5] + "|" + enriched[9]
            columns[5] = "_"

            with_morpho.write('\t'.join(enriched) + '\n')
            without_morpho.write('\t'.join(columns) + '\n')


#Train Data
split_morpho_variants("hi-ud-train.conllu",
                      'with_morpho_train.conllu',
                      'without_morpho_train.conllu')

#Test Data
split_morpho_variants("hi-ud-test.conllu",
                      'with_morpho_test.conllu',
                      'without_morpho_test.conllu')


## Custom Transition Parser

In [27]:
class MyTransitionParser(TransitionParser):
    """TransitionParser whose ``train`` lets the caller choose the classifier."""

    @staticmethod
    def _make_classifier(classifier, verbose):
        """Return an unfitted estimator for the given classifier name.

        :param classifier : one of "svm", "logistic" or "mlp"
        :type classifier : str
        :param verbose : forwarded to the SVM and logistic-regression estimators
        :type verbose : bool
        :raises ValueError : if ``classifier`` is not a supported name
        """
        if classifier == "svm":
            # The parameter is set according to the paper:
            # Algorithms for Deterministic Incremental Dependency Parsing by Joakim Nivre
            # Todo : because of probability = True => very slow due to
            # cross-validation. Need to improve the speed here
            return svm.SVC(
                kernel='poly',
                degree=2,
                coef0=0,
                gamma=0.2,
                C=0.5,
                verbose=verbose,
                probability=True)
        if classifier == "logistic":
            return linear_model.LogisticRegression(
                C=0.5,
                solver='lbfgs',
                verbose=verbose)
        if classifier == "mlp":
            return neural_network.MLPClassifier(
                hidden_layer_sizes=(100, 50,),
                learning_rate='adaptive',
                max_iter=1000)
        raise ValueError(
            "Unknown classifier %r; expected 'svm', 'logistic' or 'mlp'" % (classifier,))

    def train(self, depgraphs, modelfile, classifier="svm",verbose=True):
        """
        Generate training examples from ``depgraphs``, fit the selected
        classifier on them and pickle the fitted model to ``modelfile``.

        :param depgraphs : list of DependencyGraph as the training data
        :type depgraphs : DependencyGraph
        :param modelfile : file name to save the trained model
        :type modelfile : str
        :param classifier : which estimator to fit: "svm", "logistic" or "mlp"
        :type classifier : str
        :param verbose : forwarded to the SVM and logistic-regression estimators
        :type verbose : bool
        :raises ValueError : if ``classifier`` is not a supported name
        """
        # Validate the classifier name first so a typo fails immediately
        # instead of after the training examples have been generated.
        model = self._make_classifier(classifier, verbose)

        # Created outside the try block: if creation itself fails there is
        # nothing to clean up and the real error is not masked.
        input_file = tempfile.NamedTemporaryFile(
            prefix='transition_parse.train',
            dir=tempfile.gettempdir(),
            delete=False)
        try:
            if self._algorithm == self.ARC_STANDARD:
                self._create_training_examples_arc_std(depgraphs, input_file)
            else:
                self._create_training_examples_arc_eager(depgraphs, input_file)

            input_file.close()
            # Using the temporary file to train the classifier
            x_train, y_train = load_svmlight_file(input_file.name)
            model.fit(x_train, y_train)
            # Save the model to file name (as pickle); the context manager
            # guarantees the handle is flushed and closed.
            with open(modelfile, 'wb') as model_out:
                pickle.dump(model, model_out)
        finally:
            # Closing again is harmless and ensures the handle is released
            # before removal when an error occurred before the close above.
            input_file.close()
            os.remove(input_file.name)


## With Morphological features

In [28]:
# Load the train/test treebanks whose feature column carries the extra
# morphological information (train first, then test).
graph_morpho_train, graph_morpho_test = (
    DependencyGraph.load(treebank_path)
    for treebank_path in ("with_morpho_train.conllu", "with_morpho_test.conllu")
)

  "The graph doesn't contain a node "


### Arc-Standard

##### SVM Classifier

In [29]:
# Fit an arc-standard parser with the SVM classifier on the
# morphology-enriched training graphs; the model is pickled to disk.
parser_m_std_svm = MyTransitionParser('arc-standard')
parser_m_std_svm.train(
    graph_morpho_train,
    'temp.arcstd_m_svm.model',
    classifier="svm",
    verbose=False,
)

# Parse the test graphs with the saved model and score against the gold trees.
result_m_std_svm = parser_m_std_svm.parse(
    graph_morpho_test,
    'temp.arcstd_m_svm.model',
)
d1 = DependencyEvaluator(result_m_std_svm, graph_morpho_test)
# Score pair from eval(): presumably unlabeled then labeled attachment score — confirm against NLTK.
print(d1.eval())

 Number of training examples : 501
 Number of valid (projective) examples : 477
(0.9123204837490552, 0.8306878306878307)


##### Logistic Regression

In [30]:
# Fit an arc-standard parser with logistic regression on the
# morphology-enriched training graphs; the model is pickled to disk.
parser_m_std_log = MyTransitionParser('arc-standard')
parser_m_std_log.train(
    graph_morpho_train,
    'temp.arcstd_m_log.model',
    classifier="logistic",
    verbose=False,
)

# Parse the test graphs with the saved model and score against the gold trees.
result_m_std_log = parser_m_std_log.parse(
    graph_morpho_test,
    'temp.arcstd_m_log.model',
)
d2 = DependencyEvaluator(result_m_std_log, graph_morpho_test)
# Score pair from eval(): presumably unlabeled then labeled attachment score — confirm against NLTK.
print(d2.eval())

 Number of training examples : 501
 Number of valid (projective) examples : 477
(0.8669690098261527, 0.7671957671957672)


##### MLP Classifier

In [31]:
# Fit an arc-standard parser with the MLP classifier on the
# morphology-enriched training graphs; the model is pickled to disk.
parser_m_std_mlp = MyTransitionParser('arc-standard')
parser_m_std_mlp.train(
    graph_morpho_train,
    'temp.arcstd_m_mlp.model',
    classifier="mlp",
    verbose=False,
)

# Parse the test graphs with the saved model and score against the gold trees.
result_m_std_mlp = parser_m_std_mlp.parse(
    graph_morpho_test,
    'temp.arcstd_m_mlp.model',
)
d3 = DependencyEvaluator(result_m_std_mlp, graph_morpho_test)
# Score pair from eval(): presumably unlabeled then labeled attachment score — confirm against NLTK.
print(d3.eval())

 Number of training examples : 501
 Number of valid (projective) examples : 477
(0.8578987150415722, 0.7603930461073318)


### Arc-Eager

##### SVM

In [32]:
# Fit an arc-eager parser with the SVM classifier on the
# morphology-enriched training graphs; the model is pickled to disk.
parser_m_eag_svm = MyTransitionParser('arc-eager')
parser_m_eag_svm.train(
    graph_morpho_train,
    'temp.arceag_m_svm.model',
    classifier="svm",
    verbose=False,
)

# Parse the test graphs with the saved model and score against the gold trees.
result_m_eag_svm = parser_m_eag_svm.parse(
    graph_morpho_test,
    'temp.arceag_m_svm.model',
)
d4 = DependencyEvaluator(result_m_eag_svm, graph_morpho_test)
# Score pair from eval(): presumably unlabeled then labeled attachment score — confirm against NLTK.
print(d4.eval())

 Number of training examples : 501
 Number of valid (projective) examples : 477
(0.9123204837490552, 0.8276643990929705)


##### Logistic Regression

In [33]:
# Fit an arc-eager parser with logistic regression on the
# morphology-enriched training graphs; the model is pickled to disk.
parser_m_eag_log = MyTransitionParser('arc-eager')
parser_m_eag_log.train(
    graph_morpho_train,
    'temp.arceag_m_log.model',
    classifier="logistic",
    verbose=False,
)

# Parse the test graphs with the saved model and score against the gold trees.
result_m_eag_log = parser_m_eag_log.parse(
    graph_morpho_test,
    'temp.arceag_m_log.model',
)
d5 = DependencyEvaluator(result_m_eag_log, graph_morpho_test)
# Score pair from eval(): presumably unlabeled then labeled attachment score — confirm against NLTK.
print(d5.eval())

 Number of training examples : 501
 Number of valid (projective) examples : 477
(0.9024943310657596, 0.8027210884353742)


##### MLP Classifier

In [34]:
# Fit an arc-eager parser with the MLP classifier on the
# morphology-enriched training graphs; the model is pickled to disk.
parser_m_eag_mlp = MyTransitionParser('arc-eager')
parser_m_eag_mlp.train(
    graph_morpho_train,
    'temp.arceag_m_mlp.model',
    classifier="mlp",
    verbose=False,
)

# Parse the test graphs with the saved model and score against the gold trees.
result_m_eag_mlp = parser_m_eag_mlp.parse(
    graph_morpho_test,
    'temp.arceag_m_mlp.model',
)
d6 = DependencyEvaluator(result_m_eag_mlp, graph_morpho_test)
# Score pair from eval(): presumably unlabeled then labeled attachment score — confirm against NLTK.
print(d6.eval())

 Number of training examples : 501
 Number of valid (projective) examples : 477
(0.8639455782312925, 0.762660619803477)


## Without Morphological features

In [35]:
# Load the train/test treebanks whose feature column was blanked out
# (train first, then test).
graph_train, graph_test = (
    DependencyGraph.load(treebank_path)
    for treebank_path in ("without_morpho_train.conllu", "without_morpho_test.conllu")
)

  "The graph doesn't contain a node "


### Arc-Standard

##### SVM Classifier

In [36]:
# Fit an arc-standard parser with the SVM classifier on the training
# graphs without morphological features; the model is pickled to disk.
parser_std_svm = MyTransitionParser('arc-standard')
parser_std_svm.train(
    graph_train,
    'temp.arcstd_svm.model',
    classifier="svm",
    verbose=False,
)

# Parse the test graphs with the saved model and score against the gold trees.
result_std_svm = parser_std_svm.parse(
    graph_test,
    'temp.arcstd_svm.model',
)
d7 = DependencyEvaluator(result_std_svm, graph_test)
# Score pair from eval(): presumably unlabeled then labeled attachment score — confirm against NLTK.
print(d7.eval())

 Number of training examples : 501
 Number of valid (projective) examples : 477
(0.8488284202569917, 0.764928193499622)


##### Logistic Regression

In [37]:
# Fit an arc-standard parser with logistic regression on the training
# graphs without morphological features; the model is pickled to disk.
parser_std_log = MyTransitionParser('arc-standard')
parser_std_log.train(
    graph_train,
    'temp.arcstd_log.model',
    classifier="logistic",
    verbose=False,
)

# Parse the test graphs with the saved model and score against the gold trees.
result_std_log = parser_std_log.parse(
    graph_test,
    'temp.arcstd_log.model',
)
d8 = DependencyEvaluator(result_std_log, graph_test)
# Score pair from eval(): presumably unlabeled then labeled attachment score — confirm against NLTK.
print(d8.eval())

 Number of training examples : 501
 Number of valid (projective) examples : 477
(0.7928949357520786, 0.6817838246409675)


##### MLP Classifier

In [38]:
# Fit an arc-standard parser with the MLP classifier on the training
# graphs without morphological features; the model is pickled to disk.
parser_std_mlp = MyTransitionParser('arc-standard')
parser_std_mlp.train(
    graph_train,
    'temp.arcstd_mlp.model',
    classifier="mlp",
    verbose=False,
)

# Parse the test graphs with the saved model and score against the gold trees.
result_std_mlp = parser_std_mlp.parse(
    graph_test,
    'temp.arcstd_mlp.model',
)
d9 = DependencyEvaluator(result_std_mlp, graph_test)
# Score pair from eval(): presumably unlabeled then labeled attachment score — confirm against NLTK.
print(d9.eval())

 Number of training examples : 501
 Number of valid (projective) examples : 477
(0.7974300831443688, 0.6870748299319728)


### Arc-Eager

##### SVM

In [39]:
# Fit an arc-eager parser with the SVM classifier on the training
# graphs without morphological features; the model is pickled to disk.
parser_eag_svm = MyTransitionParser('arc-eager')
parser_eag_svm.train(
    graph_train,
    'temp.arceag_svm.model',
    classifier="svm",
    verbose=False,
)

# Parse the test graphs with the saved model and score against the gold trees.
result_eag_svm = parser_eag_svm.parse(
    graph_test,
    'temp.arceag_svm.model',
)
d10 = DependencyEvaluator(result_eag_svm, graph_test)
# Score pair from eval(): presumably unlabeled then labeled attachment score — confirm against NLTK.
print(d10.eval())

 Number of training examples : 501
 Number of valid (projective) examples : 477
(0.871504157218443, 0.7747543461829176)


##### Logistic Regression

In [40]:
# Fit an arc-eager parser with logistic regression on the training
# graphs without morphological features; the model is pickled to disk.
parser_eag_log = MyTransitionParser('arc-eager')
parser_eag_log.train(
    graph_train,
    'temp.arceag_log.model',
    classifier="logistic",
    verbose=False,
)

# Parse the test graphs with the saved model and score against the gold trees.
result_eag_log = parser_eag_log.parse(
    graph_test,
    'temp.arceag_log.model',
)
d11 = DependencyEvaluator(result_eag_log, graph_test)
# Score pair from eval(): presumably unlabeled then labeled attachment score — confirm against NLTK.
print(d11.eval())

 Number of training examples : 501
 Number of valid (projective) examples : 477
(0.8435374149659864, 0.7278911564625851)


##### MLP Classifier

In [41]:
# Fit an arc-eager parser with the MLP classifier on the training
# graphs without morphological features; the model is pickled to disk.
parser_eag_mlp = MyTransitionParser('arc-eager')
parser_eag_mlp.train(
    graph_train,
    'temp.arceag_mlp.model',
    classifier="mlp",
    verbose=False,
)

# Parse the test graphs with the saved model and score against the gold trees.
result_eag_mlp = parser_eag_mlp.parse(
    graph_test,
    'temp.arceag_mlp.model',
)
d12 = DependencyEvaluator(result_eag_mlp, graph_test)
# Score pair from eval(): presumably unlabeled then labeled attachment score — confirm against NLTK.
print(d12.eval())

 Number of training examples : 501
 Number of valid (projective) examples : 477
(0.8155706727135299, 0.6931216931216931)
