In [1]:
from nltk.parse import DependencyGraph, DependencyEvaluator
from nltk.parse.transitionparser import TransitionParser, Configuration, Transition
import tempfile, os
from os import remove
import re
import pickle
try:
    from numpy import array
    from scipy import sparse
    from sklearn.datasets import load_svmlight_file
    from sklearn import svm
    from sklearn import linear_model
    from sklearn import neural_network
except ImportError:
    pass


  from collections import Sequence


In [52]:
class TransitionParserCustom(TransitionParser):
    """
    TransitionParser variant whose ``train`` step can fit one of several
    scikit-learn classifiers instead of a single fixed one.
    """

    # Values accepted for the ``modelType`` argument of ``train``.
    SUPPORTED_MODEL_TYPES = ('logistic', 'MLP', 'SVM')

    def train(self, depgraphs, modelfile, modelType='logistic', njobs=1, verbose=True):
        """
        Extract transition training examples from ``depgraphs``, fit the
        selected classifier on them and pickle the fitted model to ``modelfile``.

        :param depgraphs : list of DependencyGraph as the training data
        :type depgraphs : DependencyGraph
        :param modelfile : file name to save the trained model
        :type modelfile : str
        :param modelType : classifier to fit: 'logistic' (LogisticRegression),
            'MLP' (MLPClassifier) or 'SVM' (SVC with a degree-2 polynomial kernel)
        :type modelType : str
        :param njobs : forwarded as ``n_jobs`` to LogisticRegression only
        :type njobs : int
        :param verbose : forwarded to LogisticRegression and SVC; not passed
            to the MLP classifier
        :type verbose : bool
        :raises ValueError: if ``modelType`` is not a supported value
        """
        # Reject an unknown model type before doing the feature extraction;
        # previously this fell through to ``None.fit(...)`` and failed late
        # with an unhelpful AttributeError.
        if modelType not in self.SUPPORTED_MODEL_TYPES:
            raise ValueError(
                "Unsupported modelType %r; expected one of %r"
                % (modelType, self.SUPPORTED_MODEL_TYPES)
            )

        # Create the temporary file outside the try block: if creation itself
        # fails there is nothing to clean up, and the finally clause must not
        # touch an unbound name.
        input_file = tempfile.NamedTemporaryFile(
            prefix='transition_parse.train',
            dir=tempfile.gettempdir(),
            delete=False)

        try:
            if self._algorithm == self.ARC_STANDARD:
                self._create_training_examples_arc_std(depgraphs, input_file)
            else:
                self._create_training_examples_arc_eager(depgraphs, input_file)

            # Close (and thereby flush) the file before it is re-read by name.
            input_file.close()
            # Load the training examples written to the temporary file.
            x_train, y_train = load_svmlight_file(input_file.name)

            if modelType == 'logistic':
                model = linear_model.LogisticRegression(
                    C=0.5,
                    verbose=verbose,
                    n_jobs=njobs,
                    solver='lbfgs'
                )
            elif modelType == 'MLP':
                model = neural_network.MLPClassifier(
                    hidden_layer_sizes=(100, 50),
                    learning_rate='adaptive',
                    max_iter=500
                )
            else:  # 'SVM'
                model = svm.SVC(
                    kernel='poly',
                    degree=2,
                    coef0=0,
                    gamma=0.2,
                    C=0.5,
                    verbose=verbose,
                    probability=True)

            model.fit(x_train, y_train)
            # Save the model to file name (as pickle); the context manager
            # guarantees the handle is flushed and closed.
            with open(modelfile, 'wb') as model_out:
                pickle.dump(model, model_out)
        finally:
            # close() is a no-op when the file is already closed; it matters
            # when example extraction raised before the explicit close above.
            input_file.close()
            os.remove(input_file.name)

In [46]:
# Paths (relative to the notebook's working directory) of the CoNLL-U
# training and test files passed to loadDataIntoDependencyGraph below.
trainFile = "./UD_Hindi/hi-ud-train.conllu"
testFile = "./UD_Hindi/hi-ud-test.conllu"


def _readConlluSentences(path, haveMorphoFeatures):
    """
    Read one CoNLL-U file and return a list of sentence strings, one per
    blank-line-separated block, with column 6 (index 5) rewritten.

    With ``haveMorphoFeatures`` the 6th column becomes ``<col6>|<col10>``;
    otherwise it is replaced by ``'_'``.
    """
    sentences = []
    rows = []
    # CoNLL-U files are UTF-8; do not rely on the platform default encoding.
    with open(path, encoding="utf-8") as fd:
        for line in fd:
            if line.strip() == "":
                # A blank line ends the current sentence. Skip empty blocks
                # (e.g. consecutive blank lines) so no empty graph is built.
                if rows:
                    sentences.append("".join(rows))
                    rows = []
                continue
            if line.startswith("#"):
                # Sentence-level comment line: it has no token columns.
                continue

            # Strip only the line terminator so the last column keeps its
            # full value even when the final line has no trailing newline.
            cols = line.rstrip("\n").split("\t")
            if haveMorphoFeatures:
                cols[5] = cols[5] + '|' + cols[9]
            else:
                cols[5] = '_'
            rows.append("\t".join(cols) + "\n")

    # Keep the last sentence when the file does not end with a blank line.
    if rows:
        sentences.append("".join(rows))
    return sentences


def loadDataIntoDependencyGraph(trainFile, testFile, haveMorphoFeatures = True):
    """
    Read the training and test CoNLL-U files and build one DependencyGraph
    per sentence.

    :param trainFile: path of the training CoNLL-U file
    :param testFile: path of the test CoNLL-U file
    :param haveMorphoFeatures: if True, the 6th column of every token is
        extended with ``'|'`` plus the token's 10th column; if False the
        6th column is blanked to ``'_'``
    :return: tuple ``(trainDataGraph, testDataGraph)`` of lists of
        DependencyGraph, in file order
    """
    trainDataGraph = [
        DependencyGraph(sentence)
        for sentence in _readConlluSentences(trainFile, haveMorphoFeatures)
    ]
    testDataGraph = [
        DependencyGraph(sentence)
        for sentence in _readConlluSentences(testFile, haveMorphoFeatures)
    ]
    return trainDataGraph, testDataGraph

In [20]:
# With morphological features (haveMorphoFeatures defaults to True)
trainDataGraphMorpho, testDataGraphMorpho = loadDataIntoDependencyGraph(trainFile,testFile)
# Without morphological features
trainDataGraph, testDataGraph = loadDataIntoDependencyGraph(trainFile,testFile,haveMorphoFeatures=False)

  "The graph doesn't contain a node "


In [5]:
##
## Transition parser using arc-standard and SVM classifier
##

parser_std = TransitionParserCustom('arc-standard')

# Training on training data without morphological features
# (trainDataGraph was loaded with haveMorphoFeatures=False)
parser_std.train(trainDataGraph,'temp.arcstd.model',modelType='SVM', verbose=False)
result_std = parser_std.parse(testDataGraph, 'temp.arcstd.model')

# Training on training data with morphological features
parser_std.train(trainDataGraphMorpho,'temp.arcstd.morpho.model',modelType='SVM', verbose=False)
result_std_morpho = parser_std.parse(testDataGraphMorpho, 'temp.arcstd.morpho.model')

 Number of training examples : 500
 Number of valid (projective) examples : 476
 Number of training examples : 500
 Number of valid (projective) examples : 476


In [6]:
##
## Transition parser using arc-eager and SVM classifier
##

parser_eager = TransitionParserCustom('arc-eager')

# Training on training data without morphological features
# (trainDataGraph was loaded with haveMorphoFeatures=False)
parser_eager.train(trainDataGraph,'temp.arceager.model',modelType='SVM', verbose=False)
result_eager = parser_eager.parse(testDataGraph, 'temp.arceager.model')

# Training on training data with morphological features
parser_eager.train(trainDataGraphMorpho,'temp.arceager.morpho.model',modelType='SVM', verbose=False)
result_eager_morpho = parser_eager.parse(testDataGraphMorpho, 'temp.arceager.morpho.model')

 Number of training examples : 500
 Number of valid (projective) examples : 476
 Number of training examples : 500
 Number of valid (projective) examples : 476


In [7]:
# Evaluators for the SVM runs: each pairs a parser output with the test
# graphs it was parsed from (odd = without morpho features, even = with).
de1 = DependencyEvaluator(result_std, testDataGraph)
de2 = DependencyEvaluator(result_std_morpho, testDataGraphMorpho)
de3 = DependencyEvaluator(result_eager, testDataGraph)
de4 = DependencyEvaluator(result_eager_morpho, testDataGraphMorpho)

In [8]:
"""
print(de1.eval())
print(de2.eval())
print(de3.eval())
print(de4.eval())
"""

'\nprint(de1.eval())\nprint(de2.eval())\nprint(de3.eval())\nprint(de4.eval())\n'

In [9]:
##
## Transition parser using arc-standard and logistic classifier
##

# NOTE: this rebinds parser_std and overwrites the 'temp.arcstd*.model'
# files written by the SVM cell; modelType defaults to 'logistic'.
parser_std = TransitionParserCustom('arc-standard')

# Training on training data without morphological features
# (trainDataGraph was loaded with haveMorphoFeatures=False)
parser_std.train(trainDataGraph,'temp.arcstd.model',njobs=48, verbose=False)
result_std_logistic = parser_std.parse(testDataGraph, 'temp.arcstd.model')

# Training on training data with morphological features
parser_std.train(trainDataGraphMorpho,'temp.arcstd.morpho.model',njobs=48, verbose=False)
result_std_morpho_logistic = parser_std.parse(testDataGraphMorpho, 'temp.arcstd.morpho.model')

 Number of training examples : 500
 Number of valid (projective) examples : 476
 Number of training examples : 500
 Number of valid (projective) examples : 476


In [10]:
##
## Transition parser using arc-eager and logistic classifier
##

# NOTE: this rebinds parser_eager and overwrites the 'temp.arceager*.model'
# files written by the SVM cell; modelType defaults to 'logistic'.
parser_eager = TransitionParserCustom('arc-eager')

# Training on training data without morphological features
# (trainDataGraph was loaded with haveMorphoFeatures=False)
parser_eager.train(trainDataGraph,'temp.arceager.model', njobs=48, verbose=False)
result_eager_logistic = parser_eager.parse(testDataGraph, 'temp.arceager.model')

# Training on training data with morphological features
parser_eager.train(trainDataGraphMorpho,'temp.arceager.morpho.model',njobs=48, verbose=False)
result_eager_morpho_logistic = parser_eager.parse(testDataGraphMorpho, 'temp.arceager.morpho.model')

 Number of training examples : 500
 Number of valid (projective) examples : 476
 Number of training examples : 500
 Number of valid (projective) examples : 476


In [11]:
# Evaluators for the logistic-regression runs: each pairs a parser output
# with the test graphs it was parsed from (odd = without morpho, even = with).
de5 = DependencyEvaluator(result_std_logistic, testDataGraph)
de6 = DependencyEvaluator(result_std_morpho_logistic, testDataGraphMorpho)
de7 = DependencyEvaluator(result_eager_logistic, testDataGraph)
de8 = DependencyEvaluator(result_eager_morpho_logistic, testDataGraphMorpho)

In [12]:
"""
print(de5.eval())
print(de6.eval())
print(de7.eval())
print(de8.eval())
"""

'\nprint(de5.eval())\nprint(de6.eval())\nprint(de7.eval())\nprint(de8.eval())\n'

In [53]:
##
## Transition parser using arc-standard and MLP classifier
##

# NOTE: this rebinds parser_std and overwrites the 'temp.arcstd*.model'
# files written by the earlier arc-standard cells.
parser_std = TransitionParserCustom('arc-standard')

# Training on training data without morphological features
# (trainDataGraph was loaded with haveMorphoFeatures=False)
parser_std.train(trainDataGraph,'temp.arcstd.model', modelType='MLP',verbose=False)
result_std_mlp = parser_std.parse(testDataGraph, 'temp.arcstd.model')

# Training on training data with morphological features
parser_std.train(trainDataGraphMorpho,'temp.arcstd.morpho.model',modelType='MLP', verbose=False)
result_std_morpho_mlp = parser_std.parse(testDataGraphMorpho, 'temp.arcstd.morpho.model')

 Number of training examples : 500
 Number of valid (projective) examples : 476
 Number of training examples : 500
 Number of valid (projective) examples : 476


In [54]:
##
## Transition parser using arc-eager and MLP classifier
##

# NOTE: this rebinds parser_eager and overwrites the 'temp.arceager*.model'
# files written by the earlier arc-eager cells.
parser_eager = TransitionParserCustom('arc-eager')

# Training on training data without morphological features
# (trainDataGraph was loaded with haveMorphoFeatures=False)
parser_eager.train(trainDataGraph,'temp.arceager.model',modelType='MLP', verbose=False)
result_eager_mlp = parser_eager.parse(testDataGraph, 'temp.arceager.model')

# Training on training data with morphological features
parser_eager.train(trainDataGraphMorpho,'temp.arceager.morpho.model',modelType='MLP', verbose=False)
result_eager_morpho_mlp = parser_eager.parse(testDataGraphMorpho, 'temp.arceager.morpho.model')

 Number of training examples : 500
 Number of valid (projective) examples : 476
 Number of training examples : 500
 Number of valid (projective) examples : 476


In [55]:
# Evaluators for the MLP runs: each pairs a parser output with the test
# graphs it was parsed from (odd = without morpho features, even = with).
de9 = DependencyEvaluator(result_std_mlp, testDataGraph)
de10 = DependencyEvaluator(result_std_morpho_mlp, testDataGraphMorpho)
de11 = DependencyEvaluator(result_eager_mlp, testDataGraph)
de12 = DependencyEvaluator(result_eager_morpho_mlp, testDataGraphMorpho)

In [56]:
"""
print(de9.eval())
print(de10.eval())
print(de11.eval())
print(de12.eval())
"""

'\nprint(de9.eval())\nprint(de10.eval())\nprint(de11.eval())\nprint(de12.eval())\n'

In [57]:
# Summary of all twelve evaluations, grouped by classifier. Each eval()
# result prints as a pair of floats; presumably these are attachment
# scores -- TODO confirm which is labelled/unlabelled against the NLTK docs.
print("SVM based Transition Parser:")
print("(Without morphological features) arc-standard : ",de1.eval()," arc-eager : ", de3.eval())
print("(With morphological features) arc-standard : ",de2.eval()," arc-eager : ", de4.eval())

print("Logistic regression based Transition Parser:")
print("(Without morphological features) arc-standard : ",de5.eval()," arc-eager : ", de7.eval())
print("(With morphological features) arc-standard : ",de6.eval()," arc-eager : ", de8.eval())

print("MLP based Transition Parser:")
print("(Without morphological features) arc-standard : ",de9.eval()," arc-eager : ", de11.eval())
print("(With morphological features) arc-standard : ",de10.eval()," arc-eager : ", de12.eval())


SVM based Transition Parser:
(Without morphological features) arc-standard :  (0.8495842781557067, 0.7671957671957672)  arc-eager :  (0.8677248677248677, 0.7732426303854876)
(With morphological features) arc-standard :  (0.9138321995464853, 0.8329554043839759)  arc-eager :  (0.9108087679516251, 0.8269085411942555)
Logistic regression based Transition Parser:
(Without morphological features) arc-standard :  (0.7928949357520786, 0.6817838246409675)  arc-eager :  (0.8435374149659864, 0.7278911564625851)
(With morphological features) arc-standard :  (0.8669690098261527, 0.7671957671957672)  arc-eager :  (0.9024943310657596, 0.8027210884353742)
MLP based Transition Parser:
(Without morphological features) arc-standard :  (0.7951625094482238, 0.6848072562358276)  arc-eager :  (0.8208616780045351, 0.6931216931216931)
(With morphological features) arc-standard :  (0.8609221466364324, 0.7679516250944822)  arc-eager :  (0.8480725623582767, 0.7558578987150416)
