# Discourse Analysis

## Parse discourse tree with DPLP

In [2]:
# -*- coding: utf-8 -*-
import scipy
import numpy as np
import sklearn
import nltk
import tkinter

### Parsing of [file].brackets

In [32]:
filename='../data/test_tree.brackets'
def parse(filename):
    struct=list()
    with open(filename,'r') as f:
        for index,l in enumerate(f):
            s=l.split()
            s1=int(s[0][2])
            s2=int(s[1][0])
            nuc=s[2][1:-2] #nuclearity: Nucleus or Satellite
            rel=s[3][1:-2] #relation: 78 relations + 3
            struct.append({0:s1, 1:s2, 'nuc':nuc,'rel':rel})
    return struct

s=parse(filename)
print np.unique([r['rel'] for r in s])

['elaboration' 'purpose' 'span' 'temporal' 'textualorganization']


## Classifier
### Abstract class

In [4]:
class Classifier(object):
    def fit(self,x,y):
        raise NotImplementedError("fit not implemented")
    def predict(self,x):
        raise NotImplementedError("predict not implemented")
    def score(self,x,y):
        return (self.predict(x)==y).mean()

### Logit regression

In [286]:
def sig(x,w):
    return 1./(1+np.exp(x.dot(w)))

def sig_grad(x,w):    
    return x*(sig(x,w)*(1.-sig(x,w)))    
    
class Logit(Classifier):
    def __init__(self):
        pass
    
    def fit(self,x,y,eps):
        w=np.ones((len(x[0]),1))
        l=y-sig(x,w)
        l=np.hstack((l,l))
        for i in range(400):
            print w
            w.T[0]-=eps*(sig_grad(x,w).T.dot(l)).sum(axis=0).T
            
            
        self.w=w
        return w
    
    def predict(self,x):
        return sig(x,self.w)
    
    def score(self,x,y):
        pass
    
datax=np.array([[3,1],[2,2],[0,0]])
datay=np.array([[1,-1,1]]).T
w=np.ones((len(datax[0]),1))
log=Logit()

x=datax
y=datay


log.fit(x,y,0.01)


[[ 1.]
 [ 1.]]
[[ 1.00002541]
 [ 1.00002541]]
[[ 1.00005083]
 [ 1.00005083]]
[[ 1.00007624]
 [ 1.00007624]]
[[ 1.00010164]
 [ 1.00010164]]
[[ 1.00012705]
 [ 1.00012705]]
[[ 1.00015245]
 [ 1.00015245]]
[[ 1.00017785]
 [ 1.00017785]]
[[ 1.00020325]
 [ 1.00020325]]
[[ 1.00022864]
 [ 1.00022864]]
[[ 1.00025404]
 [ 1.00025404]]
[[ 1.00027943]
 [ 1.00027943]]
[[ 1.00030481]
 [ 1.00030481]]
[[ 1.0003302]
 [ 1.0003302]]
[[ 1.00035558]
 [ 1.00035558]]
[[ 1.00038096]
 [ 1.00038096]]
[[ 1.00040634]
 [ 1.00040634]]
[[ 1.00043171]
 [ 1.00043171]]
[[ 1.00045709]
 [ 1.00045709]]
[[ 1.00048246]
 [ 1.00048246]]
[[ 1.00050782]
 [ 1.00050782]]
[[ 1.00053319]
 [ 1.00053319]]
[[ 1.00055855]
 [ 1.00055855]]
[[ 1.00058391]
 [ 1.00058391]]
[[ 1.00060927]
 [ 1.00060927]]
[[ 1.00063462]
 [ 1.00063462]]
[[ 1.00065998]
 [ 1.00065998]]
[[ 1.00068533]
 [ 1.00068533]]
[[ 1.00071067]
 [ 1.00071067]]
[[ 1.00073602]
 [ 1.00073602]]
[[ 1.00076136]
 [ 1.00076136]]
[[ 1.0007867]
 [ 1.0007867]]
[[ 1.00081204]
 [ 1.00081204

array([[ 1.00997211],
       [ 1.00997211]])