In [67]:
# IMPORT STATEMENTS

from __future__ import print_function  # needed for Python 2
from __future__ import division        # needed for Python 2

import csv # csv reader
import re # regular expressions

#import nltk # Run once to install
#nltk.download('stopwords')
#nltk.download('wordnet')
#nltk.download('brown')

import string
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from sklearn.svm import LinearSVC
from nltk.classify import SklearnClassifier
from random import shuffle
from sklearn.pipeline import Pipeline
from sklearn.metrics import precision_recall_fscore_support, accuracy_score

from collections import Counter

# Count number of sentences spoken
from nltk.tokenize import sent_tokenize

# spaCy found to outperform nltk for labelling Dependecy Grammar Trees
import spacy
# English 
nlp = spacy.load('en_core_web_sm')

# Import to make Dependency Trees
from nltk import Tree

# Import to run word2vec and measure sentence similarities based on word vector
import gensim

# The following did not improve performance but were tested
# Imported brown corpus to learn POS tags from tagged sentences
import nltk
import sys
from nltk.corpus import brown
# Tested pre-trained CRF Tagger
from nltk.tag import CRFTagger

In [68]:
# Pre-trained POS-tagger provided in lectures
# Very similar performance to spaCy POS tagger
TAGGER_PATH = "crfpostagger"

tagger = CRFTagger()  # initialize tagger
tagger.set_model_file(TAGGER_PATH)

In [69]:
# FUNCTIONS TO LOAD FILES AND CREATE DATA SETS

In [70]:
# Use these functions if splitting training file into a training and heldout data sets

# Function used to load training file
def loadData(path, Text=None):
    with open(path) as f:
        
        #header = f.readline() # Do not skip header in training and test files
        
        reader = csv.reader(f, delimiter=',') # Comma delimited
        for line in reader:
            
            (Text, Character, Gender) = parseText(line)
            rawData.append((Text, Character, Gender))
            
    f.close()
        
# Split training file into into training and heldout set during cross validation
# Change between Gender and Character using CLASSIFY variable
def splitData(percentage): 
    dataSamples = len(rawData)
    halfOfData = int(len(rawData)/2)
    trainingSamples = int((percentage*dataSamples)/2)
    
    # Construct features for training data set
    for (Text, Character, Gender) in rawData[:trainingSamples] + rawData[halfOfData:halfOfData+trainingSamples]:
        
        # Variable to classify by Character or Gender
        # Change between Character and Gender
        CLASSIFY = Character
        
        trainData.append((toFeatureVectorTrain(Text, preProcessPOS(Text), preProcessUnigram(Text), preProcessBigram(Text), Character, Gender), CLASSIFY))
        
        # Add features to an overall feature dictionary containing all features
        for token in toFeatureVectorTrain(Text, preProcessPOS(Text), preProcessUnigram(Text), preProcessBigram(Text), Character, Gender):
            if token in featureDict:
                featureDict[token] += 1
            else:
                featureDict[token] = 1
    
    # Construct features for heldout set. Must run vector similarity on hedldoutData sentences to calculate appropriate probabilities
    i = 0
    for (Text, Character, Gender) in rawData[trainingSamples:halfOfData] + rawData[halfOfData+trainingSamples:]:
        
        # Variable to classify by Character or Gender
        # Change between Character and Gender
        CLASSIFY = Character
        
        heldoutData.append((toFeatureVectorTest(Text, preProcessPOS(Text), preProcessUnigram(Text), preProcessBigram(Text), character_prob[i], gender_prob[i]), CLASSIFY))
        i += 1

In [71]:
# Use these functions when not using a heldoutData set

# Load data from training and test files
def loadAllData(trainPath, testPath, Text=None):
    with open(trainPath) as f:
        
        #header = f.readline() # Don't skip header line 
        
        reader = csv.reader(f, delimiter=',') # Comma delimited data
        for line in reader:
            
            (Text, Character, Gender) = parseText(line)
            rawTrainData.append((Text, Character, Gender))
            
    f.close() 
    
    with open(testPath) as f:
        
        #header = f.readline()
        
        reader = csv.reader(f, delimiter=',')
        for line in reader:
            
            (Text, Character, Gender) = parseText(line)
            rawTestData.append((Text, Character, Gender))
            
    f.close()

# Create and add features to separate trainData and testData data sets
# Change between Gender and Character using CLASSIFY variable
def prepareData():
    
    for (Text, Character, Gender) in rawTrainData:
        
        # Variable to classify by Character or Gender
        # Change between Character and Gender
        CLASSIFY = Character
        
        trainData.append((toFeatureVectorTrain(Text, preProcessPOS(Text), preProcessUnigram(Text), preProcessBigram(Text), Character, Gender), CLASSIFY))
        
        # Add features to overall feature dictionary
        for token in toFeatureVectorTrain(Text, preProcessPOS(Text), preProcessUnigram(Text), preProcessBigram(Text), Character, Gender):
            if token in featureDict:
                featureDict[token] += 1
            else:
                featureDict[token] = 1
                
    i = 0            
    for (Text, Character, Gender) in rawTestData:
        
        # Variable to classify by Character or Gender
        # Change between Character and Gender
        CLASSIFY = Character
        
        testData.append((toFeatureVectorTest(Text, preProcessPOS(Text), preProcessUnigram(Text), preProcessBigram(Text), character_prob[i], gender_prob[i]), CLASSIFY))
        i += 1

In [72]:
# Convert line information from each input file into a tuple
def parseText(Line):
    
    # Capture text, character and gender
    lineText = Line[0]
    character = Line[1]
    gender = Line[2]
    
    # Return tuple of (LineText, Character, Gender)
    return (lineText, character, gender)


In [73]:
# TEXT PREPROCESSING FUNCTIONS

In [74]:
# UNIGRAMS

# Method converts line of text into appropriate unigram features
def preProcessUnigram(text):
    
    # Initialise lemmatiser
    lemmatizer = WordNetLemmatizer()
    
    # Remove html tags
    tags = re.compile('<.*?>')
    text = re.sub(tags,' ', text)
    
    # Add tokens for pauses, questions and exclamations
    text = re.sub(r"(\.)(\1)+", ' <PAUSE> ', text)
    text = re.sub(r"(\-)\1*", ' <PAUSE> ', text)
    text = re.sub('(\?)\1*', ' <?> ', text)
    text = re.sub('(\!)\1*', ' <!> ', text)
    
    # Split into words on whitespace
    tokens = text.split()
    
    # Convert to lower case
    tokens = [word.lower() for word in tokens]
    
    # Remove punctuation from each word without affecting tokens for pauses, questions...
    tokens = [word.translate(None, '\"\#\$\%\&\'\(\)\*\+\,\-\.\:\;\=\@\[\\\]\^\_\`\{\|\}\~') for word in tokens]
    
    # Remove remaining tokens that are not alphabet characters (a-z)
    tokens = [word for word in tokens if word.isalpha() or word == '<pause>' or word == '<?>' or word =='<!>']
    
    # Filter out stop words using nltk stopwords list
    stop_words = set(stopwords.words('english'))
    filtered_tokens = [word for word in tokens if not word in stop_words]
    
    # Apply lemmatisation
    filtered_tokens = [str(lemmatizer.lemmatize(word)) for word in filtered_tokens]
    
    # Return unigram tokens
    return filtered_tokens

In [75]:
# BIGRAMS

# Bigram version of preprocessing where no longer remove any words and add <s> and </s> tokens
def preProcessBigram(text):
    
    # Initialise lemmatiser
    lemmatizer = WordNetLemmatizer()
    
    # Remove html tags
    tags = re.compile('<.*?>')
    text = re.sub(tags,' ', text)
    
    # Add tokens for pauses, questions and exclamations
    text = re.sub(r"(\.)(\1)+", ' <PAUSE> ', text)
    text = re.sub(r"(\-)\1*", ' <PAUSE> ', text)
    text = re.sub('(\?)\1*', ' <?> ', text)
    text = re.sub('(\!)\1*', ' <!> ', text)
    
    # Add <s> and </s> tags
    text = '<s> ' + text + ' </s>'
    text = re.sub('\.', ' </s> <s>', text)
    text = re.sub('</s> <s> </s>', '</s>', text)
    
    # Split into words on whitespace
    tokens = text.split()
    
    # Convert to lower case
    tokens = [word.lower() for word in tokens]
    
    # Remove punctuation from each word without affecting tokens for pauses, questions...
    tokens = [word.translate(None, '\"\#\$\%\&\'\(\)\*\+\,\-\.\:\;\=\@\[\\\]\^\_\`\{\|\}\~') for word in tokens]
    
    # Remove remaining tokens that are not alphabet characters (a-z) excluding deliberately added tokens
    tokens = [word for word in tokens if word.isalpha() or word == '<s>' or word == '</s>' or word == '<pause>' or word == '<?>' or word == '<!>']
    
    # Apply lemmatisation
    filtered_tokens = [str(lemmatizer.lemmatize(word)) for word in tokens]
    
    # Return bigram tokens
    return filtered_tokens

In [76]:
# POS TAGS

# PreProcessing for POS tags using CRF POS tagger - replaced with spaCy POS tagger
def preProcessPOS(text):
    
    # Remove html tags
    tags = re.compile('<.*?>')
    text = re.sub(tags,' ', text)
    
    # Split into words on whitespace
    tokens = text.split()
    
    # Convert to lower case
    tokens = [word.lower() for word in tokens]
    
    # Remove punctuation from each word
    tokens = [word.translate(None, '\!\"\#\$\%\&\'\(\)\*\+\,\-\.\:\;\=\?\@\[\\\]\^\_\`\{\|\}\~') for word in tokens]
    
    # Remove remaining tokens that are not alphabet characters (a-z)
    tokens = [word for word in tokens if word.isalpha()]
    
    tokens = [unicode(word) for word in tokens]
    filtered_tokens = tagger.tag(tokens)
    
    # Experimented with only keeping variouys types of POS tags though did not improve classification
    #filtered_tokens = [token for token in filtered_tokens if not token[1] == 'VBP']
    
    # Return POS tag tokens
    return filtered_tokens

In [77]:
# DOC2VEC VECTOR MODEL FUNCTIONS FOR SENTENCE SIMILARITY

In [98]:
# Function to build word vector features using unigrams and bigrams

def WvecFeatures(line, tokensBigram):
    
    # Unigram features
    features = gensim.utils.simple_preprocess(line)
    
    # Bigram word vector model tested but does not improve classification
    '''for i in range(1, len(tokensBigram)):
        context = tokensBigram[i-1] # Context word
        target = tokensBigram[i] # Target word
        ngram = context + " " + target # bigram
        #features[ngram] = 1
        if (bool(re.match('<.*> <.*>', ngram)) == False):
            features.append(ngram)'''
    
    return features

In [99]:
# Function to build Doc2Vec features from a given file

def trainWec(filename):   
    with open(filename) as f:
        
            reader = csv.reader(f, delimiter=',')
            
            for i, line in enumerate(reader):
                (Text, Character, Gender) = parseText(line)
                
                if (sent_tokenize(Text.decode('utf-8')) == []):
                    yield gensim.models.doc2vec.TaggedDocument([], [str(i)+" "+Character+" "+Gender])
                
                else:
                    # Create a word vector separately for each sentence present in each line of the data file
                    for sent in sent_tokenize(Text.decode('utf-8')):
                        sent = sent.encode('ascii', 'replace')
                    
                        yield gensim.models.doc2vec.TaggedDocument(WvecFeatures(sent, preProcessBigram(sent)), [str(i)+" "+Character+" "+Gender])

train_corpus = list(trainWec('training.csv'))

In [100]:
# Build word vector model on training data features. Optimised parameters.

model = gensim.models.doc2vec.Doc2Vec(vector_size = 250, dm = 0, alpha = 0.07, min_alpha = 0.01, min_count = 1, epochs = 200)

model.build_vocab(train_corpus)

In [101]:
model.train(train_corpus, total_examples = model.corpus_count, epochs = model.epochs)

In [102]:
# Remove duplicate tags. Required since model builds a separate vector for each sentence, not each line of data.
def removeDuplicates(seq):
    seen = set()
    seen_add = seen.add
    return [x for x in seq if not (x in seen or seen_add(x))]

In [103]:
# Function to find most frequent element in a list
def most_frequent(List): 
    return max(set(List), key = List.count) 

In [104]:
# Create word vector features for test corpus and save correct gender and character labels
test_corpus = list(trainWec('test.csv'))

# Correct gender and character labels for each line merging those where multiple sentences on a line
y_T = (removeDuplicates(map(lambda t: t.tags[0], test_corpus)))
y_true_GENDER = [word.split()[2] for word in y_T]
y_true_CHARACTER = [word.split()[1] for word in y_T]

In [105]:
# Function to normalise dictionary values so they sum to 1
def normalize(dict, target=1.0):
    
   #raw = sum(dict.values())
   factor = target/sum(dict.values())
   return {key:value*factor for key,value in dict.iteritems()}

In [106]:
# Calculate predicted labels based on similarity ratings of each sentence
# Compare each sentence in each line of test corpus against sentences in the training corpus

# List of predicted labels
y_pred = []

# Lists containing correct tag of line and calculated similarity probabilities for each gender or character
gender_prob = []
character_prob = []

# Calculate probabilities using top similar sentences
for doc_id in range(len(test_corpus)):
        
        tot_prob = 0 # Total probability
        labels = [] # Labels from all sentences
        sims_labels =[] # List of label and probability
        dict_gender = {} # Gender probabilities
        dict_character = {} # Character probabilities
        
        # Ranks sentences by how similar to sentence in test corpus
        inferred_vector = model.infer_vector(test_corpus[doc_id].words)
        sims = model.docvecs.most_similar([inferred_vector], topn = len(model.docvecs))
        
        # SET RANKED VARIABLE
        # Take top RANKED number of similar sentences
        RANKED = 25
        
        for sim in sims[:RANKED]:
            labels = [lab for lab in sim[0].split()]
            tot_prob += sim[1]
            sims_labels.append([labels, sim[1]])
        
        # Similarity probability for genders of top RANKED similar sentences
        # Work out average probability if multiple sentences
        for sim in sims_labels:    
            if sim[0][2] in dict_gender:
                dict_gender[sim[0][2]] += sim[1]/tot_prob
            else:
                dict_gender[sim[0][2]] = sim[1]/tot_prob
        
        # Similarity probability for characters of top RANKED similar sentences
        # Work out average probability if multiple sentences
        for sim in sims_labels:
            if sim[0][1] in dict_character:
                dict_character[sim[0][1]] += sim[1]/tot_prob
            else:
                dict_character[sim[0][1]] = sim[1]/tot_prob
        
        # Add calculated probabilities dict for each gender to a list indexed by correct label combining sentences
        if gender_prob == []:
            gender_prob.append([test_corpus[doc_id].tags, dict_gender])
        elif gender_prob[-1][0] == test_corpus[doc_id].tags or gender_prob == []:
            gender_prob[-1][1] = {key: (gender_prob[-1][1].get(key, 0) + dict_gender.get(key, 0))/2 for key in set(gender_prob[-1][1]) | set(dict_gender)}
        else:
            gender_prob.append([test_corpus[doc_id].tags, dict_gender])
            
        # Add calculated probabilities dict for each character to list indexed by correct label combining sentences
        if character_prob == []:
            character_prob.append([test_corpus[doc_id].tags, dict_character])
        elif character_prob[-1][0] == test_corpus[doc_id].tags or character_prob == []:
            character_prob[-1][1] = {key: (character_prob[-1][1].get(key, 0) + dict_character.get(key, 0))/2 for key in set(character_prob[-1][1]) | set(dict_character)}
        else:
            character_prob.append([test_corpus[doc_id].tags, dict_character])

for sim in gender_prob:
    sim[1] = normalize(sim[1]) # Normalise probabilities
for sim in character_prob:
    sim[1] = normalize(sim[1]) # Normalise probabilities

# Tested only providing a predicted gender label if highest predicted gender above 60%
#y_pred_GENDER = [max(sim[1], key = sim[1].get) if sim[1][max(sim[1], key = sim[1].get)] > 0.6 else 'NA' for sim in gender_prob]

# Predicted Genders and Characters based on sentence similarities
y_pred_GENDER = [max(sim[1], key = sim[1].get) for sim in gender_prob]
y_pred_CHARACTER = [max(sim[1], key = sim[1].get) for sim in character_prob]

In [107]:
# Calculate test scores for gender and character based on sentence similarity only
# These test scores are for interest only.
# The character probabilities for each sentence in character_prob are used as features in the linear classifier

'''# Check similarity prediction of sentences for a partciular character
n = 0
m = 0
o=0
for i in range(len(y_pred_GENDER)):
    if y_true_CHARACTER[i] == 'IAN':
        m += 1
        if y_pred_CHARACTER[i] == 'IAN':
            n += 1
        if y_pred_GENDER[i] == 'male':
            o +=1
print('IAN:',n/m,o/m)'''

            
# testData scores Gender based on sentence similarity only
test_results = list(precision_recall_fscore_support(y_true_GENDER, y_pred_GENDER, average='weighted'))
test_results[3] = accuracy_score(y_true_GENDER, y_pred_GENDER) * 100 # Add accuracy score to results for the fold

print("\nSentence Similarity Test Data scores for Gender\nPrecision:", test_results[0], "\nRecall:", test_results[1], "\nF1 score:", test_results[2], "\nAccuracy:", test_results[3])

# testData scores Character based on sentence similarity only 
test_results = list(precision_recall_fscore_support(y_true_CHARACTER, y_pred_CHARACTER, average='weighted'))
test_results[3] = accuracy_score(y_true_CHARACTER, y_pred_CHARACTER) * 100 # Add accuracy score to results for the fold

print("\nSentence Similarity Test Data scores for Character\nPrecision:", test_results[0], "\nRecall:", test_results[1], "\nF1 score:", test_results[2], "\nAccuracy:", test_results[3])


Sentence Similarity Test Data scores for Gender
Precision: 0.5588858124736784 
Recall: 0.550711743772242 
F1 score: 0.5489505999940698 
Accuracy: 55.071174377224196

Sentence Similarity Test Data scores for Character
Precision: 0.1767011135568113 
Recall: 0.16459074733096085 
F1 score: 0.13805258401676385 
Accuracy: 16.459074733096084


In [88]:
# CREATE CLASSIFIER FEATURES, TRAIN AND RUN CLASSIFER ON TEST DATA

In [89]:
# Function to build a dependency tree using spaCy
def to_nltk_tree(node):
    global NumberBranches
    if node.n_lefts + node.n_rights > 0:
        if node.n_lefts + node.n_rights >= 2:
            NumberBranches += 1
        return Tree(node.orth_, [to_nltk_tree(child) for child in node.children])
    else:
        return node.orth_

In [90]:
# Function to create features from testData set

# Returns a dictionary of tokens based on unigrams, bigrams, POS tags and Dependency Tree from spaCY
def toFeatureVectorTest(text, tokensPOS, tokensUnigram, tokensBigram, character_prob, gender_prob):

    # Feature dict
    features = {}
    
    lemmatizer = WordNetLemmatizer()
    
    # UNIGRAMS
    # Add unigrams to feature dict. Played with weights to optimise
    for unigram in tokensUnigram:
        if unigram in features:
            if unigram == '<pause>' or unigram == '<?>' or unigram == '<!>':
                features[unigram] += 2
            else:
                features[unigram] += 1
        else:
            features[unigram] = 2
    
    # CRF POS TAGS
    # No longer used as similar results to using spaCy
    '''for tag in tokensPOS:
        if tag[1] in features:
            features[tag[1]] += 0.4
        else:
            features[tag[1]] = 0.4'''
     
    # BIGRAMS
    # Add bigrams to feature dict
    for i in range(1, len(tokensBigram)):
        context = tokensBigram[i-1] # Context word
        target = tokensBigram[i] # Target word
        ngram = context + " " + target # bigram
      
        if (bool(re.match('<.*> <.*>', ngram)) == False):
            if ngram in features:
                features[ngram] += 1
            else:
                features[ngram] = 1
    
    # Number of sentences in line and avg number words in a sentence (including punctuation)
    #features['<numbersentences>'] = 0.025*len(sent_tokenize(text.decode('utf-8')))

    # DEPENDENCY TREE
    # Use spaCy to add features for maximum depth of tree, number of branches, root, subject, object and POS tags
    MaximumDepthTree = 0
    global NumberBranches
    NumberBranches = 0
    global MaximumBranches
    MaximumBranches = 0
    
    for sent in sent_tokenize(text.decode('utf-8')):
        sent = sent.encode('utf-8').translate(None, string.punctuation)
        sent_ = nlp(sent.decode('utf-8'))
        if (len(sent.split())<=1):
            MaximumDepthTree =1
        else:
            NumberBranches = 0
            myTree = [to_nltk_tree(sent.root) for sent in sent_.sents]
            if type(myTree[0])==Tree:
                for pos in myTree[0].treepositions():
                    #print(myTree[0][pos], len(pos))
                    pass
                if (len(pos) > MaximumDepthTree):
                    MaximumDepthTree = len(pos)
                if (NumberBranches > MaximumBranches):
                    MaximumBranches = NumberBranches
        
        # Root, object and subject of sentences together with word POS tags
        for token in sent_:
            if (token.dep_ == 'ROOT' or token.dep_ =='nsubj' or token.dep_ =='dobj'):
                if ('<'+token.dep_+'> '+lemmatizer.lemmatize(token.text.lower())) in features:
                    features['<'+token.dep_+'> '+lemmatizer.lemmatize(token.text.lower())] += 0.79
                else:
                    features['<'+token.dep_+'> '+lemmatizer.lemmatize(token.text.lower())] = 0.79
                
            if '<'+token.tag_+'>' in features:
                features['<'+token.tag_+'>'] += 0.4
            else:
                features['<'+token.tag_+'>'] = 0.4
    
    # Use sentence similarities to create features for possible genders and characters based on probabilities
    for key, value in character_prob[1].items():
        features["<"+key+">"] = value*0.29
    '''for key, value in gender_prob[1].items():
        features["<"+key+">"] = value*0.08'''
        
    # Tested Viterbi method for most likely POS tag chain for sentence but had difficulty identifying words
    '''POS_tags = [Viterbi(word_tokenize(sent)) for sent in sent_tokenize(text.decode('utf-8'))]
    for tag in POS_tags:
        if tag in features:
                features[tag] += 0.4
        else:
                features[tag] = 0.4'''
    
    # Label if no features in a sentence
    if (sum(features.values()) == 0):
        features['<nofeatures>'] = 1
    
    # Adjust factor to optimise model based on number of features used for classification
    # 1.0 for unigrams only bigram features only
    # 1.2 including POS tags, spaCy trees and Gender classification with Doc2Vec model
    # 1.75 for Character classification incorporating Doc2Vec model
    FACTOR = 1.75
     
    # Normalise word based features so each sentence counts equally
    features = normalize(features)
    features = {key: value*FACTOR*len(sent_tokenize(text.decode('utf-8'))) for key, value in features.items()}
    
    # Maximum dependency tree depth feature
    features["<MaximumDepthTree>"] = MaximumDepthTree*1.48
    # Maximum dependency tree branches did not improve classification
    #features["<MaximumBranches>"] = MaximumBranches*2.25
    
    return features


In [91]:
# Function to create features from trainData set

# Returns a dictionary of tokens based on unigrams, bigrams, POS tags and Dependency Tree from spaCY
def toFeatureVectorTrain(text, tokensPOS, tokensUnigram, tokensBigram, character, gender):

    # Feature Dict
    features = {}
    lemmatizer = WordNetLemmatizer()
    
    # UNIGRAMS
    # Add unigrams to feature dict. Played with weights to optimise
    for unigram in tokensUnigram:
        if unigram in features:
            if unigram == '<pause>' or unigram == '<?>' or unigram == '<!>':
                features[unigram] += 2
            else:
                features[unigram] += 1
        else:
            features[unigram] = 2
    
    # CRF POS TAGS
    # No longer used as similar results tom using spaCy
    '''for tag in tokensPOS:
        if tag[1] in features:
            features[tag[1]] += 0.4
        else:
            features[tag[1]] = 0.4'''   
    
    # BIGRAMS
    # Add bigrams to feature dict
    for i in range(1, len(tokensBigram)):
        context = tokensBigram[i-1] # Context word
        target = tokensBigram[i] # Target word
        ngram = context + " " + target # bigram

        if (bool(re.match('<.*> <.*>', ngram)) == False):
            if ngram in features:
                features[ngram] += 1
            else:
                features[ngram] = 1
    
    # Number of sentences in line and avg number words in a sentence (including punctuation)
    #features['<numbersentences>'] = 0.025*len(sent_tokenize(text.decode('utf-8')))
  
    # DEPENDENCY TREE
    # Use spaCy to add features for maximum depth of tree, number of branches, root, subject, object and POS tags
    MaximumDepthTree = 0
    global NumberBranches
    NumberBranches = 0
    global MaximumBranches
    MaximumBranches = 0
    
    for sent in sent_tokenize(text.decode('utf-8')):
        sent = sent.encode('utf-8').translate(None, string.punctuation)
        sent_ = nlp(sent.decode('utf-8'))
        if (len(sent.split())<=1):
            MaximumDepthTree =1
        else:
            NumberBranches = 0
            myTree = [to_nltk_tree(sent.root) for sent in sent_.sents]
            if type(myTree[0])==Tree:
                for pos in myTree[0].treepositions():
                    #print(myTree[0][pos], len(pos))
                    pass
                if (len(pos) > MaximumDepthTree):
                    MaximumDepthTree = len(pos)
                if (NumberBranches > MaximumBranches):
                    MaximumBranches = NumberBranches

        for token in sent_:
            if (token.dep_ == 'ROOT' or token.dep_ =='nsubj' or token.dep_ =='dobj'):
                if ('<'+token.dep_+'> '+lemmatizer.lemmatize(token.text.lower())) in features:
                    features['<'+token.dep_+'> '+lemmatizer.lemmatize(token.text.lower())] += 0.79
                else:
                    features['<'+token.dep_+'> '+lemmatizer.lemmatize(token.text.lower())] = 0.79
                
            if '<'+token.tag_+'>' in features:
                features['<'+token.tag_+'>'] += 0.4
            else:
                features['<'+token.tag_+'>'] = 0.4
    
    # Correct character and feature tag for line
    features["<"+character+">"] = 1
    #features["<"+gender+">"] = 1
    
    # Tested Viterbi method for most likely POS tag chain for sentence but had difficulty identifying words
    '''POS_tags = [Viterbi(word_tokenize(sent)) for sent in sent_tokenize(text.decode('utf-8'))]
    for tag in POS_tags:
        if tag in features:
                features[tag] += 0.4
        else:
                features[tag] = 0.4'''
    
    # Label if no features in sentence
    if (sum(features.values()) == 0):
        features['<nofeatures>'] = 1
        
    # Adjust factor to optimise model based on number of features used for classification
    # 1.0 for unigrams only bigram features only
    # 1.2 including POS tags, spaCy trees and Gender classification with Doc2Vec model
    # 1.75 for Character classification incorporating Doc2Vec model
    FACTOR = 1.75
     
    # Normalise word based features so each sentence counts equally
    features = normalize(features)
    features = {key: value*FACTOR*len(sent_tokenize(text.decode('utf-8'))) for key, value in features.items()}
    
    # Maxuimum dependency tree depth feature
    features["<MaximumDepthTree>"] = MaximumDepthTree*1.48
    # Maximum dependency tree branches did not improve classification
    #features["<MaximumBranches>"] = MaximumBranches*2.25
    
    return features

In [92]:
# TRAINING AND VALIDATING CLASSIFIER
# Pipeline version

def trainClassifier(trainData):
    
    print("Training Classifier...")
    
    # Adjusted and optimised the cost parameter.
    pipeline =  Pipeline([('svc', LinearSVC(C = 0.88))])
    
    # Trains with Feature Dictionary as trainData[0] and Labels as trainData[1] i.e. train(train_data, train_labels) 
    return SklearnClassifier(pipeline).train(trainData)

In [93]:
# TRAINING AND VALIDATING CLASSIFIER
# Option without pipeline as only running one operation

def trainClassifier2(trainData):
    
    print("Training Classifier...")
    
    # Trains with Feature Dictionary as trainData[0] and Labels as trainData[1]. Oprtimised C value to 0.88. 
    return SklearnClassifier(LinearSVC(C = 0.88)).train(trainData)

In [94]:
# Cross Validation Function based on chosen number of folds
def crossValidate(dataset, folds):
    
    shuffle(dataset) # So different set of folds each time run
    cv_results = [] # List to hold final values
    sum_results = [0, 0, 0, 0] # List to hold sum total of values for all 10 folds
    foldSize = int(len(dataset)/folds) # foldsize
    
    for i in range(0,len(dataset),foldSize): # will run 10 times for 10-folds
        
        testFold = dataset[i:i + foldSize] # One fold set aside to predict labels and test scores
        trainingFolds = dataset[0:i] + dataset[i + foldSize:] # Rest of the data used for training
        classifier = trainClassifier(trainingFolds) # Apply classifier to trainingData
        y_pred = predictLabels(testFold, classifier) # Predicted labels by classifier
        y_true = list(map(lambda t: t[1], testFold)) # Correct labels of testFold
        
        # Do a binary classification calculting F1 score with beta == 1 balance between precision and recall
        # Results weighted by number of samples of each class label
        fold_results = list(precision_recall_fscore_support(y_true, y_pred, average='weighted'))
        fold_results[3] = accuracy_score(y_true, y_pred) * 100 # Add accuracy score to results for the fold
        
        sum_results = [sum(x) for x in zip(fold_results, sum_results)] # Sum fold totals for precision, recall, fscore, accuracy
     
    cv_results = [y/folds for y in sum_results] # Calculate averages over all 10 folds
    
    return cv_results

In [95]:
# PREDICTING LABELS GIVEN CLASSIFIER
# Uses feature vectors created in the functions above

def predictLabels(SampleLines, classifier):
    return classifier.classify_many(map(lambda t: t[0], SampleLines))

In [108]:
# MAIN CODE BLOCK

# Used for cross validation (commented out to split training data) or as set to train classifier on training data and test on test data
# Split data into training and test sets, preprocess reviews, create feature vectors, run classifier and calculate scores

# Load in review data
rawData = []          # the filtered data from the dataset file
preprocessedData = [] # the preprocessed data
trainData = []  # the training data
heldoutData = [] # heldout data set made from split training data file
testData = [] # the test data

rawTrainData = []
rawTestData = []

# Initialise dependency tree depth and number of branches variable
NumberBranches = 0
MaximumBranches = 0

# A global feature dictionary and one to limit to the most common features
featureDict = Counter()
most_tokens = Counter()

# Path to the training and test data files
filePath_training = 'training.csv'
filePath_test = 'test.csv'

# Load training and test tests
loadAllData(filePath_training, filePath_test)
# Load training data for cross validation and heldout data set route
#loadData(filePath_training) 

print("Now %d rawData, %d trainData, %d testData" % (len(rawData), len(trainData), len(testData)),
      "Preparing training and test data...",sep='\n')

# Create features for training and test sets or split training set and create features for cross validation
prepareData()
# Split training data into a training and heldout set
#splitData(0.8)

# print the number of training samples and the number of features
print("Now %d rawData, %d trainData, %d testData" % (len(rawData), len(trainData), len(testData)),
      "Training Samples: ", len(trainData), "Features: ", len(featureDict), sep='\n')

# Option to limit feature list to a given limit of the most common features
print("Limiting features to the 45000 most common")
most_tokens = map(lambda t: t[0], featureDict.most_common(45000))
trainData = [({k:v for (k,v) in line[0].items() if k in most_tokens}, line[1]) for line in trainData]
testData = [({k:v for (k,v) in line[0].items() if k in most_tokens}, line[1]) for line in testData]

# Perform 10-fold cross validation on the training data as used when optimising classifier
'''cvResults = crossValidate(trainData, 10) 
print("\n10-fold Cross Validation average scores\nPrecision:", cvResults[0], "\nRecall:", cvResults[1], "\nF1 score:", cvResults[2], "\nAccuracy:", cvResults[3])'''

# Apply classifier to test data
trained_classifier = trainClassifier2(trainData)
y_pred = predictLabels(testData, trained_classifier)
y_true = list(map(lambda t: t[1], testData)) # Correct labels of testFold

# Calculate predicted data scores
test_results = list(precision_recall_fscore_support(y_true, y_pred, average='weighted'))
test_results[3] = accuracy_score(y_true, y_pred) * 100 # Add accuracy score to results for the fold

print("\nTest Data scores\nPrecision:", test_results[0], "\nRecall:", test_results[1], "\nF1 score:", test_results[2], "\nAccuracy:", test_results[3])

Now 0 rawData, 0 trainData, 0 testData
Preparing training and test data...
Now 0 rawData, 10113 trainData, 1124 testData
Training Samples: 
10113
Features: 
49376
Limiting features to the 45000 most common
Training Classifier...

Test Data scores
Precision: 0.25297534614443623 
Recall: 0.22597864768683273 
F1 score: 0.20236558653069567 
Accuracy: 22.597864768683273


In [46]:
# CODE BLOCK TO CHECK FEATURES OF TRAINING OR TEST DATA SETS AND TO SEE THE NUMBER OF EACH LABEL
# Used when making changes to the classifier

CharacterNames = {}
Genders = {}

with open('test.csv') as f:
        reader = csv.reader(f, delimiter=',')
        #next(reader, None)
        i = 0
        for line in reader:
            (Text, Character, Gender) = parseText(line)
            
            # Population dictionaries of gender and character occurrences
            if Character in CharacterNames:
                CharacterNames[Character]+=1
            else:
                CharacterNames[Character]=1
            if Gender in Genders:
                Genders[Gender]+=1
            else:
                Genders[Gender]=1
                
            print(toFeatureVectorTest(Text, preProcessPOS(Text), preProcessUnigram(Text), preProcessBigram(Text), character_prob[i], gender_prob[i]))
            #print(toFeatureVectorTrain(Text, preProcessPOS(Text), preProcessUnigram(Text), preProcessBigram(Text), Character, Gender))
            i += 1

# Dictionary of Character and Gender occurrences calculated to use as a baseline for classifier performance
print(CharacterNames)
print(Genders)

{'<STACEY>': 0.0037693421668769326, '<SEAN>': 0.0034374756400063267, '<MaximumDepthTree>': 1.48, u'<RP>': 0.06504065040650407, '<GARRY>': 0.0027272050331077734, 'out <?>': 0.16260162601626016, '<TANYA>': 0.006904960563443232, u'<NNP>': 0.06504065040650407, '<MINTY>': 0.0016747309590035243, '<HEATHER>': 0.0017807872313299344, '<SHIRLEY>': 0.005453923557347342, 'kicked': 0.3252032520325203, '<ROXY>': 0.0036154414777985386, u'<PRP>': 0.06504065040650407, u'<ROOT> kicked': 0.12845528455284555, '<s> kicked': 0.16260162601626016, '<RONNIE>': 0.0018772801468774822, '<?>': 0.3252032520325203, '<BRADLEY>': 0.0016493145294349592, '<CHRISTIAN>': 0.005542771621075088, 'kicked you': 0.16260162601626016, '<STEVEN>': 0.0035815567713662175, u'<dobj> you': 0.12845528455284555, 'you out': 0.16260162601626016, '<JACK>': 0.005139681847048091}
{'<MaximumDepthTree>': 1.48, 'meaning <?>': 0.2406417112299465, '<STACEY>': 0.004760683519264098, '<ROXY>': 0.002372934557025894, u'<ROOT> meaning': 0.19010695187165

{'bring bloke': 0.11059228311624478, 'bring': 0.22118456623248955, 'musical </s>': 0.11059228311624478, u'<VB>': 0.22118456623248955, 'hell': 0.3317768493487343, 'be able': 0.11059228311624478, '<s> listen': 0.11059228311624478, '<IAN>': 0.0017734477908527184, '<ROXY>': 0.001121083696697236, '<HEATHER>': 0.0032355463846107033, 'watch': 0.22118456623248955, u'<VBP>': 0.08847382649299582, u'<ROOT> know': 0.08736790366183339, 'know': 0.22118456623248955, 'listen to': 0.11059228311624478, u'<NN>': 0.17694765298599163, '<CHRISTIAN>': 0.0013677932258193387, '<CLARE>': 0.0020775348490389115, 'to bring': 0.11059228311624478, 'bloke back': 0.11059228311624478, u'<RB>': 0.04423691324649791, 'whatever his': 0.11059228311624478, 'back': 0.22118456623248955, 'kylie': 0.22118456623248955, '<JANE>': 0.001690619097085451, '<PHIL>': 0.0015344216773104879, 'own place': 0.11059228311624478, 'know whatever': 0.11059228311624478, 'alright in': 0.11059228311624478, 'lot do': 0.11059228311624478, u'<ROOT> li

{'me youve': 0.06739049045301385, u'<VB>': 0.1347809809060277, 'probably just': 0.06739049045301385, '<pause>': 0.2695619618120554, '<?>': 0.4043429427180831, 'of it': 0.06739049045301385, u'<dobj> it': 0.05323848745788094, 'i probably': 0.06739049045301385, '<?> dont': 0.06739049045301385, u'<VBG>': 0.026956196181205542, '<IAN>': 0.0021400940990731867, 'finger working': 0.06739049045301385, u'<ROOT> it': 0.05323848745788094, u'<DT>': 0.053912392362411084, 'get': 0.1347809809060277, '<HEATHER>': 0.000766977195754962, u'<TO>': 0.026956196181205542, 'gonna': 0.1347809809060277, u'<VBP>': 0.10782478472482217, u'<ROOT> know': 0.05323848745788094, 'nearly': 0.1347809809060277, 'it before': 0.06739049045301385, u'<NN>': 0.10782478472482217, u'<dobj> finger': 0.05323848745788094, 'me now': 0.06739049045301385, u'<WRB>': 0.026956196181205542, 'magic': 0.1347809809060277, 'like': 0.1347809809060277, '<CHRISTIAN>': 0.0011533235623616116, 'shy': 0.1347809809060277, u'<RB>': 0.2426057656308498, 'a

{'er you': 0.0718562874251497, '<STACEY>': 0.0008448978705669504, 'you <?>': 0.0718562874251497, u'<nsubj> other': 0.05676646706586826, 'what er': 0.0718562874251497, '<SEAN>': 0.0016021074975877524, '<JANE>': 0.0007920634434499423, '<PHIL>': 0.0016082854944336619, '<MaximumDepthTree>': 5.92, 'each other': 0.0718562874251497, u'<ROOT> what': 0.05676646706586826, '<GARRY>': 0.001297738578837794, 'er': 0.1437125748502994, '<TANYA>': 0.004819457622054793, u'<JJ>': 0.028742514970059883, 'two know': 0.0718562874251497, 'two': 0.1437125748502994, '<MINTY>': 0.0008196248410268293, 'you two': 0.0718562874251497, u'<DT>': 0.028742514970059883, '<SHIRLEY>': 0.0008136094463930346, '<IAN>': 0.0015947215179388458, 'other do': 0.0718562874251497, '<ROXY>': 0.0008250441082053956, u'<PRP>': 0.05748502994011977, 'do you': 0.0718562874251497, '<HEATHER>': 0.0024529860001131893, u'<nsubj> you': 0.05676646706586826, u'<VBP>': 0.05748502994011977, u'<WP>': 0.028742514970059883, 'know each': 0.0718562874251

{'<SHIRLEY>': 0.007353661360511612, '<MaximumDepthTree>': 1.48, u'<ROOT> it': 0.25948905109489057, u'<PRP>': 0.13138686131386862, '<CHRISTIAN>': 0.005682719028431115, '<pause>': 0.656934306569343, '<TANYA>': 0.014464074831165028, '<STEVEN>': 0.003566710695916675, '<HEATHER>': 0.003769150531391112, '<s> it': 0.3284671532846715, '<SEAN>': 0.003739981881669693, '<JANE>': 0.01866020599189116, '<PHIL>': 0.00396710065211204, '<RONNIE>': 0.011432154439112937, '<ROXY>': 0.003939723671222529, 'it <pause>': 0.3284671532846715, '<BRADLEY>': 0.0113706341307564, '<IAN>': 0.007309357238374458}
{u'<NNP>': 0.02304737516005122, 'dont': 0.11523687580025611, 'i dont': 0.05761843790012806, '<MAX>': 0.0006178659995399293, u'<nsubj> i': 0.09103713188220233, u'<ROOT> think': 0.045518565941101166, '<s> i': 0.05761843790012806, '<JANE>': 0.0006128178341183312, 'clares': 0.11523687580025611, '<MaximumDepthTree>': 2.96, 'im': 0.11523687580025611, u'<VB>': 0.02304737516005122, '<ROXY>': 0.0012233150778672393, 're

{u'<VBD>': 0.07445708376421924, '<MAX>': 0.006083683190215961, '<SEAN>': 0.008416785748231962, '<PHIL>': 0.0020338090255284393, '<STACEY>': 0.004097750277706829, '<s> who': 0.18614270941054806, u'<ROOT> died': 0.147052740434333, 'died <?>': 0.18614270941054806, '<TANYA>': 0.006696933000498114, 'who died': 0.18614270941054806, '<IAN>': 0.0020373886859866024, '<SHIRLEY>': 0.002813093888890116, '<ROXY>': 0.004395211530966977, '<HEATHER>': 0.002065139667377606, u'<nsubj> who': 0.147052740434333, u'<WP>': 0.07445708376421924, '<RONNIE>': 0.0021765422444041923, '<?>': 0.3722854188210961, 'died': 0.3722854188210961, '<MaximumDepthTree>': 1.48, '<STEVEN>': 0.002510357886185372, '<CLARE>': 0.0022673351150051205, '<JACK>': 0.008387355468061632}
{'<s> yeah': 0.03625377643504532, 'ive': 0.07250755287009064, '<STACEY>': 0.0007836007224665143, u'<RB>': 0.04350453172205439, '<SHIRLEY>': 0.0007792840094869784, '<MAX>': 0.0007597373162261491, 'aint i': 0.03625377643504532, u'<nsubj> i': 0.0286404833836

{u'<VBD>': 0.05363128491620112, '<MAX>': 0.0008341275993427875, '<s> they': 0.06703910614525141, '<SEAN>': 0.0014491925059090071, u'<dobj> wedding': 0.052960893854748614, 'first wedding': 0.06703910614525141, u'<VB>': 0.02681564245810056, u'<RP>': 0.02681564245810056, 'call the': 0.06703910614525141, '<TANYA>': 0.0036904823469470233, u'<JJ>': 0.02681564245810056, '<MINTY>': 0.0006955451562054007, 'they had': 0.06703910614525141, 'call': 0.13407821229050282, u'<DT>': 0.02681564245810056, u'<TO>': 0.02681564245810056, u'<nsubj> they': 0.10592178770949723, 'to call': 0.06703910614525141, '<IAN>': 0.0022514777977981387, '<SHIRLEY>': 0.0014733856404665535, '<ROXY>': 0.000693545037995816, u'<PRP>': 0.05363128491620112, '<HEATHER>': 0.0007636976465059853, '<RONNIE>': 0.002953381284824039, 'wedding': 0.13407821229050282, u'<NN>': 0.02681564245810056, 'wedding off': 0.06703910614525141, 'the first': 0.06703910614525141, 'off didnt': 0.06703910614525141, '<CHRISTIAN>': 0.002392502223488828, '<PH

{'<STACEY>': 0.0013384912973028573, u'<>': 0.04725959960617001, '<MAX>': 0.0038364229742234024, 'u <?>': 0.11814899901542501, '<JANE>': 0.0005970690363781432, u'<IN>': 0.04725959960617001, '<MaximumDepthTree>': 2.96, 'touch': 0.23629799803085003, '<ROXY>': 0.0006173976454599695, 'for u': 0.11814899901542501, '<GARRY>': 0.004947027646179346, u'<PRP>': 0.04725959960617001, '<pause>': 0.23629799803085003, '<TANYA>': 0.0011875221142306153, u'<JJ>': 0.04725959960617001, 'please': 0.23629799803085003, 'minty </s>': 0.11814899901542501, '<MINTY>': 0.0012020288178746553, 'minty': 0.23629799803085003, '<BRADLEY>': 0.0032072140325044357, u'<DT>': 0.04725959960617001, 'final': 0.23629799803085003, '<IAN>': 0.0025744329838106824, '<SHIRLEY>': 0.0013100593253459816, u'<ROOT> touch': 0.09333770922218576, 'please <pause>': 0.11814899901542501, u'<PRP$>': 0.04725959960617001, '<HEATHER>': 0.0019116196033015914, 'it the': 0.11814899901542501, '<RONNIE>': 0.002454801757278886, '<s> please': 0.1181489990

{u'<NNP>': 0.07355946056395585, 'move': 0.07355946056395585, 'sorry son': 0.036779730281977924, u'<VB>': 0.044135676338373515, 'is the': 0.036779730281977924, 'your sister': 0.036779730281977924, u'<ROOT> say': 0.029055986922762563, '<CLARE>': 0.00010050650313316704, '<pause>': 0.4413567633837351, 'young': 0.07355946056395585, 'be cooped': 0.036779730281977924, u'<CC>': 0.02942378422558234, u'<JJS>': 0.01471189211279117, 'tring': 0.11033919084593377, 'sorry': 0.07355946056395585, '<IAN>': 0.0010091855993115493, '<ROXY>': 0.00010504189180473274, 'dot <?>': 0.036779730281977924, '<pause> and': 0.07355946056395585, '<HEATHER>': 0.00045962661952444594, u'<TO>': 0.01471189211279117, 'besides it': 0.036779730281977924, u'<VBP>': 0.044135676338373515, 'im so': 0.036779730281977924, 'tringsure': 0.07355946056395585, u'<NN>': 0.10298324478953819, 'what about': 0.036779730281977924, '<pause> im': 0.036779730281977924, 'dont want': 0.036779730281977924, 'little oscar': 0.036779730281977924, 'like

{'<STACEY>': 0.0053656140410224965, '<SEAN>': 0.0016457996240005883, '<JANE>': 0.005094857334885885, '<PHIL>': 0.0018652360005048168, 'doesnt': 0.31386224934612034, 'gear doesnt': 0.15693112467306017, '<GARRY>': 0.005395498701627522, '<TANYA>': 0.0051449210522218425, 'that gear': 0.15693112467306017, 'doesnt </s>': 0.15693112467306017, u'<DT>': 0.06277244986922408, '<IAN>': 0.0034274710884895357, '<SHIRLEY>': 0.0016686704765454423, '<ROXY>': 0.0016684573466692562, 'gear': 0.31386224934612034, '<HEATHER>': 0.002275969795571869, '<s> that': 0.15693112467306017, '<RONNIE>': 0.0018156743103196718, u'<VBZ>': 0.06277244986922408, u'<NN>': 0.06277244986922408, '<BRADLEY>': 0.0016617952664985326, '<MaximumDepthTree>': 1.48, '<STEVEN>': 0.001668242944918275, '<CLARE>': 0.0034763539609275083, '<JACK>': 0.003335464210984208, u'<ROOT> doe': 0.12397558849171754, u'<nsubj> gear': 0.12397558849171754, u'<RB>': 0.06277244986922408}
{'<s> yeah': 0.06518196632265076, 'do is': 0.06518196632265076, 'right

{'someone </s>': 0.08139271987338913, u'<VBD>': 0.03255708794935565, 'all i': 0.08139271987338913, u'<nsubj> all': 0.06430024869997741, 'statement from': 0.08139271987338913, 'if i': 0.08139271987338913, '<MAX>': 0.002650233557329993, u'<nsubj> i': 0.19290074609993224, '<s> i': 0.08139271987338913, '<JANE>': 0.00046597605554536117, '<PHIL>': 0.0018629893067748937, 'asking if': 0.08139271987338913, 'desperate': 0.16278543974677825, 'need': 0.16278543974677825, 'wasnt desperate': 0.08139271987338913, '<GARRY>': 0.0006914591177554932, u'<PRP>': 0.09767126384806697, 'i wasnt': 0.08139271987338913, 'be asking': 0.08139271987338913, u'<ROOT> asking': 0.06430024869997741, '<TANYA>': 0.003328840354640102, u'<JJ>': 0.03255708794935565, 'need is': 0.08139271987338913, '<MINTY>': 0.0004344252974794118, 'from someone': 0.08139271987338913, 'wasnt': 0.16278543974677825, 'asking': 0.16278543974677825, 'statement': 0.16278543974677825, u'<MD>': 0.03255708794935565, u'<VBG>': 0.03255708794935565, 'i w

{'<STACEY>': 0.0029571303602877667, '<?> why': 0.17760236803157375, '<MAX>': 0.0039034435956026114, 'so why': 0.17760236803157375, '<SEAN>': 0.00404334963815977, '<JANE>': 0.008175107993242114, u'<ROOT> night': 0.14030587074494327, '<MaximumDepthTree>': 2.96, '<MINTY>': 0.004150055925524577, 'night <?>': 0.17760236803157375, '<GARRY>': 0.001618071991018896, '<TANYA>': 0.006086195704798161, 'why this': 0.17760236803157375, 'why last': 0.17760236803157375, u'<DT>': 0.0710409472126295, '<IAN>': 0.00616525336973457, '<SHIRLEY>': 0.0009866324240475689, '<ROXY>': 0.001964237105762016, '<HEATHER>': 0.0009780495196026154, '<RONNIE>': 0.0030956067001209515, '<s> so': 0.17760236803157375, u'<NN>': 0.0710409472126295, 'this <?>': 0.17760236803157375, '<?>': 0.710409472126295, '<BRADLEY>': 0.00094667929601259, u'<WRB>': 0.142081894425259, 'last': 0.3552047360631475, '<CHRISTIAN>': 0.0015971629112875138, '<PHIL>': 0.0010041157664651473, '<STEVEN>': 0.0018577352940916411, u'<RB>': 0.0710409472126295

{'ive': 0.2231174465447784, '<STACEY>': 0.002171422028453541, u'<RB>': 0.08924697861791137, '<MAX>': 0.0005634183155978588, '<SEAN>': 0.0011936737728987575, '<JANE>': 0.0030187413374332785, u'<IN>': 0.08924697861791137, '<MaximumDepthTree>': 1.48, u'<VB>': 0.04462348930895568, '<s> like': 0.1115587232723892, u'<VBN>': 0.08924697861791137, 'dragged through': 0.1115587232723892, 'through a': 0.1115587232723892, 'hedge backwards': 0.1115587232723892, 'very fetching': 0.1115587232723892, 'fetching': 0.2231174465447784, 'a hedge': 0.1115587232723892, '<MINTY>': 0.003041537743768633, '<?> very': 0.1115587232723892, '<GARRY>': 0.002794264276048033, u'<DT>': 0.04462348930895568, '<IAN>': 0.002286902959619926, '<SHIRLEY>': 0.0005598555141200855, '<ROXY>': 0.0005530926794118459, u'<PRP>': 0.04462348930895568, '<HEATHER>': 0.0005582696800017487, u'<ROOT> dragged': 0.08813139138518747, '<RONNIE>': 0.003889622058557693, 'backwards <?>': 0.1115587232723892, 'fetching </s>': 0.1115587232723892, u'<NN

{'<s> bet': 0.14661960358403475, 'a soldier': 0.14661960358403475, u'<VBD>': 0.1172956828672278, 'buff': 0.2932392071680695, '<MAX>': 0.002121072724424157, u'<nsubj> i': 0.11582948683138745, '<s> i': 0.14661960358403475, '<JANE>': 0.004991611168039083, 'know he': 0.14661960358403475, 'he wa': 0.2932392071680695, '<STACEY>': 0.0007976460614556597, u'<ROOT> passionate': 0.11582948683138745, 'wa a': 0.14661960358403475, 'soldier': 0.2932392071680695, '<TANYA>': 0.005123080829461219, 'hungry passionate': 0.14661960358403475, u'<JJ>': 0.1172956828672278, u'<NNP>': 0.0586478414336139, '<MINTY>': 0.0030950019972907595, '<s> hungry': 0.14661960358403475, '<GARRY>': 0.00122340955847114, u'<DT>': 0.0586478414336139, 'wa buff': 0.14661960358403475, 'i know': 0.14661960358403475, '<IAN>': 0.004783507717299899, '<SHIRLEY>': 0.001999381197631581, '<ROXY>': 0.002347959115892819, u'<PRP>': 0.17594352430084176, '<HEATHER>': 0.004583435586589193, u'<VBP>': 0.0586478414336139, '<RONNIE>': 0.0012609508623

{u'<dobj> someone': 0.05796983285772522, u'<nsubj> motor': 0.05796983285772522, '<STACEY>': 0.00237515210370337, 'forget': 0.14675907052588663, '<SHIRLEY>': 0.0004012094752099037, u'<CC>': 0.02935181410517733, '<MAX>': 0.000398170323717199, 'legit </s>': 0.07337953526294332, '<s> no': 0.07337953526294332, '<SEAN>': 0.00039590028071619554, 'from now': 0.07337953526294332, u'<IN>': 0.05870362821035466, u'<ROOT> is': 0.05796983285772522, 'kill': 0.14675907052588663, 'want': 0.14675907052588663, u'<VBZ>': 0.02935181410517733, u'<ROOT> cut': 0.05796983285772522, u'<VBN>': 0.02935181410517733, '<GARRY>': 0.000302262600430325, 'it </s>': 0.07337953526294332, 'cut': 0.14675907052588663, 'shuts': 0.14675907052588663, '<s> forget': 0.07337953526294332, '<pause>': 0.14675907052588663, 'more stolen': 0.07337953526294332, 'you want': 0.07337953526294332, 'and shuts': 0.07337953526294332, '<s> from': 0.07337953526294332, 'to kill': 0.07337953526294332, 'no cut': 0.07337953526294332, 'do you': 0.0733

{'real mate': 0.040014820303816215, 'wedding competition': 0.040014820303816215, 'runner </s>': 0.040014820303816215, 'of grand': 0.040014820303816215, '<MaximumDepthTree>': 4.4399999999999995, u'<dobj> competition': 0.03161170804001481, u'<VB>': 0.016005928121526486, 'saved': 0.08002964060763243, 'marrying': 0.08002964060763243, 'kind of': 0.040014820303816215, 'alright now': 0.040014820303816215, 'to marrying': 0.040014820303816215, 'saved the': 0.040014820303816215, u'<VBG>': 0.016005928121526486, '<IAN>': 0.0016255385314532553, 'real': 0.08002964060763243, 'stepped in': 0.040014820303816215, '<ROXY>': 0.00011376401795185949, 'couple': 0.08002964060763243, u'<VBP>': 0.03201185624305297, 'wedding': 0.08002964060763243, u'<NN>': 0.12804742497221186, '<SEAN>': 0.00032312907783956706, 'day': 0.08002964060763243, '<CHRISTIAN>': 0.0002237892311776157, '<s> now': 0.08002964060763243, '<CLARE>': 0.00045693920796041436, 'we win': 0.040014820303816215, 'this wedding': 0.040014820303816215, 'b

{'<SHIRLEY>': 0.0, '<STACEY>': 0.0, '<ROXY>': 0.0, '<CHRISTIAN>': 0.0, '<TANYA>': 0.0, '<MAX>': 0.0, '<CLARE>': 0.0, '<JACK>': 0.0, '<PHIL>': 0.0, '<RONNIE>': 0.0, '<MaximumDepthTree>': 0.0, '<SEAN>': 0.0, '<HEATHER>': 0.0, '<BRADLEY>': 0.0, '<IAN>': 0.0}
{'different i': 0.050718512256973804, u'<VBD>': 0.020287404902789522, 'people': 0.10143702451394761, 'not </s>': 0.050718512256973804, 'wa the': 0.050718512256973804, u'<nsubj> i': 0.08013524936601861, '<?> people': 0.050718512256973804, '<JANE>': 0.000548655447529841, '<PHIL>': 0.0017022900659201506, '<STACEY>': 0.0007847778019856529, 'it wa': 0.050718512256973804, 'suppose </s>': 0.050718512256973804, u'<ROOT> wa': 0.040067624683009305, 'with me': 0.050718512256973804, '<GARRY>': 0.001295652101070703, u'<PRP>': 0.14201183431952663, '</s> look': 0.050718512256973804, 'different': 0.10143702451394761, u'<ROOT> react': 0.040067624683009305, 'benwell': 0.10143702451394761, 'with ben': 0.050718512256973804, '<TANYA>': 0.00162395157031883

{'<s> he': 0.17509727626459146, 'found it': 0.17509727626459146, '<?> why': 0.17509727626459146, '<MAX>': 0.0018961430522065725, 'you up': 0.17509727626459146, u'<ROOT> found': 0.13832684824902725, '<SEAN>': 0.0024650357320810234, '<JANE>': 0.004927883291517039, u'<IN>': 0.07003891050583659, '<STACEY>': 0.0018582591677956128, 'to <?>': 0.17509727626459146, u'<RP>': 0.07003891050583659, u'<VBN>': 0.07003891050583659, '<GARRY>': 0.002565547720910513, 'it <!>': 0.17509727626459146, '<MINTY>': 0.002018873144517164, 'up to': 0.17509727626459146, '<BRADLEY>': 0.004765502105766376, '<IAN>': 0.004884636061385419, '<SHIRLEY>': 0.0025617401852463476, '<!>': 0.3501945525291829, '<ROXY>': 0.001954934598013436, u'<PRP>': 0.21011673151750976, '<HEATHER>': 0.00045438577844879373, u'<WRB>': 0.07003891050583659, u'<nsubj> you': 0.13832684824902725, u'<dobj> it': 0.13832684824902725, '<RONNIE>': 0.004326563060005156, u'<ROOT> up': 0.13832684824902725, u'<VBZ>': 0.07003891050583659, '<?>': 1.050583657587

{'reckon whatever': 0.07085219444991146, u'<CC>': 0.028340877779964586, '<MAX>': 0.00038375209540730197, u'<nsubj> i': 0.1119464672308601, '<PHIL>': 0.0004502594865867097, 'whatever': 0.14170438889982293, 'sayyoure still': 0.07085219444991146, '<JANE>': 0.0015999354822783386, u'<IN>': 0.028340877779964586, 'a gentleman': 0.07085219444991146, u'<VB>': 0.028340877779964586, 'gentleman <?>': 0.07085219444991146, '<s> know': 0.07085219444991146, 'what i': 0.07085219444991146, 'still': 0.14170438889982293, u'<ROOT> reckon': 0.05597323361543005, 'for your': 0.07085219444991146, 'and a': 0.07085219444991146, '<TANYA>': 0.0028389500517004016, 'still waiting': 0.07085219444991146, u'<NNP>': 0.08502263333989377, 'gentleman': 0.14170438889982293, 'sayyoure': 0.14170438889982293, 'know what': 0.07085219444991146, 'waiting': 0.14170438889982293, u'<PRP$>': 0.028340877779964586, u'<VBG>': 0.028340877779964586, 'reckon <?>': 0.07085219444991146, '<SHIRLEY>': 0.001654676718703712, u'<WDT>': 0.02834087

{'<STACEY>': 0.0011392960247658154, u'<ROOT> not': 0.168583283935981, 'not </s>': 0.21339656194427972, 'u <?>': 0.21339656194427972, '<JANE>': 0.002576412239447327, '<PHIL>': 0.0022929405467129954, '<MaximumDepthTree>': 2.96, u'<ROOT> between': 0.168583283935981, '<GARRY>': 0.002102254507747974, '<TANYA>': 0.008019735768044043, '<MINTY>': 0.005916438491834415, '<MAX>': 0.005878608923615168, 'course not': 0.21339656194427972, 'course': 0.42679312388855944, '<IAN>': 0.0012615413169855952, '<SHIRLEY>': 0.001180867024371854, '<ROXY>': 0.001054899850985068, u'<PRP>': 0.0853586247777119, '<HEATHER>': 0.004578560745550646, '<RONNIE>': 0.00587272339546955, '<s> between': 0.21339656194427972, 'of course': 0.21339656194427972, '<?>': 0.42679312388855944, '<BRADLEY>': 0.003469029127721588, '<SEAN>': 0.001281950820270956, '<CHRISTIAN>': 0.007364129233451771, u'<IN>': 0.0853586247777119, '<?> of': 0.21339656194427972, '<STEVEN>': 0.0010420098888839183, u'<RB>': 0.2560758743331357, 'between u': 0.21

{'thanks for': 0.07458563535911603, 'a problem': 0.07458563535911603, '<STACEY>': 0.0007833170404189084, u'<RB>': 0.059668508287292824, u'<>': 0.029834254143646412, '<MAX>': 0.0016849941427524383, '<pause> we': 0.07458563535911603, 'calling </s>': 0.07458563535911603, 'yeah': 0.14917127071823205, '<s> no': 0.07458563535911603, '<SEAN>': 0.0008066752508641801, 'you mr': 0.07458563535911603, u'<IN>': 0.059668508287292824, '<MaximumDepthTree>': 1.48, 'thanks': 0.14917127071823205, u'<VB>': 0.059668508287292824, '<ROXY>': 0.0005989188524461513, '<JACK>': 0.0006659059979846352, u'<RP>': 0.029834254143646412, 'absolute pleasure': 0.07458563535911603, '<GARRY>': 0.0015821296960399221, 'absolute': 0.14917127071823205, 'problem <pause>': 0.07458563535911603, '<CLARE>': 0.0022436872249232296, 'sort that': 0.07458563535911603, '<pause>': 0.2983425414364641, '<TANYA>': 0.003387569967071526, u'<JJ>': 0.029834254143646412, u'<NNP>': 0.029834254143646412, 'yeah itll': 0.07458563535911603, 'it not': 0

{'<s> yeah': 0.25586353944562895, '<STACEY>': 0.004251132131237922, '<MAX>': 0.004305584902028455, 'yeah': 0.5117270788912579, '<SEAN>': 0.0027380519482718153, '<JANE>': 0.0058774790078638985, '<PHIL>': 0.004124927903363304, '<MaximumDepthTree>': 1.48, 'maybe later': 0.25586353944562895, '<GARRY>': 0.009170050176841468, u'<ROOT> later': 0.2021321961620469, '<MINTY>': 0.0027625914390052428, '<IAN>': 0.0041241331223438454, '<SHIRLEY>': 0.004237770018894292, '<ROXY>': 0.0026969994647726815, '<HEATHER>': 0.004166303992670136, u'<ROOT> yeah': 0.2021321961620469, '<s> maybe': 0.25586353944562895, '<RONNIE>': 0.004149165475154035, '<BRADLEY>': 0.0014122161660128296, 'maybe': 0.5117270788912579, u'<RB>': 0.2046908315565032, 'later': 0.5117270788912579, 'later </s>': 0.25586353944562895, '<JACK>': 0.006015530699987691, '<TANYA>': 0.012756629067209292, 'yeah </s>': 0.25586353944562895, '<STEVEN>': 0.0014118609235755067, u'<UH>': 0.1023454157782516}
{'<MaximumDepthTree>': 2.96, '<SEAN>': 0.001374

{u'<VBD>': 0.07324516785350967, 'that we': 0.18311291963377418, '<MAX>': 0.0038909602919291087, '<SEAN>': 0.0009769238005329502, '<JANE>': 0.0028810613177201053, u'<IN>': 0.07324516785350967, 'we fancied': 0.18311291963377418, '<STACEY>': 0.006914643644101407, u'<ROOT> what': 0.14465920651068162, '<GARRY>': 0.0031058809212705933, 'fancied billy': 0.18311291963377418, '<TANYA>': 0.004859398692705757, '<MINTY>': 0.00194656420495456, '<IAN>': 0.006964333446974401, u'<ROOT> fancied': 0.14465920651068162, '<SHIRLEY>': 0.0009883573237025678, 'billy <?>': 0.18311291963377418, '<ROXY>': 0.0028596013875429377, u'<PRP>': 0.07324516785350967, '<HEATHER>': 0.0009778536236686906, u'<WP>': 0.07324516785350967, '<RONNIE>': 0.002907655946217427, 'what <?>': 0.18311291963377418, '<?>': 0.7324516785350967, '<BRADLEY>': 0.003910163807192515, '<CHRISTIAN>': 0.007923487982445226, '<MaximumDepthTree>': 1.48, u'<RB>': 0.07324516785350967, '<s> what': 0.18311291963377418, '<JACK>': 0.0010284908626751615, 'bil

{'<STACEY>': 0.002060906003076582, 'dont': 0.18255578093306293, 'good day': 0.09127789046653147, '<MAX>': 0.002620797125528636, 'yeah': 0.18255578093306293, '<SEAN>': 0.002065879787793929, '<JANE>': 0.00158803625656688, u'<IN>': 0.03651115618661259, '<MaximumDepthTree>': 1.48, u'<VB>': 0.03651115618661259, 'ill': 0.18255578093306293, 'worry': 0.18255578093306293, 'that </s>': 0.09127789046653147, 'a good': 0.09127789046653147, 'day yeah': 0.09127789046653147, 'have a': 0.09127789046653147, 'sort that': 0.09127789046653147, '<TANYA>': 0.007626803400715588, u'<JJ>': 0.03651115618661259, u'<NNP>': 0.03651115618661259, 'dont worry': 0.09127789046653147, u'<dobj> sort': 0.07210953346855986, '<MINTY>': 0.0020800141104656725, 'worry babe': 0.09127789046653147, u'<DT>': 0.07302231237322518, u'<dobj> day': 0.07210953346855986, '<s> dont': 0.09127789046653147, '<IAN>': 0.0010079399133138126, 'sort': 0.18255578093306293, '<SHIRLEY>': 0.0011806961826496102, 'babe ill': 0.09127789046653147, 'good':

{'<STACEY>': 0.0023705860796514287, '<MAX>': 0.005527350917400257, 'working <!>': 0.1459262261856506, '<JANE>': 0.0016040658965689257, '<MaximumDepthTree>': 2.96, u'<VB>': 0.058370490474260246, u'<VBN>': 0.058370490474260246, '<GARRY>': 0.0025365428014582276, 'working': 0.2918524523713012, '<TANYA>': 0.0038363411608249177, '<MINTY>': 0.002584149795502765, '<!> youre': 0.1459262261856506, 'be working': 0.1459262261856506, u'<TO>': 0.058370490474260246, u'<VBG>': 0.058370490474260246, 'youre supposed': 0.1459262261856506, '<IAN>': 0.0024113918107926363, '<SHIRLEY>': 0.00405055452682618, '<!>': 0.5837049047426024, 'oi <!>': 0.1459262261856506, '<ROXY>': 0.0031291509294504296, u'<PRP>': 0.058370490474260246, u'<ROOT> supposed': 0.11528171868666397, '<HEATHER>': 0.0008320885670897127, '<s> oi': 0.1459262261856506, '<RONNIE>': 0.0034878669037632327, u'<VBZ>': 0.058370490474260246, '<SEAN>': 0.0039672222469057964, u'<ROOT> oi': 0.11528171868666397, 'to be': 0.1459262261856506, '<CHRISTIAN>': 

{'gone': 0.33686837180286966, '<STACEY>': 0.0046071193332593114, '<!> youve': 0.16843418590143483, u'<ROOT> heaving': 0.13306300686213351, 'an hour': 0.16843418590143483, '<SEAN>': 0.0037981229707692403, '<JANE>': 0.0058319673531274825, '<PHIL>': 0.0009540517063496411, 'heaving': 0.33686837180286966, u'<VB>': 0.06737367436057394, u'<VBN>': 0.13474734872114788, '<GARRY>': 0.0037613015215497455, 'hour </s>': 0.16843418590143483, '<TANYA>': 0.0076981254470721554, 'there </s>': 0.16843418590143483, 'gone an': 0.16843418590143483, '<MINTY>': 0.0015329415083537039, '<BRADLEY>': 0.001365118213131095, u'<PRP$>': 0.06737367436057394, '<MAX>': 0.006406567700541187, 'heaving down': 0.16843418590143483, '<IAN>': 0.0009078691731646874, '<SHIRLEY>': 0.0036614069297175554, '<!>': 0.33686837180286966, 'oi <!>': 0.16843418590143483, '<ROXY>': 0.0004522196429293904, u'<PRP>': 0.06737367436057394, '<HEATHER>': 0.0014719012851884244, '<s> oi': 0.16843418590143483, 'it heaving': 0.16843418590143483, '<RONN

{'jamaica': 0.09044089938449945, 'this straight': 0.045220449692249726, 'ship that': 0.045220449692249726, u'<VB>': 0.09044089938449945, '<MAX>': 0.00023663570853059177, 'be on': 0.045220449692249726, 'not even': 0.045220449692249726, '<JANE>': 0.0007822410984217767, '<PHIL>': 0.000994553142589593, '<STACEY>': 0.000248552739054741, 'want': 0.09044089938449945, 'cruise': 0.09044089938449945, 'ship': 0.09044089938449945, 'work': 0.09044089938449945, 'me get': 0.045220449692249726, 'a cruise': 0.045220449692249726, '<GARRY>': 0.0012674971148557397, u'<PRP>': 0.05426453963069968, 'even': 0.09044089938449945, u'<dobj> this': 0.035724155256877284, 'week off': 0.045220449692249726, u'<MD>': 0.036176359753799785, 'straight </s>': 0.045220449692249726, '<TANYA>': 0.002090910653392081, 'you want': 0.045220449692249726, u'<JJ>': 0.018088179876899892, u'<NNP>': 0.036176359753799785, u'<nsubj> me': 0.035724155256877284, 'two': 0.09044089938449945, '<MINTY>': 0.0010574031692894546, 'to get': 0.04522

{u'<VBD>': 0.02962353425221149, u'<VB>': 0.02962353425221149, 'tell you': 0.07405883563052873, 'and you': 0.07405883563052873, u'<WP>': 0.02962353425221149, u'<nsubj> i': 0.0585064801481177, '<PHIL>': 0.0011177485239590754, '<SEAN>': 0.0019768088937976783, '<JANE>': 0.001183127933783136, 'shame': 0.14811767126105746, '<STACEY>': 0.002326167786298355, 'them other': 0.07405883563052873, 'and i': 0.07405883563052873, '<GARRY>': 0.0006016401630656722, 'you what': 0.07405883563052873, u'<ROOT> tell': 0.0585064801481177, 'other kid': 0.07405883563052873, '<TANYA>': 0.0016157301987297032, 'to shame': 0.07405883563052873, u'<JJ>': 0.02962353425221149, u'<CC>': 0.05924706850442298, 'put them': 0.07405883563052873, u'<ROOT> re': 0.0585064801481177, u'<dobj> me': 0.0585064801481177, u'<TO>': 0.02962353425221149, 'you were': 0.07405883563052873, u'<VBG>': 0.02962353425221149, '<s> youre': 0.07405883563052873, 'tell': 0.14811767126105746, '<IAN>': 0.001556969524557878, '<SHIRLEY>': 0.00075730225238

{'for yourself': 0.11614776576867239, '<s> yeah': 0.11614776576867239, 'stink': 0.23229553153734478, '<STACEY>': 0.003156048611892618, 'to say': 0.11614776576867239, 'dont': 0.23229553153734478, 'ron': 0.23229553153734478, 'do it': 0.11614776576867239, 'me <pause>': 0.11614776576867239, '<pause> do': 0.11614776576867239, 'you stink': 0.11614776576867239, 'hate to': 0.11614776576867239, 'yeah': 0.23229553153734478, 'dont do': 0.11614776576867239, '<JANE>': 0.0008013644935002101, u'<IN>': 0.13937731892240687, 'say': 0.23229553153734478, '<MaximumDepthTree>': 2.96, 'you dont': 0.11614776576867239, 'hate': 0.23229553153734478, '<GARRY>': 0.0007748812447859045, u'<ROOT> stink': 0.09175673495725119, u'<dobj> this': 0.09175673495725119, '<pause> but': 0.11614776576867239, '<pause>': 0.46459106307468956, '<TANYA>': 0.0032579810447234555, 'this for': 0.11614776576867239, u'<NNP>': 0.04645910630746896, '<MINTY>': 0.0019902406755978, 'it for': 0.11614776576867239, u'<CC>': 0.04645910630746896, u'

{'<MaximumDepthTree>': 4.4399999999999995, '<SEAN>': 0.001450011258442323, '<JANE>': 0.01567664806767888, u'<IN>': 0.05263157894736841, u'<VB>': 0.05263157894736841, 'ian': 0.26315789473684204, 'away': 0.26315789473684204, u'<NNP>': 0.05263157894736841, '<s> stay': 0.13157894736842102, '<SHIRLEY>': 0.0027731804815170603, '<ROXY>': 0.0028834731977465935, u'<ROOT> stay': 0.10394736842105262, 'stay away': 0.13157894736842102, 'stay': 0.26315789473684204, 'ian </s>': 0.13157894736842102, '<RONNIE>': 0.0030278419535152423, '<CHRISTIAN>': 0.006787861367496708, 'away from': 0.13157894736842102, 'from ian': 0.13157894736842102, u'<RB>': 0.05263157894736841, '<JACK>': 0.004023375047146938, '<CLARE>': 0.0015355033632983447}
{'ive': 0.242098184263618, '<STACEY>': 0.0025629015480018555, u'<nsubj> i': 0.09562878278412912, '<SEAN>': 0.002484866145973709, '<JANE>': 0.0011886469443852905, '<PHIL>': 0.00119118785916642, '<MaximumDepthTree>': 2.96, u'<VB>': 0.09683927370544722, u'<VBN>': 0.0484196368527

{'<MaximumDepthTree>': 1.48, '<SEAN>': 0.004034895306390047, '<JANE>': 0.006320999405834095, u'<IN>': 0.07302231237322518, 'that </s>': 0.18255578093306293, '<TANYA>': 0.014826618938490774, '<MINTY>': 0.0021046677250549616, u'<dobj> that': 0.14421906693711972, '<IAN>': 0.004174945520738095, '<SHIRLEY>': 0.0021411491835200765, 'you know': 0.18255578093306293, u'<PRP>': 0.07302231237322518, '<HEATHER>': 0.004400441331948183, u'<nsubj> you': 0.14421906693711972, u'<VBP>': 0.07302231237322518, '<s> you': 0.18255578093306293, '<RONNIE>': 0.002442796120269034, u'<ROOT> know': 0.14421906693711972, 'know': 0.36511156186612587, 'know that': 0.18255578093306293, '<BRADLEY>': 0.004152352926908502, '<CLARE>': 0.0021839643356009685, '<JACK>': 0.006158345675833514}
{'<STACEY>': 0.0021337617085028077, '<MAX>': 0.0020486210939790186, '<JANE>': 0.0019840464176711595, '<PHIL>': 0.0021869626976850653, '<MaximumDepthTree>': 1.48, 'it only': 0.18595041322314051, 'a jumper': 0.18595041322314051, '<TANYA>': 

{'cup of': 0.06428571428571425, u'<VB>': 0.05142857142857141, '<pause>': 0.1285714285714285, 'you want': 0.06428571428571425, u'<VBG>': 0.025714285714285703, 'must': 0.1285714285714285, '<IAN>': 0.0010488727020140141, '<ROXY>': 0.0005690858421870379, '<HEATHER>': 0.00037313680755137043, u'<ROOT> want': 0.05078571428571427, 'myself a': 0.06428571428571425, 'gonna': 0.1285714285714285, u'<VBP>': 0.05142857142857141, 'a cup': 0.06428571428571425, 'im gonna': 0.06428571428571425, u'<NN>': 0.07714285714285711, '<SEAN>': 0.0014756531237014167, '<CHRISTIAN>': 0.0006356378434353011, '<CLARE>': 0.0016726991499400289, 'married': 0.1285714285714285, '<s> must': 0.06428571428571425, 'guy': 0.1285714285714285, u'<RB>': 0.05142857142857141, 'of tea': 0.06428571428571425, u'<nsubj> m': 0.05078571428571427, 'tea': 0.1285714285714285, u'<nsubj> i': 0.05078571428571427, '<JANE>': 0.0012629909048023306, '<PHIL>': 0.0012443538005354935, 'creep': 0.1285714285714285, 'whether he': 0.06428571428571425, 'make

{'with and': 0.059243006034009865, '<MaximumDepthTree>': 5.92, 'do it': 0.11848601206801973, u'<>': 0.07109160724081184, '<MAX>': 0.0010465973140990795, '<pause> we': 0.059243006034009865, u'<nsubj> i': 0.0468019747668678, '<s> i': 0.059243006034009865, 'need to': 0.11848601206801973, '<PHIL>': 0.000824976349562215, u'<dobj> i': 0.0468019747668678, 'i <?>': 0.059243006034009865, 'done': 0.11848601206801973, '<MINTY>': 0.000984386241128048, u'<VB>': 0.16588041689522762, 'need': 0.1777290181020296, 'wont': 0.11848601206801973, '<CLARE>': 0.000658144285729148, 'done get': 0.059243006034009865, u'<VBN>': 0.023697202413603943, u'<ROOT> need': 0.0468019747668678, '<pause>': 0.3554580362040592, '<pause> wont': 0.059243006034009865, u'<RP>': 0.023697202413603943, u'<JJ>': 0.07109160724081184, u'<NNP>': 0.023697202413603943, 'just <pause>': 0.059243006034009865, u'<nsubj> it': 0.0936039495337356, u'<dobj> it': 0.0936039495337356, 'get it': 0.11848601206801973, 'get': 0.1777290181020296, u'<MD>'

{'ive': 0.04905300449652541, '<STACEY>': 0.000298229747878524, u'<VB>': 0.03924240359722033, 'tell you': 0.024526502248262704, u'<dobj> you': 0.019375936776127537, 'it back': 0.024526502248262704, u'<nsubj> i': 0.038751873552255074, 'back': 0.04905300449652541, 'lucky if': 0.024526502248262704, '<JANE>': 0.0008698782896944244, '<PHIL>': 0.0005688228941481103, 'way shes': 0.024526502248262704, 'ive got': 0.024526502248262704, 'shes': 0.04905300449652541, 'her bird': 0.024526502248262704, u'<IN>': 0.009810600899305083, 'if she': 0.024526502248262704, '<GARRY>': 0.0004212555204828084, u'<PRP>': 0.058863605395830486, 'you what': 0.024526502248262704, u'<ROOT> tell': 0.019375936776127537, 'well i': 0.024526502248262704, u'<ROOT> shell': 0.019375936776127537, 'got her': 0.024526502248262704, u'<JJ>': 0.009810600899305083, 'stuffed </s>': 0.024526502248262704, 'and the': 0.024526502248262704, '<MINTY>': 0.00030136948352914116, u'<VBN>': 0.019621201798610165, u'<CC>': 0.009810600899305083, u'<

{'<STACEY>': 0.002422311504827196, '<MAX>': 0.0022068509523063156, '<SEAN>': 0.001182674489412551, '<JANE>': 0.0012584450854858742, '<PHIL>': 0.0012615645827562, '<MaximumDepthTree>': 1.48, 'hell are': 0.112079701120797, 'hell': 0.224159402241594, '<GARRY>': 0.0036709470069608012, 'what the': 0.112079701120797, '<TANYA>': 0.004863502855164074, u'<DT>': 0.0448318804483188, u'<VBG>': 0.0448318804483188, 'doing <?>': 0.112079701120797, '<IAN>': 0.001257886347876291, '<SHIRLEY>': 0.00699609869851185, u'<PRP>': 0.0448318804483188, u'<nsubj> you': 0.08854296388542963, u'<ROOT> doing': 0.08854296388542963, u'<WP>': 0.0448318804483188, '<RONNIE>': 0.0024643970331646166, u'<NN>': 0.0448318804483188, u'<VBP>': 0.0448318804483188, '<?>': 0.224159402241594, 'are you': 0.112079701120797, '<CHRISTIAN>': 0.0036685136145245337, 'the hell': 0.112079701120797, '<STEVEN>': 0.0012499211540408246, '<s> what': 0.112079701120797, 'you doing': 0.112079701120797, u'<dobj> hell': 0.08854296388542963}
{u'<ROOT> 

{'<STACEY>': 0.0059908814076052985, '<MAX>': 0.005759159031433691, '<s> no': 0.25586353944562895, '<SEAN>': 0.007220057161206615, '<JANE>': 0.008793015000092374, '<PHIL>': 0.0057085830745097575, '<MaximumDepthTree>': 1.48, u'<VB>': 0.1023454157782516, u'<ROOT> no': 0.2021321961620469, 'no <?>': 0.25586353944562895, '<TANYA>': 0.005803438202354201, '<?>': 1.0234541577825158, u'<ROOT> come': 0.2021321961620469, '<MINTY>': 0.0016808552062702756, 'come <?>': 0.25586353944562895, '<IAN>': 0.008727191400518745, '<SHIRLEY>': 0.0015014157344113684, '<ROXY>': 0.0030814197687304874, '<HEATHER>': 0.002933726246948179, '<RONNIE>': 0.0014676273747105406, 'how come': 0.25586353944562895, 'come': 0.5117270788912579, '<BRADLEY>': 0.007406289362717891, u'<WRB>': 0.1023454157782516, '<CHRISTIAN>': 0.00229239379935691, '<?> how': 0.25586353944562895, '<STEVEN>': 0.0014722603323235651, '<CLARE>': 0.0014382464571902048, '<JACK>': 0.0029238668788522802, u'<UH>': 0.1023454157782516}
{u'<VBD>': 0.049603858077

{'<s> yeah': 0.07337953526294334, '<STACEY>': 0.002008101397847753, 'i feel': 0.07337953526294334, 'feel': 0.1467590705258867, 'for a': 0.07337953526294334, '<JANE>': 0.0008133197314024225, u'<nsubj> i': 0.057969832857725244, 'yeah': 0.1467590705258867, '<SEAN>': 0.0017188105830326845, 'sitter dad': 0.07337953526294334, '<PHIL>': 0.0006210199687738415, '<MaximumDepthTree>': 4.4399999999999995, '<MINTY>': 0.0007966128105725394, u'<VB>': 0.058703628210354686, 'if youre': 0.07337953526294334, u'<nsubj> dad': 0.057969832857725244, u'<RP>': 0.029351814105177343, '<GARRY>': 0.00032928222366252516, u'<PRP>': 0.11740725642070937, 'get a': 0.07337953526294334, 'youre looking': 0.07337953526294334, 'them out': 0.07337953526294334, '<pause>': 0.2935181410517734, '<TANYA>': 0.0036133912649822754, u'<JJ>': 0.029351814105177343, u'<NNP>': 0.029351814105177343, 'ill take': 0.07337953526294334, 'looking': 0.1467590705258867, u'<ROOT> looking': 0.057969832857725244, 'take': 0.1467590705258867, u'<DT>':

{'swelling': 0.18660101075547492, '<MaximumDepthTree>': 8.879999999999999, u'<VB>': 0.03732020215109499, 'go': 0.18660101075547492, 'bit': 0.18660101075547492, u'<nsubj> tanyas': 0.0737073992484126, u'<ROOT> need': 0.0737073992484126, u'<VBG>': 0.03732020215109499, '<IAN>': 0.0031824455413791645, '<ROXY>': 0.0012866622580991183, 'go </s>': 0.09330050537773746, 'get': 0.18660101075547492, 'killer': 0.18660101075547492, u'<VBP>': 0.11196060645328498, 'probably': 0.18660101075547492, u'<NN>': 0.11196060645328498, '<SEAN>': 0.0021928770281147677, u'<nsubj> that': 0.0737073992484126, '<CHRISTIAN>': 0.001816160093001174, u'<RB>': 0.11196060645328498, '<CLARE>': 0.0006338234571345094, 'the swelling': 0.09330050537773746, 'probably got': 0.09330050537773746, u'<nsubj> i': 0.0737073992484126, '<JANE>': 0.0011431600480200435, '<PHIL>': 0.001206540858108148, 'need something': 0.09330050537773746, u'<dobj> something': 0.0737073992484126, u'<NNP>': 0.03732020215109499, u'<ROOT> go': 0.0737073992484

{'blind': 0.20845396641574981, 'be blind': 0.10422698320787491, '<STACEY>': 0.002256783158722066, u'<VB>': 0.041690793283149966, u'<CC>': 0.041690793283149966, '<MAX>': 0.004332294408307261, '<JANE>': 0.001029413007755886, '<MaximumDepthTree>': 2.96, 'desperate': 0.20845396641574981, '<TANYA>': 0.0010967903184984573, u'<JJ>': 0.08338158656629993, '<MINTY>': 0.0010587990005741367, u'<MD>': 0.041690793283149966, '<IAN>': 0.004987932096919498, '<SHIRLEY>': 0.0010692269934301005, '<ROXY>': 0.004382971379610472, u'<PRP>': 0.041690793283149966, u'<nsubj> you': 0.08233931673422118, '<s> you': 0.10422698320787491, '<RONNIE>': 0.003132525032579367, 'blind or': 0.10422698320787491, 'must': 0.20845396641574981, u'<ROOT> be': 0.08233931673422118, '<CHRISTIAN>': 0.005810957148582095, '<CLARE>': 0.0010681325853043812, 'must be': 0.10422698320787491, 'or desperate': 0.10422698320787491, 'desperate </s>': 0.10422698320787491, 'you must': 0.10422698320787491}
{'<STACEY>': 0.0036804242870568534, '<MAX>'

{u'<VBD>': 0.0789041095890411, 'wa </s>': 0.19726027397260273, '<SEAN>': 0.0034117553798303224, '<JANE>': 0.0034251537610012596, '<PHIL>': 0.003379307999575792, '<STACEY>': 0.00681008976506872, 'it wa': 0.19726027397260273, u'<VB>': 0.0789041095890411, 'be alright': 0.19726027397260273, '<TANYA>': 0.007958674532603351, u'<JJ>': 0.0789041095890411, '<MINTY>': 0.0010793202050147044, u'<nsubj> it': 0.15583561643835617, u'<PRP$>': 0.0789041095890411, u'<TO>': 0.0789041095890411, '<IAN>': 0.0033111463251429872, '<SHIRLEY>': 0.0022602259700956404, '<ROXY>': 0.0034591822668712047, u'<PRP>': 0.0789041095890411, '<HEATHER>': 0.004615446021285964, 'gonna': 0.39452054794520547, u'<ROOT> wa': 0.15583561643835617, '<RONNIE>': 0.003270515504794824, 'gonna be': 0.19726027397260273, 'alright </s>': 0.19726027397260273, u'<NN>': 0.0789041095890411, u'<nsubj> gon': 0.15583561643835617, '<BRADLEY>': 0.003385624126098139, u'<ROOT> be': 0.15583561643835617, '<CHRISTIAN>': 0.0017795875137303035, '<MaximumDe

{'look he': 0.1356102461074837, u'<VBD>': 0.10848819688598696, u'<VB>': 0.05424409844299348, '<PHIL>': 0.002632291013030561, '<MAX>': 0.0022586176333123462, u'<nsubj> i': 0.10713209442491212, '<s> i': 0.1356102461074837, '<JANE>': 0.002218338294944909, 'him out': 0.1356102461074837, 'he wa': 0.1356102461074837, '<STACEY>': 0.003840024666633799, 'desperate': 0.2712204922149674, 'i wa': 0.1356102461074837, '<s> look': 0.1356102461074837, 'wa just': 0.1356102461074837, '<JACK>': 0.0030016018048270752, u'<RP>': 0.05424409844299348, '<GARRY>': 0.0017941930542201318, 'you worried': 0.1356102461074837, 'wa desperate': 0.1356102461074837, '<TANYA>': 0.00470507269480425, u'<JJ>': 0.10848819688598696, '<MINTY>': 0.0007373239615273329, 'just helping': 0.1356102461074837, u'<VBG>': 0.05424409844299348, '<IAN>': 0.00304253010923557, '<SHIRLEY>': 0.004965019430241727, '<ROXY>': 0.0022429928493310734, u'<PRP>': 0.21697639377197392, u'<ROOT> are': 0.10713209442491212, 'what are': 0.1356102461074837, '

{u'<nsubj> ronnies': 0.03170568561872911, 'if you': 0.040133779264214055, u'<VBD>': 0.03210702341137124, 'ronnies': 0.08026755852842811, 'piece of': 0.040133779264214055, 'one': 0.08026755852842811, '<JANE>': 0.0004495345940874091, u'<IN>': 0.048160535117056875, '<MaximumDepthTree>': 4.4399999999999995, 'the one': 0.040133779264214055, u'<VB>': 0.01605351170568562, u'<VBZ>': 0.01605351170568562, 'you wanna': 0.040133779264214055, '<JACK>': 0.00303926005893033, 'happiness': 0.08026755852842811, 'had in': 0.040133779264214055, 'long </s>': 0.040133779264214055, '<TANYA>': 0.0017273895059554322, 'god': 0.08026755852842811, 'ruin': 0.08026755852842811, 'know how': 0.040133779264214055, 'long': 0.08026755852842811, u'<NNPS>': 0.01605351170568562, 'one piece': 0.040133779264214055, u'<DT>': 0.01605351170568562, '<IAN>': 0.00040497597931763167, 'wanna ruin': 0.040133779264214055, '<ROXY>': 0.000839423051944809, u'<PRP>': 0.01605351170568562, 'how long': 0.040133779264214055, 'ronnies had': 0.

{'<STACEY>': 0.008987308200167745, 'dont': 0.3768318213538032, '<MAX>': 0.0061784649216225715, '<SEAN>': 0.0015600405112572947, 'bradley': 0.5652477320307049, u'<IN>': 0.07536636427076066, '<MaximumDepthTree>': 1.48, 'you dont': 0.1884159106769016, '<JACK>': 0.002629326725532892, 'that </s>': 0.1884159106769016, '<TANYA>': 0.00777289694014016, '<?>': 0.3768318213538032, u'<NNP>': 0.1507327285415213, u'<DT>': 0.07536636427076066, u'<TO>': 0.07536636427076066, '<HEATHER>': 0.002509682514848371, '<IAN>': 0.0025650110395065828, '<SHIRLEY>': 0.0046637672729085075, '<!>': 0.3768318213538032, '<ROXY>': 0.0016868909392732264, u'<PRP>': 0.07536636427076066, 'like that': 0.1884159106769016, 'have to': 0.1884159106769016, u'<nsubj> you': 0.14884856943475228, u'<VBP>': 0.07536636427076066, '<s> bradley': 0.3768318213538032, 'dont have': 0.1884159106769016, '<BRADLEY>': 0.002043185988902437, '<JANE>': 0.0005087531534465749, 'like': 0.3768318213538032, '<CHRISTIAN>': 0.005335982574258788, '<PHIL>': 

{u'<VBD>': 0.042670881074674046, u'<RB>': 0.12801264322402217, '<PHIL>': 0.0014608539198722437, '<MAX>': 0.0022604237405602487, u'<nsubj> i': 0.16854998024496248, '<s> i': 0.10667720268668512, '<JANE>': 0.0018064792191790826, u'<IN>': 0.042670881074674046, '<STACEY>': 0.000338710032569299, 'im': 0.21335440537337025, '<MINTY>': 0.0018010847645591665, u'<VB>': 0.042670881074674046, 'i havent': 0.10667720268668512, 'seen': 0.21335440537337025, '<JACK>': 0.001842155611287268, u'<VBN>': 0.042670881074674046, '<GARRY>': 0.0018030329980865242, u'<dobj> her': 0.08427499012248124, '<TANYA>': 0.0053034355011441405, 'nervous': 0.21335440537337025, u'<JJ>': 0.08534176214934809, 'when did': 0.10667720268668512, 'long': 0.21335440537337025, '<BRADLEY>': 0.0008941680168361139, 'course im': 0.10667720268668512, '<IAN>': 0.0014436844882445227, '<SHIRLEY>': 0.001197606027899149, '<s> of': 0.10667720268668512, u'<PRP>': 0.12801264322402217, '<HEATHER>': 0.0002667886993626712, 'how long': 0.10667720268668

{'because when': 0.09280476626947755, '<s> because': 0.09280476626947755, 'he wanted': 0.09280476626947755, 'that <?>': 0.09280476626947755, '<s> can': 0.09280476626947755, 'he knew': 0.09280476626947755, 'the thing': 0.09280476626947755, '<MaximumDepthTree>': 5.92, u'<VB>': 0.2598533455545371, 'is the': 0.09280476626947755, 'you see': 0.09280476626947755, u'<NN>': 0.14848762603116408, 'know <?>': 0.09280476626947755, 'wanted </s>': 0.09280476626947755, 'or die': 0.09280476626947755, 'betray': 0.1856095325389551, 'do that': 0.1856095325389551, '<IAN>': 0.002168198593460124, 'can you': 0.09280476626947755, 'betray him': 0.09280476626947755, 'i wish': 0.09280476626947755, '<HEATHER>': 0.0006808925260979259, 'could do': 0.09280476626947755, u'<VBP>': 0.1856095325389551, '<s> or': 0.09280476626947755, 'know': 0.27841429880843266, 'sane you': 0.09280476626947755, u'<WRB>': 0.03712190650779102, '<CHRISTIAN>': 0.0014659335061502298, 'die': 0.1856095325389551, u'<RB>': 0.14848762603116408, 'an

{u'<VBD>': 0.12702146427521321, 'contacted': 0.31755366068803303, '<MAX>': 0.0026095434088687355, u'<nsubj> i': 0.3763010879153192, '<s> i': 0.31755366068803303, '<JANE>': 0.001302396446036454, '<PHIL>': 0.0030557919951840476, '<STACEY>': 0.005403033441645418, 'im': 0.31755366068803303, '<MINTY>': 0.0013066417987891293, 'im not': 0.15877683034401652, '<GARRY>': 0.0021055204377025127, 'going </s>': 0.15877683034401652, 'i never': 0.15877683034401652, 'never contacted': 0.15877683034401652, u'<ROOT> going': 0.12543369597177306, 'told you': 0.15877683034401652, u'<ROOT> contacted': 0.12543369597177306, 'going': 0.31755366068803303, 'them </s>': 0.15877683034401652, u'<VBG>': 0.06351073213760661, 'told': 0.31755366068803303, '<SHIRLEY>': 0.004009354861969704, '<ROXY>': 0.001320853020235381, u'<PRP>': 0.31755366068803303, u'<ROOT> told': 0.12543369597177306, '<HEATHER>': 0.001282005671630157, 'never': 0.31755366068803303, u'<VBP>': 0.06351073213760661, u'<dobj> them': 0.12543369597177306, '

{'i could': 0.10349288486416561, '<GARRY>': 0.0018659096230267585, u'<VBD>': 0.041397153945666246, 'of my': 0.10349288486416561, 'if i': 0.10349288486416561, '<MAX>': 0.0011488105916424708, 'of me': 0.10349288486416561, u'<nsubj> i': 0.2452781371280725, '<PHIL>': 0.000844053409632153, '<s> i': 0.10349288486416561, '<JANE>': 0.0007021076797956427, u'<IN>': 0.20698576972833121, 'understand jack': 0.10349288486416561, '<STACEY>': 0.00318567412354407, '<MINTY>': 0.0011258564082760407, 'want': 0.20698576972833121, 'everything out': 0.10349288486416561, '<JACK>': 0.0011360842112832395, u'<RP>': 0.041397153945666246, 'scrape it': 0.10349288486416561, u'<ROOT> scrape': 0.08175937904269083, 'said': 0.20698576972833121, 'jack <?>': 0.10349288486416561, 'everything': 0.20698576972833121, '<TANYA>': 0.003227258473029261, u'<NNP>': 0.041397153945666246, 'me </s>': 0.10349288486416561, 'scrape': 0.20698576972833121, 'just scrape': 0.10349288486416561, u'<dobj> it': 0.08175937904269083, 'it out': 0.1

{'<STACEY>': 0.0026903474721441394, 'cant </s>': 0.22304832713754646, '<SEAN>': 0.00722348711856648, '<PHIL>': 0.0050815815231197515, 'cant': 0.44609665427509293, '<MaximumDepthTree>': 1.48, '<GARRY>': 0.0036028954637574886, u'<ROOT> ca': 0.1762081784386617, '<TANYA>': 0.012478499269650015, 'you cant': 0.22304832713754646, '<MINTY>': 0.002599080209847491, u'<MD>': 0.0892193308550186, '<IAN>': 0.0025173429358375603, '<SHIRLEY>': 0.0022393706375921666, '<ROXY>': 0.00486854920964094, u'<PRP>': 0.0892193308550186, '<HEATHER>': 0.002387912800745754, u'<nsubj> you': 0.1762081784386617, '<s> you': 0.22304832713754646, '<RONNIE>': 0.007405700932936866, '<BRADLEY>': 0.002704066235466425, '<CHRISTIAN>': 0.0036092880440184395, '<STEVEN>': 0.0025563558003055126, u'<RB>': 0.0892193308550186, '<JACK>': 0.0027195372162594432}
{'but we': 0.12614980289093294, 'we aint': 0.12614980289093294, u'<VB>': 0.05045992115637319, '<s> like': 0.12614980289093294, 'and minty': 0.12614980289093294, 'aint': 0.252299

{'ive done': 0.03469210754553339, 'knew what': 0.03469210754553339, u'<nsubj> stacey': 0.027406764960971382, '<MaximumDepthTree>': 5.92, 'do with': 0.03469210754553339, 'aint': 0.06938421509106678, '<pause>': 0.13876843018213356, 'want anything': 0.03469210754553339, 'told you': 0.03469210754553339, '<pause> thats': 0.03469210754553339, 'ive told': 0.03469210754553339, '<IAN>': 0.0006022087113204848, '<ROXY>': 0.0003800911433105456, 'to do': 0.03469210754553339, '<HEATHER>': 0.0005706231261177331, u'<ROOT> want': 0.027406764960971382, u'<VBP>': 0.013876843018213358, u'<dobj> anything': 0.027406764960971382, u'<ROOT> got': 0.027406764960971382, u'<NN>': 0.04163052905464008, '<SEAN>': 0.0007654492060971664, 'anything to': 0.03469210754553339, 'look ive': 0.03469210754553339, '<CLARE>': 0.00019023517352736202, 'what ive': 0.03469210754553339, u'<RB>': 0.06938421509106678, 'mean': 0.06938421509106678, 'shed': 0.06938421509106678, u'<VB>': 0.08326105810928014, u'<nsubj> i': 0.08222029488291

{'gone': 0.20785219399538105, u'<VBD>': 0.041570438799076216, u'<VB>': 0.08314087759815243, u'<CC>': 0.041570438799076216, '<MAX>': 0.0006177574688656644, u'<nsubj> i': 0.08210161662817553, '<s> i': 0.10392609699769052, '<JANE>': 0.001637999403656741, 'to wait': 0.10392609699769052, '<STACEY>': 0.0029287142013177747, 'but <pause>': 0.10392609699769052, u'<nsubj> heather': 0.08210161662817553, '<s> heather': 0.10392609699769052, u'<VBN>': 0.041570438799076216, '<GARRY>': 0.0037116283333969804, 'heather': 0.20785219399538105, '<pause>': 0.20785219399538105, '<TANYA>': 0.003994377574732351, u'<MD>': 0.041570438799076216, u'<NNP>': 0.041570438799076216, '<MINTY>': 0.0012842492222635456, 'mustve gone': 0.10392609699769052, u'<TO>': 0.041570438799076216, 'gone inside': 0.10392609699769052, 'told': 0.20785219399538105, '<SHIRLEY>': 0.0028333447291887196, '<ROXY>': 0.0005451944366225799, u'<PRP>': 0.08314087759815243, u'<ROOT> told': 0.08210161662817553, '<HEATHER>': 0.0017029776799902235, 'he

{'<s> yeah': 0.12974531475252285, 'if you': 0.12974531475252285, '<STACEY>': 0.0056172331143935755, u'<VB>': 0.051898125901009146, '<PHIL>': 0.0010624667994568117, '<MAX>': 0.001795661298014606, u'<nsubj> i': 0.10249879865449307, 'yeah': 0.2594906295050457, '<s> i': 0.12974531475252285, 'see if': 0.12974531475252285, u'<IN>': 0.051898125901009146, 'see': 0.2594906295050457, '<MaximumDepthTree>': 2.96, u'<ROOT> see': 0.10249879865449307, 'want': 0.2594906295050457, u'<RP>': 0.051898125901009146, 'find': 0.2594906295050457, '<GARRY>': 0.0011413124553586581, 'funny': 0.2594906295050457, 'you find': 0.12974531475252285, '<TANYA>': 0.005354242323814706, u'<JJ>': 0.051898125901009146, '<MINTY>': 0.000367933816805649, '<BRADLEY>': 0.001502743888758101, u'<DT>': 0.051898125901009146, 'yeah <?>': 0.12974531475252285, '<IAN>': 0.004342221555321767, '<SHIRLEY>': 0.0019244702205813541, '<!>': 0.2594906295050457, '<ROXY>': 0.00036851706927140557, u'<PRP>': 0.15569437770302744, '<HEATHER>': 0.001103

{'<MaximumDepthTree>': 1.48, '<SHIRLEY>': 0.014373286454663445, '<s> he': 0.23468057366362452, u'<nsubj> he': 0.18539765319426338, '<ROXY>': 0.00514115826041683, u'<PRP>': 0.0938722294654498, '<CHRISTIAN>': 0.0048356234292053135, '<TANYA>': 0.010761423996348512, '<MAX>': 0.005269384467376975, 'died </s>': 0.23468057366362452, '<SEAN>': 0.007533228962231987, '<JANE>': 0.002477197293245505, '<PHIL>': 0.0025944665062933745, 'he died': 0.23468057366362452, '<RONNIE>': 0.005117748497524857, '<BRADLEY>': 0.002515122019580647, u'<VBD>': 0.0938722294654498, u'<ROOT> died': 0.18539765319426338, 'died': 0.46936114732724904, '<IAN>': 0.007438726475563654}
{u'<VBD>': 0.039845047039291655, '<s> no': 0.09961261759822912, '<SEAN>': 0.001102951282413982, '<JANE>': 0.0022057111815979745, 'gentle': 0.19922523519645824, '<MaximumDepthTree>': 2.96, 'it wa': 0.09961261759822912, 'just a': 0.09961261759822912, 'wa just': 0.09961261759822912, '<JACK>': 0.001107973901019271, 'stretch': 0.19922523519645824, u'

{u'<VBD>': 0.024161073825503362, 'want': 0.12080536912751681, '<SHIRLEY>': 0.0013042338551527278, u'<CC>': 0.024161073825503362, 'one of': 0.060402684563758406, 'mind': 0.12080536912751681, 'so this': 0.060402684563758406, 'one': 0.12080536912751681, u'<IN>': 0.024161073825503362, '<STACEY>': 0.0023511051689272737, '<MINTY>': 0.000655031866574785, 'you dont': 0.060402684563758406, 'i hope': 0.060402684563758406, 'borrowed one': 0.060402684563758406, '<GARRY>': 0.0010236216646813253, 'your shirt': 0.060402684563758406, u'<nsubj> this': 0.047718120805369135, 'shirt': 0.12080536912751681, '<pause>': 0.24161073825503362, '<TANYA>': 0.0010279669764366867, 'you want': 0.060402684563758406, 'dont mind': 0.060402684563758406, 'oh <pause>': 0.060402684563758406, u'<nsubj> i': 0.09543624161073827, 'dont': 0.12080536912751681, u'<PRP$>': 0.024161073825503362, 'of your': 0.060402684563758406, 'want <?>': 0.060402684563758406, '<!> oh': 0.060402684563758406, 'hope': 0.12080536912751681, '<IAN>': 0.

{'<STACEY>': 0.001383234814281482, '<MaximumDepthTree>': 4.4399999999999995, '<MAX>': 0.0007055690631374244, '<SEAN>': 0.0007092343286958751, '<JANE>': 0.0028566465303546387, u'<IN>': 0.025307557117750446, 'professional': 0.1265377855887522, u'<VB>': 0.025307557117750446, 'out before': 0.0632688927943761, u'<RP>': 0.025307557117750446, '<GARRY>': 0.0011973736744365386, 'suicide': 0.1265377855887522, u'<dobj> suicide': 0.04998242530755713, 'to sort': 0.0632688927943761, '<TANYA>': 0.002273600746980681, u'<JJ>': 0.025307557117750446, u'<DT>': 0.025307557117750446, u'<TO>': 0.025307557117750446, 'professional suicide': 0.0632688927943761, '<IAN>': 0.0034667895683281706, 'sort': 0.1265377855887522, '<SHIRLEY>': 0.0007335555841089641, u'<PRP>': 0.025307557117750446, 'mess': 0.1265377855887522, 'before we': 0.0632688927943761, u'<VBP>': 0.025307557117750446, u'<dobj> mess': 0.04998242530755713, 'commit professional': 0.0632688927943761, 'we commit': 0.0632688927943761, 'sort this': 0.0632688

{'<MaximumDepthTree>': 4.4399999999999995, '<MAX>': 0.0016558602099122318, 'sleep </s>': 0.15483870967741936, '<SEAN>': 0.0035791113758133915, 'need to': 0.15483870967741936, 'sleep': 0.3096774193548387, u'<VB>': 0.061935483870967756, 'need': 0.3096774193548387, 'you need': 0.15483870967741936, 'ian': 0.3096774193548387, u'<ROOT> need': 0.1223225806451613, '<TANYA>': 0.0024825847636665175, u'<NNP>': 0.061935483870967756, u'<dobj> sleep': 0.1223225806451613, 'to get': 0.15483870967741936, u'<DT>': 0.061935483870967756, u'<TO>': 0.061935483870967756, '<IAN>': 0.0025106692796565576, '<SHIRLEY>': 0.0024482325264054135, 'some sleep': 0.15483870967741936, '<ROXY>': 0.0008468178911433905, u'<PRP>': 0.061935483870967756, 'get': 0.3096774193548387, u'<nsubj> you': 0.1223225806451613, u'<VBP>': 0.061935483870967756, 'ian </s>': 0.15483870967741936, '<s> you': 0.15483870967741936, '<s> ian': 0.15483870967741936, u'<NN>': 0.061935483870967756, '<BRADLEY>': 0.0007988247562109235, '<JANE>': 0.015470

{'<s> no': 0.07101292040635171, '<MaximumDepthTree>': 1.48, u'<VB>': 0.028405168162540684, 'japan': 0.14202584081270342, u'<ROOT> no': 0.056100207121017855, 'old mate': 0.07101292040635171, '<pause>': 0.14202584081270342, 'mate have': 0.07101292040635171, '<MAX>': 0.0013643559035443034, u'<VBG>': 0.028405168162540684, 'so <pause>': 0.07101292040635171, '<IAN>': 0.0015527295138158476, '<ROXY>': 0.001118765935213767, '<HEATHER>': 0.0016781226104763468, u'<VBP>': 0.05681033632508137, u'<NN>': 0.08521550448762207, u'<ROOT> moved': 0.056100207121017855, '<CHRISTIAN>': 0.0012977499263532473, '<CLARE>': 0.00010081746614395629, 'a well': 0.07101292040635171, 'old bos': 0.07101292040635171, u'<RB>': 0.11362067265016274, u'<dobj> word': 0.056100207121017855, 'out he': 0.07101292040635171, 'old': 0.21303876121905513, 'even tried': 0.07101292040635171, 'no </s>': 0.07101292040635171, 'my old': 0.14202584081270342, 'see if': 0.07101292040635171, u'<IN>': 0.11362067265016274, 'see': 0.14202584081270

{'by the': 0.04603580562659847, 'a long': 0.04603580562659847, 'for year': 0.04603580562659847, 'anywhere </s>': 0.04603580562659847, '<MaximumDepthTree>': 5.92, u'<VB>': 0.12890025575447572, 'find': 0.09207161125319693, 'might find': 0.04603580562659847, '<pause>': 0.18414322250639387, 'somewhere': 0.09207161125319693, 'year </s>': 0.04603580562659847, 'bradley': 0.09207161125319693, 'might': 0.18414322250639387, '<IAN>': 0.0017127899532426195, 'or we': 0.04603580562659847, '<ROXY>': 0.0006016066171213772, 'get': 0.09207161125319693, '<HEATHER>': 0.00037634204657997485, u'<VBP>': 0.07365728900255755, '<s> or': 0.04603580562659847, 'decide': 0.09207161125319693, 'the wrong': 0.04603580562659847, 'bit': 0.09207161125319693, 'look for': 0.04603580562659847, '<s> we': 0.09207161125319693, 'we just': 0.09207161125319693, '<CHRISTIAN>': 0.0004336786352412723, u'<RB>': 0.23938618925831207, 'travel for': 0.04603580562659847, 'beautiful </s>': 0.04603580562659847, 'might decide': 0.04603580562

{'<STACEY>': 0.002319833251643296, '<MAX>': 0.0021004107816477297, '<SEAN>': 0.0019493642303991773, '<JANE>': 0.003838417354455666, u'<IN>': 0.07438016528925617, '<MaximumDepthTree>': 4.4399999999999995, '<GARRY>': 0.005938564749347973, 'the solicitor': 0.18595041322314043, '<TANYA>': 0.004904473162618343, '<MINTY>': 0.0019663847344359732, 'solicitor </s>': 0.18595041322314043, 'from the': 0.18595041322314043, 'it from': 0.18595041322314043, u'<DT>': 0.07438016528925617, '<IAN>': 0.0022011124428697407, '<SHIRLEY>': 0.004261542674186991, u'<ROOT> it': 0.14690082644628097, u'<PRP>': 0.07438016528925617, '<HEATHER>': 0.0038030737950726073, 'solicitor': 0.37190082644628086, '<RONNIE>': 0.005712072066696561, u'<NNS>': 0.07438016528925617, '<CHRISTIAN>': 0.0030731743173509404, '<PHIL>': 0.0018678963056997886, '<STEVEN>': 0.00379792174503002, '<CLARE>': 0.0038604314929493243, '<s> it': 0.18595041322314043, '<JACK>': 0.0023309467303065886}
{'ive': 0.16961130742049477, '<STACEY>': 0.00044592571

{u'<ROOT> give': 0.08664861205145565, '<s> no': 0.10968178740690589, u'<VB>': 0.39485443466486114, 'alright </s>': 0.10968178740690589, u'<ROOT> no': 0.08664861205145565, 'up like': 0.10968178740690589, 'at me': 0.10968178740690589, '<pause>': 0.43872714962762355, u'<CC>': 0.21936357481381177, u'<ROOT> dunno': 0.08664861205145565, 'them </s>': 0.10968178740690589, '<IAN>': 0.0005910090198216192, 'of thing': 0.10968178740690589, '<ROXY>': 0.001006855666723296, u'<DT>': 0.08774542992552471, 'get': 0.21936357481381177, '<HEATHER>': 0.0008564248442380125, u'<ROOT> want': 0.08664861205145565, u'<VBP>': 0.08774542992552471, u'<dobj> them': 0.08664861205145565, 'dunno <pause>': 0.10968178740690589, 'them to': 0.10968178740690589, 'hed just': 0.10968178740690589, 'dont want': 0.10968178740690589, 'like': 0.21936357481381177, '<CHRISTIAN>': 0.001868220771709858, 'end up': 0.10968178740690589, u'<RB>': 0.13161814488828708, 'try': 0.21936357481381177, 'all of': 0.10968178740690589, '<CLARE>': 0.0

{'<MaximumDepthTree>': 4.4399999999999995, 'mate when': 0.09852216748768472, 'when im': 0.09852216748768472, u'<nsubj> i': 0.07783251231527094, '<SEAN>': 0.004300936351138537, '<JANE>': 0.0010945197922880394, '<PHIL>': 0.0011001218973689115, 'im': 0.19704433497536944, 'ready': 0.19704433497536944, 'yes': 0.19704433497536944, '<GARRY>': 0.003204748676579425, u'<ROOT> mate': 0.07783251231527094, '<TANYA>': 0.004355290446550675, u'<JJ>': 0.039408866995073885, '<MINTY>': 0.0010856112989116142, 'mate': 0.19704433497536944, '<s> yes': 0.09852216748768472, '<MAX>': 0.0011116242206319336, '<IAN>': 0.0011153192341600859, '<SHIRLEY>': 0.0011034691438052491, '<ROXY>': 0.002093699382952583, u'<PRP>': 0.039408866995073885, '<HEATHER>': 0.0030995056533255835, u'<VBP>': 0.07881773399014777, '<RONNIE>': 0.00211788267334542, u'<WRB>': 0.039408866995073885, '<CHRISTIAN>': 0.0016766961286718474, 'ready </s>': 0.09852216748768472, '<JACK>': 0.0011120036716986623, u'<UH>': 0.039408866995073885, 'yes mate':

{'<SHIRLEY>': 0.007736722507656423, '<MaximumDepthTree>': 1.48, '<ROXY>': 0.007413042311510913, 'yes </s>': 0.3284671532846715, '<TANYA>': 0.029147050020041424, '<MAX>': 0.004202749556624429, '<HEATHER>': 0.011227843670076754, '<SEAN>': 0.007780307697764509, '<JANE>': 0.0042055884457561496, '<PHIL>': 0.004205317639281372, '<RONNIE>': 0.008110323980473495, '<s> yes': 0.3284671532846715, 'yes': 0.656934306569343, u'<UH>': 0.13138686131386862, u'<ROOT> yes': 0.25948905109489057, '<JACK>': 0.0035553017736449336, '<IAN>': 0.007671226849724353}
{'no i': 0.1146679407548973, 'form </s>': 0.1146679407548973, u'<RB>': 0.04586717630195892, 'deal': 0.2293358815097946, 'tell you': 0.1146679407548973, 'deal <?>': 0.1146679407548973, 'share': 0.2293358815097946, 'yeah': 0.2293358815097946, '<s> no': 0.1146679407548973, '<SEAN>': 0.0019489031220083337, '<JANE>': 0.0014273650225445082, '<PHIL>': 0.0027174122771844314, '<STACEY>': 0.002298801259224778, 'what </s>': 0.1146679407548973, 'and then': 0.1146

{u'<VBD>': 0.03069053708439899, 'easy when': 0.07672634271099747, 'when you': 0.07672634271099747, u'<nsubj> i': 0.06061381074168801, 'howsomething': 0.15345268542199494, '<SEAN>': 0.001821970126222612, '<JANE>': 0.0035092791890289124, '<PHIL>': 0.0009910995186872772, '<MaximumDepthTree>': 1.48, 'said <?>': 0.07672634271099747, 'howsomething i': 0.07672634271099747, 'said': 0.15345268542199494, '<TANYA>': 0.004429810069993422, u'<JJ>': 0.03069053708439899, '<MINTY>': 0.0017801159362687182, 'easy': 0.15345268542199494, u'<VBG>': 0.03069053708439899, '<IAN>': 0.002648550072118572, '<SHIRLEY>': 0.000878650509534534, 'you know': 0.07672634271099747, u'<PRP>': 0.06138107416879798, '<HEATHER>': 0.0018008196274662085, u'<nsubj> you': 0.06061381074168801, '<s> easy': 0.07672634271099747, 'know': 0.15345268542199494, 'i said': 0.07672634271099747, u'<VBP>': 0.03069053708439899, '<?>': 0.15345268542199494, u'<ROOT> said': 0.06061381074168801, '<BRADLEY>': 0.0017496269183858472, u'<WRB>': 0.03069

{'<STACEY>': 0.0011414298770846326, 'dont': 0.14280047600158668, '<SHIRLEY>': 0.000790459863664153, '<PHIL>': 0.0004398117385999729, '<MAX>': 0.0007822498553902567, '<SEAN>': 0.0015504227753914198, 'tonight': 0.14280047600158668, u'<IN>': 0.02856009520031734, '<MaximumDepthTree>': 4.4399999999999995, 'it even': 0.07140023800079334, 'begun': 0.14280047600158668, 'sabotage tonight': 0.07140023800079334, u'<VBN>': 0.02856009520031734, '<GARRY>': 0.001954750769345999, 'before it': 0.07140023800079334, 'even': 0.14280047600158668, '<TANYA>': 0.004423420760390813, u'<ROOT> trying': 0.05640618802062674, '<MINTY>': 0.0007704288231943491, 'doing dont': 0.07140023800079334, 'begun </s>': 0.07140023800079334, u'<PRP$>': 0.02856009520031734, 'youre trying': 0.07140023800079334, u'<VBG>': 0.05712019040063468, 'youre doing': 0.07140023800079334, 'even begun': 0.07140023800079334, '<IAN>': 0.0003806888342485016, 'you <?>': 0.07140023800079334, 'you know': 0.07140023800079334, '<ROXY>': 0.001229368053

{'vinnie': 0.1970443349753695, u'<ROOT> understand': 0.07783251231527097, u'<dobj> rhythm\ufb02': 0.07783251231527097, 'feel': 0.1970443349753695, u'<nsubj> george': 0.07783251231527097, u'<nsubj> foot': 0.07783251231527097, 'understand how': 0.09852216748768475, '<SEAN>': 0.0015738296273741382, '<JANE>': 0.00107617071003188, '<PHIL>': 0.0008661601133815698, '<STACEY>': 0.0013239678368949712, 'how vinnie': 0.09852216748768475, u'<VB>': 0.07881773399014781, 'feel </s>': 0.09852216748768475, '<GARRY>': 0.002646747246677241, 'rhythm </s>': 0.09852216748768475, '<s> never': 0.09852216748768475, 'got no': 0.09852216748768475, 'would': 0.1970443349753695, '<TANYA>': 0.005557813765726941, u'<JJ>': 0.039408866995073906, u'<NNP>': 0.07881773399014781, u'<nsubj> \ufb01im': 0.07783251231527097, 'dance again': 0.09852216748768475, 'foot have': 0.09852216748768475, 'dance': 0.1970443349753695, 'never gonna': 0.09852216748768475, u'<TO>': 0.039408866995073906, u'<DT>': 0.039408866995073906, u'<MD>':

{'if you': 0.08125188083057477, 'help': 0.16250376166114955, 'stood here': 0.08125188083057477, '<MaximumDepthTree>': 7.4, u'<VB>': 0.13000300932891964, '<s> how': 0.08125188083057477, 'letting': 0.16250376166114955, 'need your': 0.08125188083057477, u'<ROOT> need': 0.06418898585615408, 'paste you': 0.08125188083057477, 'potless if': 0.08125188083057477, 'wa having': 0.08125188083057477, '<?> right': 0.08125188083057477, u'<VBG>': 0.06500150466445982, '<?> you': 0.08125188083057477, 'stuck your': 0.08125188083057477, 'a laugh': 0.08125188083057477, '<ROXY>': 0.0017347462502540398, 'ya': 0.16250376166114955, '<HEATHER>': 0.0005482167772697927, u'<VBP>': 0.03250075233222991, u'<NN>': 0.19500451399337948, u'<WRB>': 0.03250075233222991, '<CHRISTIAN>': 0.0008519883911589002, u'<RB>': 0.2600060186578393, 'over the': 0.08125188083057477, 'leave': 0.16250376166114955, 'whatsherface </s>': 0.08125188083057477, 'the pub': 0.08125188083057477, 'right i': 0.08125188083057477, 'thing alone': 0.0812

{'<STACEY>': 0.0013174487957146068, u'<RB>': 0.02478485370051635, '<MAX>': 0.0007573588212541737, 'have actually': 0.06196213425129087, u'<ROOT> think': 0.04895008605851979, '<SEAN>': 0.002844060239531124, '<JANE>': 0.0014140097166674937, '<MaximumDepthTree>': 2.96, 'done': 0.12392426850258174, u'<VB>': 0.0495697074010327, u'<VBN>': 0.02478485370051635, 'might have': 0.06196213425129087, '<TANYA>': 0.000658664423900694, '<?>': 0.12392426850258174, u'<NNP>': 0.02478485370051635, 'think phil': 0.06196213425129087, '<MINTY>': 0.001461143473668162, 'actually': 0.12392426850258174, 'it <?>': 0.06196213425129087, u'<dobj> it': 0.04895008605851979, u'<nsubj> phil': 0.04895008605851979, 'done it': 0.06196213425129087, 'might': 0.12392426850258174, '<IAN>': 0.0015244378422375874, '<SHIRLEY>': 0.001319185011468568, '<ROXY>': 0.0006760572276688777, u'<PRP>': 0.0495697074010327, 'do you': 0.06196213425129087, '<HEATHER>': 0.0006756220512231781, u'<MD>': 0.02478485370051635, u'<nsubj> you': 0.04895

{'<STACEY>': 0.004959750904137585, '<MAX>': 0.0050604867559074646, '<SEAN>': 0.005315612059523005, '<PHIL>': 0.0016995270505881072, u'<ROOT> covering': 0.11781275890637945, '<MaximumDepthTree>': 1.48, '<s> who': 0.14913007456503727, 'covering <?>': 0.14913007456503727, '<TANYA>': 0.007237216548875276, u'<VBG>': 0.059652029826014905, 'who covering': 0.14913007456503727, '<IAN>': 0.001775874807637913, '<SHIRLEY>': 0.0016213477420655936, '<ROXY>': 0.0018784129175138985, '<HEATHER>': 0.0017490379176626383, 'who': 0.29826014913007454, u'<nsubj> who': 0.11781275890637945, u'<WP>': 0.059652029826014905, u'<VBZ>': 0.059652029826014905, '<?>': 0.29826014913007454, '<BRADLEY>': 0.0016239055086445541, 'covering': 0.29826014913007454, '<STEVEN>': 0.0037201869940092177, '<CLARE>': 0.0018323084133006077, '<JACK>': 0.004774054003994939}
{'dyou': 0.17946161515453637, '<STACEY>': 0.0032571285952129494, 'want': 0.17946161515453637, '<SHIRLEY>': 0.0030144504519911686, '<MAX>': 0.002973391712445224, '<SEA

{'<STACEY>': 0.0036265257370322935, 'if he': 0.17191977077363896, '<MAX>': 0.005856383444620987, '<SEAN>': 0.0018065361336707221, '<JANE>': 0.0018059019503356409, u'<IN>': 0.06876790830945559, '<MaximumDepthTree>': 2.96, u'<ROOT> what': 0.13581661891117477, '<GARRY>': 0.0029249351715106148, 'he doe': 0.17191977077363896, '<TANYA>': 0.00742182948783596, '<MINTY>': 0.001874765897313465, '<IAN>': 0.007605060843343539, '<SHIRLEY>': 0.003615465631333105, '<ROXY>': 0.0036304399785739706, u'<PRP>': 0.06876790830945559, u'<WP>': 0.06876790830945559, '<RONNIE>': 0.00185096884576281, 'doe <?>': 0.17191977077363896, u'<VBZ>': 0.06876790830945559, '<?>': 0.3438395415472779, '<BRADLEY>': 0.0018083253443935763, u'<nsubj> he': 0.13581661891117477, '<CHRISTIAN>': 0.006029595058628613, '<s> what': 0.17191977077363896, 'what if': 0.17191977077363896}
{'<STACEY>': 0.0028649562474289095, 'coming home': 0.1677539608574091, '<MAX>': 0.0028187846513880953, 'quiet </s>': 0.1677539608574091, '<SEAN>': 0.002807

{'to family': 0.07722007722007722, 'ive': 0.15444015444015444, 'youre': 0.15444015444015444, 'family': 0.15444015444015444, '<MAX>': 0.0008212284327706495, 'anything for': 0.07722007722007722, u'<nsubj> i': 0.12200772200772202, 'thing ive': 0.07722007722007722, '<JANE>': 0.0007865222997550154, u'<IN>': 0.030888030888030892, 'id do': 0.07722007722007722, u'<VB>': 0.061776061776061784, 'family dot': 0.07722007722007722, 'closest thing': 0.07722007722007722, 'id': 0.15444015444015444, '<GARRY>': 0.0020220362537220556, '<TANYA>': 0.004168148954088265, u'<ROOT> do': 0.06100386100386101, '<MINTY>': 0.0016002705051614285, u'<VBN>': 0.030888030888030892, u'<JJS>': 0.030888030888030892, u'<ROOT> re': 0.06100386100386101, u'<TO>': 0.030888030888030892, u'<DT>': 0.030888030888030892, u'<MD>': 0.030888030888030892, 'got': 0.15444015444015444, '<s> youre': 0.07722007722007722, '<IAN>': 0.0016530427574683113, '<SHIRLEY>': 0.0010908046734801199, 'dot </s>': 0.07722007722007722, '<ROXY>': 0.0011793911

{'<STACEY>': 0.0075800816282421135, '<pause>': 0.6569343065693432, u'<CC>': 0.13138686131386862, u'<ROOT> but': 0.25948905109489057, '<HEATHER>': 0.01490514600352041, '<TANYA>': 0.011290601589973403, '<MINTY>': 0.021515644293823816, '<JACK>': 0.0034490816360658446, '<JANE>': 0.003439636754103241, '<s> but': 0.3284671532846716, '<RONNIE>': 0.0038277184784679926, '<MaximumDepthTree>': 1.48, 'but <pause>': 0.3284671532846716, '<MAX>': 0.004346997947332705, '<STEVEN>': 0.011634299386679705, '<GARRY>': 0.006255156556753406, '<IAN>': 0.0070111101775921164}
{'thats all': 0.14106583072100315, 'one drink': 0.14106583072100315, u'<RB>': 0.1692789968652038, 'dad <?>': 0.14106583072100315, u'<>': 0.05642633228840127, '<MAX>': 0.0019628888733104746, 'anything </s>': 0.14106583072100315, 'okay <pause>': 0.14106583072100315, 'one': 0.2821316614420063, 'drink alright': 0.14106583072100315, '<PHIL>': 0.002376482159401931, 'doesnt': 0.2821316614420063, 'just a': 0.14106583072100315, u'<VB>': 0.056426332

{'<STACEY>': 0.0015294416757676082, 'minty <?>': 0.19115044247787616, '<MAX>': 0.0015193318555437852, '<SEAN>': 0.0021335540610516015, '<JANE>': 0.004813923026811032, '<PHIL>': 0.0015359069801106268, '<MaximumDepthTree>': 2.96, u'<VB>': 0.07646017699115046, 'message': 0.3823008849557523, '<JACK>': 0.0045405296229852924, u'<RP>': 0.07646017699115046, u'<VBN>': 0.07646017699115046, '<GARRY>': 0.005282401085863007, '<TANYA>': 0.004721117357399885, u'<ROOT> minty': 0.15100884955752217, '<MINTY>': 0.002254481428215437, '<s> minty': 0.19115044247787616, 'your message': 0.19115044247787616, u'<PRP$>': 0.07646017699115046, 'minty': 0.3823008849557523, 'up early': 0.19115044247787616, '<IAN>': 0.0042209864551139165, '<SHIRLEY>': 0.005123911477885528, '<ROXY>': 0.00283688966109042, 'finished up': 0.19115044247787616, '<HEATHER>': 0.00744557019775575, u'<ROOT> finished': 0.15100884955752217, 'early': 0.3823008849557523, 'finished': 0.3823008849557523, '<RONNIE>': 0.002840185219092708, u'<ROOT> go

{'flower </s>': 0.20270270270270266, '<MaximumDepthTree>': 1.48, 'lotus': 0.4054054054054053, '<MAX>': 0.004524827794587551, 'lotus flower': 0.20270270270270266, '<SEAN>': 0.0020726925911723508, '<JANE>': 0.0026300699544120716, '<PHIL>': 0.0027242608818842597, '<GARRY>': 0.003861260165857412, 'flower': 0.4054054054054053, '<TANYA>': 0.006698348364126767, u'<NNP>': 0.08108108108108107, '<MINTY>': 0.002183911423568524, u'<ROOT> flower': 0.1601351351351351, '<IAN>': 0.004296449171638206, '<ROXY>': 0.0026508116722944713, '<HEATHER>': 0.008599475628633004, '<s> lotus': 0.20270270270270266, u'<NN>': 0.08108108108108107, '<STEVEN>': 0.0023608323576804046, '<CLARE>': 0.009310735189958364, '<JACK>': 0.004506406166104505, '<RONNIE>': 0.002363702421865873}
{u'<VBD>': 0.03740259740259741, 'dont': 0.18701298701298702, 'she believed': 0.09350649350649351, 'i dont': 0.09350649350649351, '<MAX>': 0.0010212675197600865, 'dont think': 0.09350649350649351, u'<nsubj> i': 0.07387012987012988, u'<ROOT> thin

{u'<VB>': 0.0736422215403498, 'love': 0.27615833077631174, 'find': 0.18410555385087451, 'certainly': 0.18410555385087451, 'who <?>': 0.09205277692543726, u'<JJS>': 0.11046333231052471, u'<VBG>': 0.0736422215403498, '<IAN>': 0.0016789601172871027, '<ROXY>': 0.0013871720831920423, '<?> for': 0.18410555385087451, '<HEATHER>': 0.0014149419241625883, '<?> best': 0.09205277692543726, u'<ROOT> gon': 0.07272169377109544, 'gonna': 0.27615833077631174, 'anyone who': 0.09205277692543726, u'<NN>': 0.0368211107701749, 'best <?>': 0.09205277692543726, 'you </s>': 0.18410555385087451, '<CLARE>': 0.00041130125272481193, u'<RB>': 0.18410555385087451, 'minty <?>': 0.09205277692543726, '<JANE>': 0.0009495402379996233, '<PHIL>': 0.0011121623980545358, 'best': 0.36821110770174903, 'you a': 0.09205277692543726, '<?> certainly': 0.09205277692543726, u'<NNP>': 0.0368211107701749, 'minty': 0.18410555385087451, 'a what': 0.09205277692543726, u'<PRP>': 0.22092666462104937, 'never': 0.18410555385087451, u'<WP>': 

{'right': 0.15221987315010574, '<MAX>': 0.0025442745756019207, u'<nsubj> i': 0.12025369978858354, 'doing the': 0.07610993657505287, '<s> i': 0.07610993657505287, '<JANE>': 0.002711928379204674, '<PHIL>': 0.0016363429336067456, u'<VBD>': 0.0608879492600423, 'i wa': 0.07610993657505287, '<GARRY>': 0.0013036332542425873, '<pause>': 0.15221987315010574, '<TANYA>': 0.001750506302795518, u'<JJ>': 0.03044397463002115, '<MINTY>': 0.0009155446042249994, 'thought i': 0.07610993657505287, u'<DT>': 0.03044397463002115, 'thing <pause>': 0.07610993657505287, 'the right': 0.07610993657505287, '<IAN>': 0.0008467798567947223, '<SHIRLEY>': 0.0017186119287033552, u'<PRP>': 0.0608879492600423, 'wa doing': 0.07610993657505287, '<HEATHER>': 0.0017410308505122934, '<RONNIE>': 0.0025424105494038646, u'<ROOT> thought': 0.06012684989429177, 'right thing': 0.07610993657505287, u'<NN>': 0.03044397463002115, '<SEAN>': 0.0018223044238721514, u'<VBG>': 0.03044397463002115, 'i thought': 0.07610993657505287, '<Maximum

{u'<VBD>': 0.06504065040650404, '<SEAN>': 0.0039178314434008535, '<JANE>': 0.005475639096847974, 'did you': 0.16260162601626013, '<STACEY>': 0.0018833933854574933, u'<VB>': 0.06504065040650404, 'know <?>': 0.16260162601626013, '<TANYA>': 0.011277432825310359, '<MINTY>': 0.00396720698716382, '<IAN>': 0.005542114233361152, '<SHIRLEY>': 0.0018428115879512404, 'you know': 0.16260162601626013, u'<PRP>': 0.06504065040650404, '<HEATHER>': 0.0037674517298970393, u'<nsubj> you': 0.1284552845528455, '<RONNIE>': 0.0019029558704796207, u'<ROOT> know': 0.1284552845528455, 'know': 0.32520325203252026, '<s> did': 0.16260162601626013, '<?>': 0.32520325203252026, '<BRADLEY>': 0.0036444264229260194, '<MaximumDepthTree>': 1.48, '<JACK>': 0.003933207961919869}
{'<STACEY>': 0.002008412579128859, 'one': 0.3719008264462811, '<JANE>': 0.007437900878040267, '<MaximumDepthTree>': 1.48, 'just a': 0.18595041322314054, 'one </s>': 0.18595041322314054, '<GARRY>': 0.006237780366329686, '<TANYA>': 0.00590506307269221

{'<STACEY>': 0.0022685344743986024, 'dont': 0.1994736113035046, '<SHIRLEY>': 0.0006834126179525851, '<MAX>': 0.0004039890597531903, 'bringing them': 0.0997368056517523, 'mind do': 0.0997368056517523, u'<WDT>': 0.03989472226070093, 'paper that': 0.0997368056517523, '<JANE>': 0.0006936836333386785, u'<IN>': 0.07978944452140185, 'paper': 0.1994736113035046, '<s> for': 0.0997368056517523, 'you dont': 0.0997368056517523, 'just some': 0.0997368056517523, 'need': 0.1994736113035046, '<ROXY>': 0.000830620407611217, 'trust': 0.1994736113035046, 'witness': 0.1994736113035046, u'<ROOT> for': 0.07879207646488433, u'<ROOT> bringing': 0.07879207646488433, 'for ben': 0.0997368056517523, '<TANYA>': 0.005251187821916615, 'bringing': 0.1994736113035046, '<s> it': 0.0997368056517523, u'<NNP>': 0.03989472226070093, '<s> you': 0.0997368056517523, u'<dobj> signature': 0.07879207646488433, '<MINTY>': 0.0012546806509283696, 'ben trust': 0.0997368056517523, '<GARRY>': 0.0014058806065243156, u'<PRP$>': 0.039894

{'<STACEY>': 0.005927753654800246, u'<ROOT> ask': 0.12206008583690989, 'that <?>': 0.15450643776824038, 'not </s>': 0.15450643776824038, 'no </s>': 0.15450643776824038, '<s> no': 0.30901287553648077, '<SEAN>': 0.002827473143510252, '<JANE>': 0.0011105576701432181, '<PHIL>': 0.0014981270228972498, '<MaximumDepthTree>': 4.4399999999999995, u'<VB>': 0.06180257510729615, 'is about': 0.15450643776824038, 'ask me': 0.15450643776824038, u'<ROOT> no': 0.24412017167381977, '<GARRY>': 0.0017273107537605738, u'<PRP>': 0.1236051502145923, 'even': 0.30901287553648077, u'<nsubj> this': 0.12206008583690989, '<TANYA>': 0.007345632653105277, '<MINTY>': 0.0015926030990920104, u'<dobj> me': 0.12206008583690989, u'<MD>': 0.06180257510729615, '<CHRISTIAN>': 0.001078487016006617, 'course not': 0.15450643776824038, u'<nsubj> that': 0.12206008583690989, u'<dobj> that': 0.12206008583690989, '<IAN>': 0.004851285422673278, '<SHIRLEY>': 0.0017560925753184698, u'<ROOT> is': 0.12206008583690989, 'can you': 0.154506

{'starter </s>': 0.1541095890410959, '<MAX>': 0.0017469498929079553, '<SEAN>': 0.0032546885595773076, '<JANE>': 0.0016412362563745378, u'<IN>': 0.06164383561643835, '<STACEY>': 0.0050408941894488175, '<TANYA>': 0.005202356897046485, '<MINTY>': 0.003319946155437728, 'starter': 0.3082191780821918, u'<PRP$>': 0.06164383561643835, 'for starter': 0.1541095890410959, '<IAN>': 0.0016822604477981138, '<SHIRLEY>': 0.003497751634564145, '<ROXY>': 0.005769313639215714, 'kid for': 0.1541095890410959, '<RONNIE>': 0.0017048999174576322, '<s> my': 0.1541095890410959, '<BRADLEY>': 0.0036554735354962543, 'kid': 0.3082191780821918, u'<ROOT> kid': 0.12174657534246575, u'<NNS>': 0.1232876712328767, '<CHRISTIAN>': 0.0029948856195792907, '<MaximumDepthTree>': 2.96, '<STEVEN>': 0.0017285190056166107, '<CLARE>': 0.0017336687352115059, '<JACK>': 0.001718936336185707, 'my kid': 0.1541095890410959}
{'<STACEY>': 0.006741689003403129, '<SHIRLEY>': 0.001837776237404004, '<MAX>': 0.0018926702394614304, '<SEAN>': 0.0

{'<SHIRLEY>': 0.009940440032246721, '<STACEY>': 0.009665547896878667, '<CHRISTIAN>': 0.005310106355010548, '<RONNIE>': 0.01398845430186405, u'<ROOT> morning': 0.25948905109489057, '<MAX>': 0.015363280701042505, '<HEATHER>': 0.003611821864375997, 'morning </s>': 0.3284671532846716, '<MINTY>': 0.003307297263149857, 'morning': 0.6569343065693432, '<PHIL>': 0.012495728574419166, '<TANYA>': 0.004250014808223012, '<s> morning': 0.3284671532846716, '<MaximumDepthTree>': 1.48, u'<NN>': 0.13138686131386862, '<JACK>': 0.008057167629951664, '<CLARE>': 0.0033497616570221113, '<GARRY>': 0.005915853368370458}
{'<s> because': 0.07992895204262879, u'<VBD>': 0.031971580817051516, u'<VB>': 0.031971580817051516, '<SHIRLEY>': 0.0020696225215709406, 'if i': 0.07992895204262879, 'blower to': 0.07992895204262879, 'on the': 0.07992895204262879, 'to your': 0.07992895204262879, '<JANE>': 0.0008205401366809064, u'<IN>': 0.09591474245115457, '<STACEY>': 0.001349925695408961, 'she want': 0.07992895204262879, 'want

{'<s> yeah': 0.07783783783783788, 'right': 0.15567567567567575, 'here though': 0.07783783783783788, '<MaximumDepthTree>': 2.96, u'<ROOT> left': 0.061491891891891914, '<SHIRLEY>': 0.0030217331081983795, 'though aint': 0.07783783783783788, '<MAX>': 0.0012791943678935956, 'yeah': 0.15567567567567575, '<SEAN>': 0.001280811813946926, '<JANE>': 0.0009053045056503034, u'<IN>': 0.062270270270270295, '<STACEY>': 0.0012573426243462297, 'but he': 0.07783783783783788, '<MINTY>': 0.0009047876615854052, u'<VBZ>': 0.062270270270270295, 'yeah but': 0.07783783783783788, 'least </s>': 0.07783783783783788, u'<VBN>': 0.031135135135135147, '<GARRY>': 0.0014983236826951248, 'aint': 0.15567567567567575, 'he': 0.15567567567567575, 'he <?>': 0.07783783783783788, 'he left': 0.07783783783783788, '<TANYA>': 0.0029662484096914955, 'squatter right': 0.07783783783783788, '<?> thats': 0.07783783783783788, 'least': 0.15567567567567575, u'<CC>': 0.031135135135135147, u'<JJS>': 0.031135135135135147, u'<DT>': 0.031135135

{'anyway': 0.1710010687566797, 'new opening': 0.08550053437833985, '<MaximumDepthTree>': 5.92, u'<VB>': 0.06840042750267188, 'menu': 0.1710010687566797, u'<nsubj> hour': 0.06754542215888848, '<MAX>': 0.0026701455854845025, u'<VBG>': 0.03420021375133594, 'excited': 0.1710010687566797, '<IAN>': 0.00287096486753263, '<ROXY>': 0.0006057208671355056, 'ya': 0.1710010687566797, '<HEATHER>': 0.001241880615587537, 'were gonna': 0.08550053437833985, 'gonna': 0.1710010687566797, u'<VBP>': 0.03420021375133594, u'<ROOT> got': 0.06754542215888848, 'got': 0.1710010687566797, u'<NN>': 0.13680085500534375, '<s> we': 0.08550053437833985, '<CHRISTIAN>': 0.0011412668878214157, '<CLARE>': 0.0007139230522786415, 'ya later': 0.08550053437833985, u'<dobj> ya': 0.06754542215888848, u'<RB>': 0.13680085500534375, 'later </s>': 0.08550053437833985, u'<ROOT> think': 0.06754542215888848, '<JANE>': 0.0012408224440307444, '<PHIL>': 0.0006921051095649559, 'see': 0.1710010687566797, 'very well': 0.08550053437833985, 's

{'nice one': 0.2027027027027027, '<MAX>': 0.006056960597270996, 'one': 0.4054054054054054, '<JANE>': 0.004315760338676549, '<MaximumDepthTree>': 1.48, 'one </s>': 0.2027027027027027, '<GARRY>': 0.0038064458604112287, '<TANYA>': 0.001999954435619884, u'<JJ>': 0.08108108108108109, '<IAN>': 0.004247858044266462, '<SHIRLEY>': 0.0021716341598305654, '<ROXY>': 0.007134198011264798, '<HEATHER>': 0.0019990068911425623, '<RONNIE>': 0.0028017109534733795, u'<CD>': 0.08108108108108109, u'<ROOT> one': 0.16013513513513514, '<CHRISTIAN>': 0.007128170546370691, '<STEVEN>': 0.004039730843669623, '<CLARE>': 0.004542221717287966, '<JACK>': 0.008540131384499076, '<s> nice': 0.2027027027027027, 'nice': 0.4054054054054054}
{'<SHIRLEY>': 0.0037211609821346746, '<STACEY>': 0.007447083804287108, '<ROXY>': 0.003738722570721834, '<CHRISTIAN>': 0.005972915988390573, '<TANYA>': 0.029760904189921538, '<MAX>': 0.007440849553211229, '<s> what': 0.3284671532846715, '<MINTY>': 0.003719080671705091, '<SEAN>': 0.0037142

{'saying': 0.09879253567508237, '<STACEY>': 0.0017421922906105537, 'not just': 0.04939626783754118, 'feel': 0.09879253567508237, '<SEAN>': 0.0021689611210516124, '<PHIL>': 0.0016179961864994142, 'sure youre': 0.04939626783754118, u'<VB>': 0.03951701427003295, 'make me': 0.04939626783754118, 'you sure': 0.04939626783754118, '<GARRY>': 0.0017693127811125463, u'<PRP>': 0.059275521405049436, '<TANYA>': 0.0005124782549279498, 'make': 0.09879253567508237, u'<nsubj> me': 0.03902305159165754, '<MINTY>': 0.0005197698816704822, 'saying that': 0.04939626783754118, 'good <?>': 0.04939626783754118, u'<DT>': 0.019758507135016475, u'<TO>': 0.019758507135016475, u'<VBG>': 0.019758507135016475, u'<dobj> that': 0.03902305159165754, '<SHIRLEY>': 0.001120087564560257, 'me feel': 0.04939626783754118, 'feel good': 0.04939626783754118, 'sure': 0.09879253567508237, 'that to': 0.04939626783754118, u'<ROOT> sure': 0.03902305159165754, '<HEATHER>': 0.0010394161497703448, u'<nsubj> you': 0.07804610318331508, '<s>

{'<STACEY>': 0.0037612891827582305, 'start this': 0.08314087759815243, '<SEAN>': 0.0009356053103968952, '<JANE>': 0.0009832771816397463, '<MaximumDepthTree>': 1.48, u'<VB>': 0.03325635103926097, 'again are': 0.08314087759815243, 'we going': 0.08314087759815243, u'<dobj> this': 0.06568129330254041, 'to start': 0.08314087759815243, 'start': 0.16628175519630486, '<TANYA>': 0.0037872644140223887, '<MINTY>': 0.001829405078624581, 'going to': 0.08314087759815243, 'going': 0.16628175519630486, '<GARRY>': 0.0014163879957615714, u'<DT>': 0.03325635103926097, u'<TO>': 0.03325635103926097, 'we <?>': 0.08314087759815243, 'this again': 0.08314087759815243, '<IAN>': 0.0038834274574646365, '<ROXY>': 0.0009566512251385988, u'<PRP>': 0.06651270207852195, u'<ROOT> are': 0.06568129330254041, u'<VBP>': 0.03325635103926097, 'are we': 0.08314087759815243, '<RONNIE>': 0.0018009827177950216, '<?>': 0.16628175519630486, u'<VBG>': 0.03325635103926097, '<s> we': 0.08314087759815243, u'<RB>': 0.03325635103926097,

{'if you': 0.0927357032457496, u'<VBD>': 0.03709428129829985, 'dont': 0.1854714064914992, 'i dont': 0.0927357032457496, '<MAX>': 0.001506690236341141, u'<nsubj> i': 0.14652241112828437, 'rat </s>': 0.0927357032457496, u'<IN>': 0.03709428129829985, 'rat': 0.1854714064914992, '<STACEY>': 0.0010433944411995777, u'<VB>': 0.0741885625965997, u'<dobj> rat': 0.07326120556414219, '<GARRY>': 0.0017548863504734038, '<TANYA>': 0.00477929461411165, 'me </s>': 0.0927357032457496, '<MINTY>': 0.002091127586990352, u'<dobj> me': 0.07326120556414219, u'<MD>': 0.03709428129829985, 'i wouldnt': 0.0927357032457496, '<IAN>': 0.0020891449440214303, '<SHIRLEY>': 0.001632236190139517, '<ROXY>': 0.0010269698324907955, u'<PRP>': 0.1483771251931994, 'like rat': 0.0927357032457496, u'<ROOT> eat': 0.07326120556414219, 'paid': 0.1854714064914992, u'<VBP>': 0.03709428129829985, 'wouldnt eat': 0.0927357032457496, '<RONNIE>': 0.002040534663574971, '<HEATHER>': 0.0010190747116277559, u'<nsubj> you': 0.07326120556414219

{'all quiet': 0.06541886243866982, 'on me': 0.06541886243866982, u'<VBD>': 0.02616754497546793, 'whats': 0.13083772487733963, 'dont': 0.13083772487733963, '<SHIRLEY>': 0.001081838139205312, '<PHIL>': 0.0015586796563039507, '<MAX>': 0.0010765516892323214, u'<nsubj> i': 0.05168090132654916, 'community': 0.13083772487733963, '<s> i': 0.06541886243866982, '<JANE>': 0.0007455040666188175, u'<IN>': 0.02616754497546793, 'see': 0.13083772487733963, '<s> whats': 0.06541886243866982, u'<ROOT> see': 0.05168090132654916, u'<VB>': 0.05233508995093586, 'go': 0.13083772487733963, '<GARRY>': 0.0005706559272733487, '</s> dont': 0.06541886243866982, 'leave': 0.13083772487733963, 'centreoi': 0.13083772487733963, '<TANYA>': 0.003714313002132582, u'<JJ>': 0.02616754497546793, u'<NNP>': 0.02616754497546793, 'me </s>': 0.06541886243866982, '<MINTY>': 0.001430080702094097, 'leave the': 0.06541886243866982, u'<DT>': 0.05233508995093586, 'the community': 0.06541886243866982, 'see you': 0.06541886243866982, u'<R

{'making his': 0.07661385670371246, 'different place': 0.07661385670371246, 'way round': 0.07661385670371246, 'really want': 0.07661385670371246, 'wouldnt mind': 0.07661385670371246, 'just seeing': 0.07661385670371246, '<MaximumDepthTree>': 7.4, 'world </s>': 0.07661385670371246, u'<VB>': 0.09193662804445497, 'this one': 0.07661385670371246, 'what i': 0.07661385670371246, 'to know': 0.07661385670371246, '<pause>': 0.6129108536296997, 'round the': 0.07661385670371246, 'mind some': 0.07661385670371246, 'place <pause>': 0.07661385670371246, u'<nsubj> year': 0.06052494679593285, 'start travelling': 0.07661385670371246, 'want <?>': 0.07661385670371246, 'travelling': 0.15322771340742491, 'am answering': 0.07661385670371246, '<IAN>': 0.0025381653951383823, 'it a': 0.07661385670371246, u'<ROOT> is': 0.06052494679593285, 'stace </s>': 0.07661385670371246, 'travelling you': 0.07661385670371246, '<HEATHER>': 0.0016568016476662367, u'<VBP>': 0.15322771340742491, 'know': 0.15322771340742491, 'but t

{'antique': 0.1775439750123295, '</s> know': 0.08877198750616475, u'<ROOT> thing': 0.07012987012987015, '<STACEY>': 0.0016395610969869665, 'in the': 0.08877198750616475, 'alot of': 0.08877198750616475, 'people': 0.1775439750123295, 'that boring': 0.08877198750616475, 'the most': 0.08877198750616475, '<s> antique': 0.08877198750616475, u'<ROOT> to': 0.07012987012987015, '<s> it': 0.08877198750616475, 'boring </s>': 0.08877198750616475, '<SEAN>': 0.00023272442647612743, '<JANE>': 0.0012535807737787388, 'wonderful thing': 0.08877198750616475, '<MaximumDepthTree>': 5.92, '<MINTY>': 0.0015179838849117178, u'<VB>': 0.0355087950024659, 'most wonderful': 0.08877198750616475, 'find': 0.1775439750123295, '<GARRY>': 0.001884820124089421, 'thing in': 0.08877198750616475, 'roadshowi': 0.1775439750123295, 'find that': 0.08877198750616475, '<TANYA>': 0.0026225557501713737, u'<JJ>': 0.0710175900049318, u'<NNP>': 0.0710175900049318, 'me </s>': 0.08877198750616475, 'world </s>': 0.08877198750616475, 'to

{'<s> no': 0.09724473257698538, u'<VBD>': 0.03889789303079415, 'get her': 0.09724473257698538, 'go': 0.19448946515397075, 'land lie': 0.09724473257698538, '<s> see': 0.09724473257698538, u'<JJS>': 0.03889789303079415, '<BRADLEY>': 0.00107111138085511, '<s> ill': 0.09724473257698538, '<IAN>': 0.0015340097184738627, 'mate </s>': 0.09724473257698538, 'go </s>': 0.09724473257698538, 'a stateshe': 0.09724473257698538, 'get': 0.19448946515397075, 'i go': 0.09724473257698538, '<HEATHER>': 0.0013251949191965767, u'<ROOT> want': 0.07682333873581844, 'wont want': 0.09724473257698538, u'<NN>': 0.11669367909238246, 'wont': 0.19448946515397075, u'<WRB>': 0.03889789303079415, 'shes got': 0.09724473257698538, 'to see': 0.09724473257698538, 'if shes': 0.09724473257698538, '<CHRISTIAN>': 0.0005320922707199021, '<CLARE>': 0.0005842114136697101, 'into a': 0.09724473257698538, u'<RB>': 0.0777957860615883, u'<VB>': 0.19448946515397075, u'<nsubj> i': 0.07682333873581844, '<JANE>': 0.0008628785449873494, u'<

{'<STACEY>': 0.0007546463449503721, 'i supposed': 0.06972690296339339, 'cheating hand': 0.06972690296339339, 'do <?>': 0.06972690296339339, 'his cheating': 0.06972690296339339, '<MAX>': 0.0010927080513522963, u'<ROOT> keep': 0.055084253341080774, '<SEAN>': 0.0014764175110267223, '<JANE>': 0.0034560766709999962, '<PHIL>': 0.0003732279679596821, '<JACK>': 0.0007293830615846632, u'<VB>': 0.05578152237071471, u'<VBZ>': 0.027890761185357354, '<?> keep': 0.06972690296339339, 'keep': 0.13945380592678677, u'<VBN>': 0.027890761185357354, '<GARRY>': 0.00121865238959769, 'cheating': 0.13945380592678677, '<TANYA>': 0.0029423446090705878, '<?>': 0.27890761185357354, 'to do': 0.06972690296339339, u'<dobj> what': 0.055084253341080774, u'<ROOT> supposed': 0.055084253341080774, 'get his': 0.06972690296339339, u'<PRP$>': 0.05578152237071471, u'<TO>': 0.027890761185357354, u'<VBG>': 0.027890761185357354, 'shtoom while': 0.06972690296339339, '<IAN>': 0.0018569618988789457, 'so what': 0.06972690296339339, 

{'<s> he': 0.1195219123505976, '<STACEY>': 0.004816593010974203, 'my head': 0.1195219123505976, '<JANE>': 0.0038842260972668983, '<PHIL>': 0.0012222837974772704, '<MaximumDepthTree>': 1.48, 'head in': 0.1195219123505976, u'<dobj> head': 0.09442231075697212, 'doing my': 0.1195219123505976, '<GARRY>': 0.00200076788721236, '<TANYA>': 0.004718190520848205, u'<PRP$>': 0.04780876494023904, u'<VBG>': 0.04780876494023904, 'in </s>': 0.1195219123505976, '<SHIRLEY>': 0.0011424625817445793, 'head': 0.2390438247011952, '<ROXY>': 0.002508303528376609, u'<PRP>': 0.04780876494023904, '<HEATHER>': 0.0011473906384291078, u'<ROOT> doing': 0.09442231075697212, '<RONNIE>': 0.0011617890108363924, 'he doing': 0.1195219123505976, u'<VBZ>': 0.04780876494023904, u'<NN>': 0.04780876494023904, '<SEAN>': 0.0012020648514425205, 'he': 0.2390438247011952, u'<nsubj> he': 0.09442231075697212, '<CHRISTIAN>': 0.009558866753057258, u'<IN>': 0.04780876494023904, '<CLARE>': 0.0012984159040079057}
{'<SHIRLEY>': 0.0035500716

{u'<VBD>': 0.06394316163410303, 'do it': 0.15985790408525757, '<SEAN>': 0.006878594061389183, '<STACEY>': 0.0017929862048678567, u'<VB>': 0.06394316163410303, '<TANYA>': 0.00917935760246879, '<MINTY>': 0.0033787530976203506, 'it <?>': 0.15985790408525757, u'<dobj> it': 0.1262877442273535, '<SHIRLEY>': 0.0034148588029523502, 'did he': 0.15985790408525757, u'<PRP>': 0.12788632326820606, '<HEATHER>': 0.0036462620460474176, '<RONNIE>': 0.008844429521700555, '<s> did': 0.15985790408525757, '<?>': 0.31971580817051515, u'<nsubj> he': 0.1262877442273535, '<CHRISTIAN>': 0.005693577895425122, '<MaximumDepthTree>': 1.48, 'he do': 0.15985790408525757, '<CLARE>': 0.001751117759510087, '<JACK>': 0.0017788551927429895, u'<ROOT> do': 0.1262877442273535}
{'<STACEY>': 0.0030391504355017584, 'help': 0.17991004497751126, '<SHIRLEY>': 0.0004994876223990203, '<MAX>': 0.0020216007296419373, 'to help': 0.08995502248875563, '<PHIL>': 0.0020788034543787214, '<MaximumDepthTree>': 4.4399999999999995, 'want': 0.17

{'of blackmailing': 0.049046321525885554, 'then shes': 0.049046321525885554, u'<VB>': 0.03923705722070844, '<PHIL>': 0.0005129523200798073, '<MAX>': 0.000847638840023905, u'<nsubj> i': 0.03874659400544959, u'<$>': 0.01961852861035422, u'<dobj> u': 0.07749318801089917, '<SEAN>': 0.000801003464369554, '<JANE>': 0.0005229179989943987, u'<IN>': 0.058855585831062665, 'doesnt': 0.09809264305177111, 'shes': 0.14713896457765666, 'get her': 0.049046321525885554, u'<CD>': 0.01961852861035422, '<STACEY>': 0.0008025004374182411, u'<ROOT> suppose': 0.03874659400544959, 'u </s>': 0.049046321525885554, '<GARRY>': 0.0012404085223257344, 'sort of': 0.049046321525885554, 'u to': 0.049046321525885554, 'cut': 0.09809264305177111, 'of the': 0.049046321525885554, 'well i': 0.049046321525885554, 'mummy': 0.09809264305177111, 'the magazine': 0.049046321525885554, 'i suppose': 0.049046321525885554, 'cut of': 0.049046321525885554, '<MINTY>': 0.0019652769467326746, 'magazine </s>': 0.049046321525885554, 'suppose

{u'<VBD>': 0.07399794450154164, '<s> were': 0.18499486125385406, '<MAX>': 0.0019676782915195286, u'<nsubj> i': 0.14614594039054474, '<s> i': 0.18499486125385406, '<PHIL>': 0.0021279846270404455, 'cant': 0.3699897225077081, '<STACEY>': 0.0020780821507356126, u'<VB>': 0.07399794450154164, 'go': 0.3699897225077081, u'<PRP>': 0.07399794450154164, 'busy': 0.3699897225077081, 'were too': 0.18499486125385406, '<TANYA>': 0.009619260209925994, u'<JJ>': 0.07399794450154164, '<MINTY>': 0.006001072381554779, u'<ROOT> go': 0.14614594039054474, u'<MD>': 0.07399794450154164, '<SHIRLEY>': 0.0070966963731344986, '<ROXY>': 0.0030657012256633857, 'go </s>': 0.18499486125385406, 'i cant': 0.18499486125385406, 'too busy': 0.18499486125385406, '<SEAN>': 0.0047197521625189556, '<BRADLEY>': 0.0020682691868781043, '<CHRISTIAN>': 0.003286870704421363, '<MaximumDepthTree>': 2.96, u'<RB>': 0.14799588900308328, 'cant go': 0.18499486125385406, '<JACK>': 0.004206441398011177, u'<ROOT> were': 0.14614594039054474, '<R

{'<STACEY>': 0.0008793773004630351, u'<>': 0.030094043887147346, 'one of': 0.07523510971786836, 'which one': 0.07523510971786836, 'hard': 0.1504702194357367, 'teenager <?>': 0.07523510971786836, 'one': 0.1504702194357367, '<JANE>': 0.00167463774759656, u'<IN>': 0.030094043887147346, '<MaximumDepthTree>': 2.96, u'<VB>': 0.06018808777429469, 'yous': 0.1504702194357367, '<GARRY>': 0.0012288312337937481, u'<PRP>': 0.030094043887147346, 'hormonal teenager': 0.07523510971786836, 'it hard': 0.07523510971786836, u'<dobj> me': 0.059435736677116005, 'hormonal': 0.1504702194357367, 'to tell': 0.07523510971786836, '<TANYA>': 0.0020005061028806792, u'<JJ>': 0.09028213166144204, '<MINTY>': 0.0011788464849426517, 'me <pause>': 0.07523510971786836, 'teenager': 0.1504702194357367, 'of yous': 0.07523510971786836, u'<TO>': 0.030094043887147346, u'<ROOT> remind': 0.059435736677116005, 'tell sometimes': 0.07523510971786836, 'tell': 0.1504702194357367, '<IAN>': 0.0023220336497290824, '<SHIRLEY>': 0.00081198

{'<s> yeah': 0.08082622361921868, '<STACEY>': 0.0026894066055533124, '<SHIRLEY>': 0.0008873427852795274, '<MAX>': 0.0018354013466489124, 'yeah': 0.16165244723843736, 'to go': 0.08082622361921868, '<SEAN>': 0.0008818929439999388, '<JANE>': 0.0019407470054539637, 'ready': 0.16165244723843736, '<MaximumDepthTree>': 4.4399999999999995, 'ready to': 0.08082622361921868, 'yeah <pause>': 0.08082622361921868, u'<VB>': 0.03233048944768747, 'go': 0.16165244723843736, '<JACK>': 0.0017740644056706508, '<pause>': 0.16165244723843736, '<TANYA>': 0.006488079285228399, u'<JJ>': 0.03233048944768747, u'<TO>': 0.03233048944768747, 'you ready': 0.08082622361921868, '<ROXY>': 0.0008805509958530009, u'<PRP>': 0.03233048944768747, u'<ROOT> are': 0.06385271665918275, u'<nsubj> you': 0.06385271665918275, u'<VBP>': 0.03233048944768747, '<RONNIE>': 0.0008791501978127497, '<?>': 0.16165244723843736, '<pause> are': 0.08082622361921868, '<BRADLEY>': 0.0008800438632941033, 'are you': 0.08082622361921868, '<CHRISTIAN>

{'<STACEY>': 0.0018857797534975037, u'<RB>': 0.13006824568446418, 'wait </s>': 0.10839020473705344, '<MAX>': 0.002206773832309189, 'it together': 0.10839020473705344, 'back': 0.21678040947410687, 'rainey': 0.21678040947410687, '<SEAN>': 0.0018381690305119423, '<JANE>': 0.003424842876658468, '<PHIL>': 0.0006381542007661755, '<MaximumDepthTree>': 4.4399999999999995, u'<VB>': 0.13006824568446418, '<s> look': 0.10839020473705344, u'<RP>': 0.043356081894821374, '<?> we': 0.10839020473705344, 'rainey wait': 0.10839020473705344, 'explain': 0.21678040947410687, '<s> rainey': 0.10839020473705344, 'get back': 0.10839020473705344, '<MINTY>': 0.0024390630650058948, 'til': 0.21678040947410687, u'<dobj> it': 0.08562826174227223, u'<MD>': 0.043356081894821374, u'<nsubj> they': 0.08562826174227223, 'wait': 0.21678040947410687, '<CHRISTIAN>': 0.0014169208473205905, 'back <?>': 0.10839020473705344, '<IAN>': 0.001474996532063802, 'stay til': 0.10839020473705344, '<SHIRLEY>': 0.0002718276226734172, '<ROXY

{u'<VBD>': 0.07399794450154164, '<MAX>': 0.001034243939601334, 'no what': 0.09249743062692703, u'<nsubj> i': 0.07307297019527237, u'<ROOT> meant': 0.07307297019527237, '<SEAN>': 0.0031438180987277166, '<JANE>': 0.005260200665490764, '<STACEY>': 0.0011186104229174527, 'what i': 0.09249743062692703, 'well no': 0.09249743062692703, 'no no': 0.09249743062692703, '<pause>': 0.18499486125385406, '<TANYA>': 0.00520378976726167, '<MINTY>': 0.0010326012654463188, 'i meant': 0.09249743062692703, '<IAN>': 0.003171622686173322, 'meant': 0.18499486125385406, u'<PRP>': 0.03699897225077082, 'meant wa': 0.09249743062692703, '<HEATHER>': 0.0010380666480697062, '<s> well': 0.09249743062692703, '<RONNIE>': 0.0010421922902773603, u'<dobj> what': 0.07307297019527237, '<BRADLEY>': 0.0010387264767936497, '<CHRISTIAN>': 0.0016566588076752063, u'<WP>': 0.03699897225077082, '<MaximumDepthTree>': 1.48, 'well': 0.18499486125385406, '<JACK>': 0.002083723813374335, u'<UH>': 0.11099691675231245, 'wa <pause>': 0.0924

{'<STACEY>': 0.0039658264241982785, u'<CC>': 0.03486682808716707, '<MAX>': 0.0028968004179453703, '<SEAN>': 0.0019616178218477775, '<MaximumDepthTree>': 2.96, 'but he': 0.08716707021791767, u'<VB>': 0.03486682808716707, 'that would': 0.08716707021791767, 'he <?>': 0.08716707021791767, 'would': 0.17433414043583534, '<TANYA>': 0.0018703602862811292, 'he wouldnt': 0.08716707021791767, '<MINTY>': 0.0009494111485563891, 'would he': 0.08716707021791767, '<GARRY>': 0.0015934335973486178, u'<DT>': 0.03486682808716707, u'<MD>': 0.06973365617433414, 'do that': 0.08716707021791767, u'<dobj> that': 0.06886198547215495, '<IAN>': 0.000984772539060561, '<SHIRLEY>': 0.0009014645991973436, '<ROXY>': 0.0018119557298395229, u'<PRP>': 0.06973365617433414, '<HEATHER>': 0.0029353076382338176, '<s> but': 0.08716707021791767, '<RONNIE>': 0.0029704636811924063, '<?>': 0.17433414043583534, 'wouldnt do': 0.08716707021791767, u'<nsubj> he': 0.1377239709443099, '<CHRISTIAN>': 0.001467776878714725, 'wouldnt': 0.174

{u'<nsubj> her': 0.07107630789736755, '<STACEY>': 0.0007698769002505402, 'in case': 0.08997000999666778, 'until then': 0.08997000999666778, u'<CC>': 0.03598800399866711, '<MAX>': 0.0005224775716632396, u'<nsubj> i': 0.07107630789736755, 'back': 0.17994001999333556, '<SEAN>': 0.0016582135565631657, '<JANE>': 0.0012844310099736679, u'<IN>': 0.07197600799733422, '<MaximumDepthTree>': 5.92, 'im': 0.17994001999333556, u'<VB>': 0.07197600799733422, u'<EX>': 0.03598800399866711, '<GARRY>': 0.0024311775345522402, 'make': 0.17994001999333556, 'youll both': 0.08997000999666778, 'then </s>': 0.08997000999666778, 'later in': 0.08997000999666778, 'very welcome': 0.08997000999666778, '<CLARE>': 0.0013794204418008898, '<TANYA>': 0.0031974915631408926, u'<JJ>': 0.07197600799733422, u'<NNP>': 0.03598800399866711, 'any problem': 0.08997000999666778, '<MINTY>': 0.0007624292073777625, 'case there': 0.08997000999666778, 'both make': 0.08997000999666778, u'<DT>': 0.07197600799733422, u'<MD>': 0.035988003998

In [38]:
# Function provided in lectures to calculate most likely tag sequence by the Viterbi process

# Viterbi:
# The best tag sequence?
#
# The method above lets us determine the probability for a single tag sequence.
# Calculated using the probability from all possible tag sequences using the Brown corpus.

brown_tags_words = [ ]
for sent in brown.tagged_sents():
    # sent is a list of word/tag pairs
    # add START/START at the beginning
    brown_tags_words.append( ("START", "START") )
    # then all the tag/word pairs for the word/tag pairs in the sentence.
    # shorten tags to 2 characters each
    brown_tags_words.extend([ (tag[:2], word) for (word, tag) in sent ])
    # then END/END
    brown_tags_words.append( ("END", "END") )
    
brown_tags = [tag for (tag, word) in brown_tags_words ]

# what is the list of all tags?
distinct_tags = set(brown_tags)

# conditional frequency distribution
cfd_tagwords = nltk.ConditionalFreqDist(brown_tags_words)
# conditional probability distribution
cpd_tagwords = nltk.ConditionalProbDist(cfd_tagwords, nltk.MLEProbDist)

# make conditional frequency distribution:
# count(t{i-1} ti)
cfd_tags= nltk.ConditionalFreqDist(nltk.bigrams(brown_tags))
# make conditional probability distribution, using
# maximum likelihood estimate:
# P(ti | t{i-1})
cpd_tags = nltk.ConditionalProbDist(cfd_tags, nltk.MLEProbDist)

def Viterbi(sentence):
    #sentence = ["I", "like", "it", ",", "don't", "you"]

    # viterbi:
    # for each step i in 1 .. sentlen, store a dictionary that maps each tag X
    # to the probability of the best tag sequence of length i that ends in X
    viterbi = [ ]

    # backpointer:
    # for each step i in 1..sentlen, store a dictionary that maps each tag X
    # to the previous tag in the best tag sequence of length i that ends in X
    backpointer = [ ]

    first_viterbi = { }
    first_backpointer = { }
    for tag in distinct_tags:
        # don't record anything for the START tag
        if tag == "START": continue
        first_viterbi[ tag ] = cpd_tags["START"].prob(tag) * cpd_tagwords[tag].prob( sentence[0] )
        first_backpointer[ tag ] = "START"

    viterbi.append(first_viterbi)
    backpointer.append(first_backpointer)

    current_best = max(first_viterbi.keys(), key = lambda tag: first_viterbi[ tag ])

    for wordindex in range(1, len(sentence)):
        this_viterbi = { }
        this_backpointer = { }
        prev_viterbi = viterbi[-1]

        for tag in distinct_tags:
            # don't record anything for the START tag
            if tag == "START": continue

            # if this tag is X and the current word is w, then find the previous tag Y such that
            # the best tag sequence that ends in X actually ends in Y X that is, the Y that maximizes
            # prev_viterbi[ Y ] * P(X | Y) * P( w | X) 
            # The following command has the same notation that you saw in the sorted() command.
            best_previous = max(prev_viterbi.keys(),
                                key = lambda prevtag: \
                prev_viterbi[ prevtag ] * cpd_tags[prevtag].prob(tag) * cpd_tagwords[tag].prob(sentence[wordindex]))

            # Instead, we can also use the following longer code:
            # best_previous = None
            # best_prob = 0.0
            # for prevtag in distinct_tags:
            #    prob = prev_viterbi[ prevtag ] * cpd_tags[prevtag].prob(tag) * cpd_tagwords[tag].prob(sentence[wordindex])
            #    if prob > best_prob:
            #        best_previous= prevtag
            #        best_prob = prob
            #
            this_viterbi[ tag ] = prev_viterbi[ best_previous] * \
                cpd_tags[ best_previous ].prob(tag) * cpd_tagwords[ tag].prob(sentence[wordindex])
            this_backpointer[ tag ] = best_previous

        current_best = max(this_viterbi.keys(), key = lambda tag: this_viterbi[ tag ])

        # done with all tags in this iteration so store the current viterbi step
        viterbi.append(this_viterbi)
        backpointer.append(this_backpointer)


    # done with all words in the sentence. now find the probability of each tag
    # to have "END" as the next tag, and use that to find the overall best sequence
    prev_viterbi = viterbi[-1]
    best_previous = max(prev_viterbi.keys(),
                        key = lambda prevtag: prev_viterbi[ prevtag ] * cpd_tags[prevtag].prob("END"))

    prob_tagsequence = prev_viterbi[ best_previous ] * cpd_tags[ best_previous].prob("END")

    # best tagsequence: we store this in reverse for now, will invert later
    best_tagsequence = [ "END", best_previous ]
    # invert the list of backpointers
    backpointer.reverse()

    # go backwards through the list of backpointers
    # (or in this case forward, because we have inverter the backpointer list)
    # in each case:
    # the following best tag is the one listed under the backpointer for the current best tag
    current_best_tag = best_previous
    for bp in backpointer:
        best_tagsequence.append(bp[current_best_tag])
        current_best_tag = bp[current_best_tag]

    best_tagsequence.reverse()
    
    return ' '.join(best_tagsequence)