In [1]:
# %load q4_sentiment.py
#!/usr/bin/env python

import argparse
import numpy as np
import matplotlib

matplotlib.use('agg')
import matplotlib.pyplot as plt
import itertools

from utils.treebank import StanfordSentiment
import utils.glove as glove

from q3_sgd import load_saved_params, sgd

# We will use sklearn here because it will run faster than implementing
# ourselves. However, for other parts of this assignment you must implement
# the functions yourself!
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix

In [2]:
def getArguments():
    parser = argparse.ArgumentParser()
    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument("--pretrained", dest="pretrained", action="store_true",
                       help="Use pretrained GloVe vectors.")
    group.add_argument("--yourvectors", dest="yourvectors", action="store_true",
                       help="Use your vectors from q3.")
    return parser.parse_args()

In [3]:
def getSentenceFeatures(tokens, wordVectors, sentence):
    """
    Obtain the sentence feature for sentiment analysis by averaging its
    word vectors
    """

    # Implement computation for the sentence features given a sentence.

    # Inputs:
    # tokens -- a dictionary that maps words to their indices in
    #           the word vector list
    # wordVectors -- word vectors (each row) for all tokens
    # sentence -- a list of words in the sentence of interest

    # Output:
    # - sentVector: feature vector for the sentence

    sentVector = np.zeros((wordVectors.shape[1],))

    ### YOUR CODE HERE
    for s in sentence:
        sentVector += wordVectors[tokens[s], :]

    sentVector *= 1.0 / len(sentence)
    ### END YOUR CODE

    assert sentVector.shape == (wordVectors.shape[1],)
    return sentVector

In [4]:
def getRegularizationValues():
    """Try different regularizations

    Return a sorted list of values to try.
    """
    values = None  # Assign a list of floats in the block below
    ### YOUR CODE HERE
    values = np.logspace(-4, 2, num=100, base=10)
    ### END YOUR CODE
    return sorted(values)

In [5]:
def chooseBestModel(results):
    """Choose the best model based on parameter tuning on the dev set

    Arguments:
    results -- A list of python dictionaries of the following format:
        {
            "reg": regularization,
            "clf": classifier,
            "train": trainAccuracy,
            "dev": devAccuracy,
            "test": testAccuracy
        }

    Returns:
    Your chosen result dictionary.
    """
    bestResult = None

    ### YOUR CODE HERE
    bestResult = max(results, key=lambda x: x["dev"])
    ### END YOUR CODE

    return bestResult

In [6]:
def accuracy(y, yhat):
    """ Precision for classifier """
    assert (y.shape == yhat.shape)
    return np.sum(y == yhat) * 100.0 / y.size

In [7]:
def plotRegVsAccuracy(regValues, results, filename):
    """ Make a plot of regularization vs accuracy """
    plt.plot(regValues, [x["train"] for x in results])
    plt.plot(regValues, [x["dev"] for x in results])
    plt.xscale('log')
    plt.xlabel("regularization")
    plt.ylabel("accuracy")
    plt.legend(['train', 'dev'], loc='upper left')
    plt.savefig(filename)

In [8]:
def outputConfusionMatrix(features, labels, clf, filename):
    """ Generate a confusion matrix """
    pred = clf.predict(features)
    cm = confusion_matrix(labels, pred, labels=range(5))
    plt.figure()
    plt.imshow(cm, interpolation='nearest', cmap=plt.cm.Reds)
    plt.colorbar()
    classes = ["- -", "-", "neut", "+", "+ +"]
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes)
    plt.yticks(tick_marks, classes)
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, cm[i, j],
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")
    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    plt.savefig(filename)

In [9]:
def outputPredictions(dataset, features, labels, clf, filename):
    """ Write the predictions to file """
    pred = clf.predict(features)
    with open(filename, "w") as f:
        print >> f, "True\tPredicted\tText"
        for i in xrange(len(dataset)):
            print >> f, "%d\t%d\t%s" % (
                labels[i], pred[i], " ".join(dataset[i][0]))

In [10]:
# def main(args):
#     """ Train a model to do sentiment analyis"""

# Load the dataset
dataset = StanfordSentiment()
tokens = dataset.tokens()
nWords = len(tokens)

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)






In [62]:
# args.yourvectors

In [11]:
m,wordVectors,n=load_saved_params()

In [12]:
wordVectors

array([[-0.35445171, -0.11411778,  0.49013939, ..., -0.39595843,
         0.86938398, -0.30683067],
       [-0.24849273, -0.00464948,  0.52670349, ..., -0.47312398,
         0.92885102, -0.19430682],
       [-0.35271386, -0.12005766,  0.52286047, ..., -0.3763784 ,
         0.78781546, -0.25352223],
       ...,
       [ 0.16320334,  0.02917343, -0.32326368, ...,  0.25090323,
        -0.53672478,  0.1288968 ],
       [ 0.16907001,  0.03304063, -0.31467863, ...,  0.25022324,
        -0.52893953,  0.13077785],
       [ 0.21568874,  0.03968185, -0.41954364, ...,  0.32712662,
        -0.69308084,  0.17521279]])

In [None]:
# if args.yourvectors:
#     _, wordVectors, _ = load_saved_params()
#     wordVectors = np.concatenate(
#         (wordVectors[:nWords, :], wordVectors[nWords:, :]),
#         axis=1)
# elif args.pretrained:
#     wordVectors = glove.loadWordVectors(tokens)

In [13]:
dimVectors = wordVectors.shape[1]

In [14]:
path = "/Users/kevin/notebooks/cs224n-assigments/assignment1/utils/datasets/stanfordSentimentTreebank"

In [15]:
split = [[] for i in range(3)]
with open(path + "/datasetSplit.txt", "r") as f:
            first = True
            for line in f:
                if first:
                    first = False
                    continue

                splitted = line.strip().split(",")
                if splitted[1]==3:
                    print(splitted[0])
                split[int(splitted[1]) - 1] += [int(splitted[0]) - 1]

In [23]:
trainset = dataset.getTrainSentences()
nTrain = len(trainset)

In [16]:
len(dataset.sentences())

11855

In [17]:
dictionary = dict()
phrases = 0
with open(path + "/dictionary.txt", "r") as f:
    for line in f:
        line = line.strip()
        if not line: continue
        splitted = line.split("|")
        dictionary[splitted[0].lower()] = int(splitted[1])
        phrases += 1

In [18]:
labels = [0.0] * phrases

In [19]:
with open(path + "/sentiment_labels.txt", "r") as f:
    first = True
    for line in f:
        if first:
            first = False
            continue

        line = line.strip()
        if not line: continue
        splitted = line.split("|")
        labels[int(splitted[0])] = float(splitted[1])

In [20]:
dataset.numSentences()

11855

In [21]:
sent_labels = [0.0] *dataset.numSentences()
sentences = dataset.sentences()

In [22]:
for i in range(dataset.numSentences()):
    
    sentence = sentences[i]
#     print("i",i)
#     print(sentence)
#     print(" ".join(sentence).replace('-lrb-', '(').replace('-rrb-', ')'))
    full_sent = " ".join(sentence).replace('-lrb-', '(').replace('-rrb-', ')')
#     print('full_sent',full_sent)
#     print(full_sent.replace('ã','Ã').encode('latin1').decode('utf8'))
    try:
#         print('dic',dictionary[full_sent])
        sent_labels[i] = labels[dictionary[full_sent]]
    except KeyError:
        print(i)
        continue 

    

# self._sent_labels = sent_labels
# return self._sent_labels

sentence_index	sentence

1	the rock is destined to be the 21st century 's new `` conan '' and that he 's going to make a splash even greater than arnold schwarzenegger , jean-claud van damme or steven segal .

2	the gorgeously elaborate continuation of `` the lord of the rings '' trilogy is so huge that a column of words can not adequately describe co-writer\/director peter jackson 's expanded vision of j.r.r. tolkien 's middle-earth .

3	effective but too-tepid biopic

4	if you sometimes like to go to the movies to have fun , wasabi is a good place to start .

5	emerges as something rare , an issue movie that 's so honest and keenly observed that it does n't feel like one .

6	the film provides some great insight into the neurotic mindset of all comics -- even those who have reached the absolute top of the game .

7	offers that rare combination of entertainment and education .

8	perhaps no picture ever made has more literally showed that the road to hell is paved with good intentions


1247	we root for -lrb- clara and paul -rrb- , even like them , though perhaps it 's an emotion closer to pity .

1248	the best film about baseball to hit theaters since field of dreams .

1249	instead of a hyperbolic beat-charged urban western , it 's an unpretentious , sociologically pointed slice of life .

1250	the film tunes into a grief that could lead a man across centuries .

1251	if the count of monte cristo does n't transform caviezel into a movie star , then the game is even more rigged than it was two centuries ago .

1252	-lrb- d -rrb- oes n't bother being as cloying or preachy as equivalent evangelical christian movies -- maybe the filmmakers know that the likely audience will already be among the faithful .

1253	as a tolerable diversion , the film suffices ; a triumph , however , it is not .

1254	if director michael dowse only superficially understands his characters , he does n't hold them in contempt .

1255	if your taste runs to ` difficult ' films you absolutely ca

2246	the journey to the secret 's eventual discovery is a separate adventure , and thrill enough .

2247	a quiet , disquieting triumph .

2248	darkly funny and frequently insightful .

2249	... the tale of her passionate , tumultuous affair with musset unfolds as sand 's masculine persona , with its love of life and beauty , takes form .

2250	if you want to see a train wreck that you ca n't look away from , then look no further , because here it is .

2251	there 's so much to look at in metropolis you hate to tear your eyes away from the images long enough to read the subtitles .

2252	the search for redemption makes for a touching love story , mainly because blanchett and ribisi compellingly tap into a spiritual aspect of their characters ' suffering .

2253	a film of ideas and wry comic mayhem .

2254	at its worst the screenplay is callow , but at its best it is a young artist 's thoughtful consideration of fatherhood .

2255	a worthwhile documentary , whether you 're into rap or no


3246	the rich performances by friel -- and especially williams , an american actress who becomes fully english -- round out the square edges .

3247	the new insomnia is a surprisingly faithful remake of its chilly predecessor , and when it does elect to head off in its own direction , it employs changes that fit it well rather than ones that were imposed for the sake of commercial sensibilities .

3248	a film in a class with spike lee 's masterful do the right thing .

3249	jagger , stoppard and director michael apted ... deliver a riveting and surprisingly romantic ride .

3250	greengrass -lrb- working from don mullan 's script -rrb- forgoes the larger socio-political picture of the situation in northern ireland in favour of an approach that throws one in the pulsating thick of a truly frightening situation .

3251	a thought-provoking and often-funny drama about isolation .

3252	whatever one makes of its political edge , this is beautiful filmmaking from one of french cinema 's mast

4245	renner carries much of the film with a creepy and dead-on performance .

4246	jarecki and gibney do find enough material to bring kissinger 's record into question and explain how the diplomat 's tweaked version of statecraft may have cost thousands and possibly millions of lives .

4247	the spaniel-eyed jean reno infuses hubert with a mixture of deadpan cool , wry humor and just the measure of tenderness required to give this comic slugfest some heart .

4248	aniston has at last decisively broken with her friends image in an independent film of satiric fire and emotional turmoil .

4249	a mildly enjoyable if toothless adaptation of a much better book .

4250	unexpected , and often contradictory , truths emerge .

4251	300 years of russian history and culture compressed into an evanescent , seamless and sumptuous stream of consciousness .

4252	intelligent , caustic take on a great writer and dubious human being .

4253	may take its sweet time to get wherever it 's going , but if 


5245	williams creates a stunning , taxi driver-esque portrayal of a man teetering on the edge of sanity .

5246	if you 're in the right b-movie frame of mind , it may just scare the pants off you .

5247	a movie of riveting power and sadness .

5248	both a detective story and a romance spiced with the intrigue of academic skullduggery and politics .

5249	ludicrous , but director carl franklin adds enough flourishes and freak-outs to make it entertaining .

5250	director roger kumble offers just enough sweet and traditional romantic comedy to counter the crudity .

5251	and there 's the inimitable diaz , holding it all together .

5252	spielberg 's picture is smarter and subtler than -lrb- total recall and blade runner -rrb- , although its plot may prove too convoluted for fun-seeking summer audiences .

5253	it 's got all the familiar bruckheimer elements , and schumacher does probably as good a job as anyone at bringing off the hopkins\/rock collision of acting styles and onscreen p

6244	empire ca n't make up its mind whether it wants to be a gangster flick or an art film .

6245	it does n't work as either .

6246	given the fact that virtually no one is bound to show up at theatres for it , the project should have been made for the tube .

6247	possession is in the end an honorable , interesting failure .

6248	it falls far short of poetry , but it 's not bad prose .

6249	jonathan parker 's bartleby should have been the be-all-end-all of the modern-office anomie films .

6250	there may have been a good film in `` trouble every day , '' but it is not what is on the screen .

6251	unfortunately , carvey 's rubber-face routine is no match for the insipid script he has crafted with harris goldberg .

6252	viewed as a comedy , a romance , a fairy tale , or a drama , there 's nothing remotely triumphant about this motion picture .

6253	there 's something unintentionally comic in the film 's drumbeat about authenticity , given the stale plot and pornographic way the fi


7244	occasionally funny , sometimes inspiring , often boring .

7245	a movie in which two not very absorbing characters are engaged in a romance you ca n't wait to see end .

7246	the predominantly amateur cast is painful to watch , so stilted and unconvincing are the performances .

7247	who are ` they ' ?

7248	well , they 're ` they ' .

7249	they 're the unnamed , easily substitutable forces that serve as whatever terror the heroes of horror movies try to avoid .

7250	they exist for hushed lines like `` they 're back ! ''

7251	, `` they 're out there ! ''

7252	and `` they 're coming ! ''

7253	elegantly crafted but emotionally cold , a puzzle whose intricate construction one can admire but is difficult to connect with on any deeper level .

7254	were dylan thomas alive to witness first-time director ethan hawke 's strained chelsea walls , he might have been tempted to change his landmark poem to , ` do not go gentle into that good theatre . '

7255	the story has its redundancie


8123	largely a for-fans artifact .

8124	there 's no denying the elaborateness of the artist 's conceptions , nor his ability to depict them with outrageous elan , but really the whole series is so much pretentious nonsense , lavishly praised by those who equate obscurity with profundity .

8125	characters wander into predictably treacherous situations even though they should know better .

8126	there 's plenty of style in guillermo del toro 's sequel to the 1998 hit but why do we need 117 minutes to tell a tale that simply ca n't sustain more than 90 minutes .

8127	-lrb- i -rrb- f you 've been to more than one indie flick in your life , chances are you 've already seen this kind of thing .

8128	first-time director joão pedro rodrigues ' unwillingness to define his hero 's background or motivations becomes more and more frustrating as the film goes on .

8129	no reason for anyone to invest their hard-earned bucks into a movie which obviously did n't invest much into itself either .



8743	will only satisfy those who ca n't tell the difference between the good , the bad and the ugly .

8744	this kind of dark comedy requires a delicate , surgical touch .

8745	but director danny devito and screenwriter adam resnick -lrb- remember cabin boy ? -rrb-

8746	just pound away .

8747	at times , however , dogtown and z-boys lapses into an insider 's lingo and mindset that the uninitiated may find hard to follow , or care about .

8748	rather quickly , the film falls into a soothing formula of brotherly conflict and reconciliation .

8749	screenwriters scott abbott and michael petroni have turned rice 's complex akasha into a cartoon monster .

8750	the writers , director wally wolodarsky , and all the actors should start their own coeducational fraternity : kappa rho alpha phi .

8751	bad beyond belief and ridiculous beyond description .

8752	the new faces are interesting , but the old story is n't , especially when it starts to seem more improvised than scripted .

8753	m

9242	kitschy , flashy , overlong soap opera .

9243	for all the time we spend with these people , we never really get inside of them .

9244	yet another arnold vehicle that fails to make adequate use of his particular talents .

9245	sandra bullock , despite downplaying her good looks , carries a little too much ai n't - she-cute baggage into her lead role as a troubled and determined homicide cop to quite pull off the heavy stuff .

9246	an undistinguished attempt to make a classic theater piece cinematic .

9247	too many scenarios in which the hero might have an opportunity to triumphantly sermonize , and too few that allow us to wonder for ourselves if things will turn out okay .

9248	there is simply not enough of interest onscreen to sustain its seventy-minute running time .

9249	a wordy wisp of a comedy .

9250	broomfield 's style of journalism is hardly journalism at all , and even those with an avid interest in the subject will grow impatient .

9251	-lrb- seagal 's -rrb- stre


10242	in my own very humble opinion , in praise of love lacks even the most fragmented charms i have found in almost all of his previous works .

10243	the script is too mainstream and the psychology too textbook to intrigue .

10244	muddled , simplistic and more than a little pretentious .

10245	meandering and glacially paced , and often just plain dull .

10246	a disaster of a drama , saved only by its winged assailants .

10247	a road trip that will get you thinking , ` are we there yet ? '

10248	director elie chouraqui , who co-wrote the script , catches the chaotic horror of war , but why bother if you 're going to subjugate truth to the tear-jerking demands of soap opera ?

10249	dong never pushes for insights beyond the superficial tensions of the dynamic he 's dissecting , and the film settles too easily along the contours of expectation .

10250	if there was any doubt that peter o'fallon did n't have an original bone in his body , a rumor of angels should dispel it .

10251

11241	sadly , as blood work proves , that was a long , long time ago .

11242	blue crush has all the trappings of an energetic , extreme-sports adventure , but ends up more of a creaky `` pretty woman '' retread , with the emphasis on self-empowering schmaltz and big-wave surfing that gives pic its title an afterthought .

11243	this movie plays like an extended dialogue exercise in retard 101 .

11244	what we get in feardotcom is more like something from a bad clive barker movie .

11245	in other words , it 's badder than bad .

11246	if they broke out into elaborate choreography , singing and finger snapping it might have held my attention , but as it stands i kept looking for the last exit from brooklyn .

11247	a sloppy slapstick throwback to long gone bottom-of-the-bill fare like the ghost and mr. chicken .

11248	a small independent film suffering from a severe case of hollywood-itis .

11249	where the film falters is in its tone .

11250	the story alone could force you to scratc

In [30]:
'A mimetic approximation of better films like Contempt and 8Â 1\/2 .'.encode('latin1').decode('utf8')

'A mimetic approximation of better films like Contempt and 8\xa01\\/2 .'

In [44]:
'A mimetic approximation of better films like Contempt and 8Â 1\/2'.encode('latin1').decode('utf8')

UnicodeDecodeError: 'utf-8' codec can't decode byte 0xc2 in position 1: unexpected end of data

In [None]:
# Load the train set
trainset = dataset.getTrainSentences()
nTrain = len(trainset)

In [24]:
trainFeatures = np.zeros((nTrain, dimVectors))
trainLabels = np.zeros((nTrain,), dtype=np.int32)

In [25]:
for i in range(nTrain):
    words, trainLabels[i] = trainset[i]
    trainFeatures[i, :] = getSentenceFeatures(tokens, wordVectors, words)

In [26]:
# Prepare dev set features
devset = dataset.getDevSentences()
nDev = len(devset)
devFeatures = np.zeros((nDev, dimVectors))
devLabels = np.zeros((nDev,), dtype=np.int32)

In [28]:
for i in range(nDev):
    words, devLabels[i] = devset[i]
    devFeatures[i, :] = getSentenceFeatures(tokens, wordVectors, words)

In [29]:
# Prepare test set features
testset = dataset.getTestSentences()
nTest = len(testset)
testFeatures = np.zeros((nTest, dimVectors))
testLabels = np.zeros((nTest,), dtype=np.int32)

In [31]:
for i in range(nTest):
    words, testLabels[i] = testset[i]
    testFeatures[i, :] = getSentenceFeatures(tokens, wordVectors, words)

In [32]:
# We will save our results from each run
results = []
regValues = getRegularizationValues()

In [44]:
for reg in regValues:
    print ("Training for reg=%f" % reg)
    # Note: add a very small number to regularization to please the library
    clf = LogisticRegression(C=1.0 / (reg + 1e-12))
    clf.fit(trainFeatures, trainLabels)

    # Test on train set
    pred = clf.predict(trainFeatures)
    trainAccuracy = accuracy(trainLabels, pred)
    print ("Train accuracy (%%): %f" % trainAccuracy)

    # Test on dev set
    pred = clf.predict(devFeatures)
    devAccuracy = accuracy(devLabels, pred)
    print ("Dev accuracy (%%): %f" % devAccuracy)

    # Test on test set
    # Note: always running on test is poor style. Typically, you should
    # do this only after validation.
    pred = clf.predict(testFeatures)
    testAccuracy = accuracy(testLabels, pred)
    print ("Test accuracy (%%): %f" % testAccuracy)

    results.append({
        "reg": reg,
        "clf": clf,
        "train": trainAccuracy,
        "dev": devAccuracy,
        "test": testAccuracy})

Training for reg=0.000100
Train accuracy (%): 31.097846
Dev accuracy (%): 30.699364
Test accuracy (%): 30.180995
Training for reg=0.000115
Train accuracy (%): 31.097846
Dev accuracy (%): 30.699364
Test accuracy (%): 30.180995
Training for reg=0.000132
Train accuracy (%): 31.097846
Dev accuracy (%): 30.699364
Test accuracy (%): 30.180995
Training for reg=0.000152
Train accuracy (%): 31.109551
Dev accuracy (%): 30.699364
Test accuracy (%): 30.180995
Training for reg=0.000175
Train accuracy (%): 31.097846
Dev accuracy (%): 30.699364
Test accuracy (%): 30.180995
Training for reg=0.000201
Train accuracy (%): 31.097846
Dev accuracy (%): 30.699364
Test accuracy (%): 30.180995
Training for reg=0.000231
Train accuracy (%): 31.086142
Dev accuracy (%): 30.699364
Test accuracy (%): 30.180995
Training for reg=0.000266
Train accuracy (%): 31.086142
Dev accuracy (%): 30.699364
Test accuracy (%): 30.180995
Training for reg=0.000305
Train accuracy (%): 31.074438
Dev accuracy (%): 30.699364
Test accurac

Train accuracy (%): 27.890918
Dev accuracy (%): 26.339691
Test accuracy (%): 24.072398
Training for reg=4.037017
Train accuracy (%): 27.738764
Dev accuracy (%): 25.976385
Test accuracy (%): 24.117647
Training for reg=4.641589
Train accuracy (%): 27.551498
Dev accuracy (%): 25.794732
Test accuracy (%): 23.574661
Training for reg=5.336699
Train accuracy (%): 27.516386
Dev accuracy (%): 25.794732
Test accuracy (%): 23.484163
Training for reg=6.135907
Train accuracy (%): 27.434457
Dev accuracy (%): 25.794732
Test accuracy (%): 23.438914
Training for reg=7.054802
Train accuracy (%): 27.340824
Dev accuracy (%): 25.794732
Test accuracy (%): 23.348416
Training for reg=8.111308
Train accuracy (%): 27.317416
Dev accuracy (%): 25.613079
Test accuracy (%): 23.167421
Training for reg=9.326033
Train accuracy (%): 27.282303
Dev accuracy (%): 25.703906
Test accuracy (%): 23.076923
Training for reg=10.722672
Train accuracy (%): 27.305712
Dev accuracy (%): 25.613079
Test accuracy (%): 23.031674
Training

In [45]:
# Print the accuracies
print( "")
print( "=== Recap ===")
print ("Reg\t\tTrain\tDev\tTest")


=== Recap ===
Reg		Train	Dev	Test


In [46]:
for result in results:
    print ("%.2E\t%.3f\t%.3f\t%.3f" % (
        result["reg"],
        result["train"],
        result["dev"],
        result["test"]))
print ("")

1.00E-04	31.098	30.699	30.181
1.15E-04	31.098	30.699	30.181
1.32E-04	31.098	30.699	30.181
1.52E-04	31.110	30.699	30.181
1.75E-04	31.098	30.699	30.181
2.01E-04	31.098	30.699	30.181
2.31E-04	31.086	30.699	30.181
2.66E-04	31.086	30.699	30.181
3.05E-04	31.074	30.699	30.181
3.51E-04	31.074	30.699	30.181
4.04E-04	31.063	30.699	30.181
4.64E-04	31.063	30.699	30.181
5.34E-04	31.063	30.699	30.181
6.14E-04	31.051	30.699	30.181
7.05E-04	31.051	30.699	30.181
8.11E-04	31.051	30.699	30.181
9.33E-04	31.051	30.699	30.136
1.07E-03	31.051	30.699	30.136
1.23E-03	31.051	30.699	30.136
1.42E-03	31.016	30.609	30.226
1.63E-03	31.039	30.609	30.136
1.87E-03	31.051	30.609	30.136
2.15E-03	31.074	30.699	30.045
2.48E-03	31.039	30.518	30.045
2.85E-03	31.039	30.518	30.045
3.27E-03	31.028	30.518	30.045
3.76E-03	31.016	30.518	29.955
4.33E-03	31.016	30.518	29.955
4.98E-03	31.028	30.518	29.955
5.72E-03	31.016	30.518	30.000
6.58E-03	31.039	30.518	30.000
7.56E-03	31.016	30.518	30.000
8.70E-03	31.039	30.518	30.000
1.00E-02	3

In [48]:
# results

In [49]:
bestResult = chooseBestModel(results)
print ("Best regularization value: %0.2E" % bestResult["reg"])
print ("Test accuracy (%%): %f" % bestResult["test"])

# do some error analysis

Best regularization value: 1.00E-04
Test accuracy (%): 30.180995


In [50]:
if args.pretrained:
    plotRegVsAccuracy(regValues, results, "q4_reg_v_acc.png")
    outputConfusionMatrix(devFeatures, devLabels, bestResult["clf"],
                          "q4_dev_conf.png")
    outputPredictions(devset, devFeatures, devLabels, bestResult["clf"],
                      "q4_dev_pred.txt")

NameError: name 'args' is not defined