## Uncalibrated test of binary classification of movie reviews.

Previously, we calibrated the results using a training set, tuning the decision boundary and \Delta h for the best accuracy on that training set.

This is reasonable, and of course will result in an overall increase in performance across all dictionaries if the movie reviews (even the positive ones) are more "negative". And tuning \Delta h is reasonable too.

Here, we won't tune the results and just choose the decision boundary as the average of word scores, or the center for the dictionary, whichever is better (because the method of construction would tell you which makes sense).

Also, more dictionaries have been added, and we'll consider the performance for evaluation of sentences.

In [1]:
import sys
sys.path.append("/Users/andyreagan/tools/python/labMTsimple/")
from labMTsimple.speedy import *
from labMTsimple.storyLab import *

import re
import codecs
from os import listdir,mkdir
from os.path import isfile,isdir
import matplotlib.pyplot as plt
%matplotlib inline
from matplotlib import rc,rcParams
rc("xtick", labelsize=8)
rc("ytick", labelsize=8)
rc("font",**{"family":"serif","serif":["cmr10"]})
# rc("text", usetex=True)
figwidth_onecol = 8.5
figwidth_twocol = figwidth_onecol/2

import numpy as np
from json import loads
import csv
from datetime import datetime,timedelta
import pickle

from subprocess import call

from scipy.stats import pearsonr

error_logging = True
sys.path.append("/Users/andyreagan/tools/python/kitchentable")
from dogtoys import *

In [2]:
def sampleReviewsDict(numReviews,numSamples,filelist,wordsRE,test="",prefix=""):
    """Sample from all of the review."""
    if numReviews == 1:
        choose_randomly = False
    else:
        choose_randomly = True

    scores = [[0.0 for i in range(numSamples)] for j in range(len(wordsRE))]
    for i in range(numSamples):
        # print("on sample {0}".format(i))

        if choose_randomly:
            files = np.random.choice(filelist,size=numReviews,replace=False)
        else:
            files = [filelist[i]]

        # forget the string expansion
        # let"s store them as a dict
        allwordcounts = dict()
        for file in files:
            # ########################################
            # # this makes the dicts if they're needed
            # if isfile(file+".dict"):
            #     my_dict = pickle.load( open( file+".dict", "rb" ) )
            #     for word in my_dict:
            #         if word in allwordcounts:
            #             allwordcounts[word] += my_dict[word]
            #         else:
            #             allwordcounts[word] = my_dict[word]
            # else:
            #     # read the txt file
            #     f = open(file+".txt","r")
            #     rawtext = f.read()
            #     f.close()
            #     # dictify_general it
            #     tmp_dict = dict()
            #     dictify_general(rawtext,tmp_dict)
            #     pickle.dump( tmp_dict , open( file+".dict", "wb" ) )
            #     # add to the full dict
            #     dictify_general(rawtext,allwordcounts)

            # ###################################################
            # # this loads the dicts
            # my_dict = pickle.load( open( file+".dict", "rb" ) )
            # for word in my_dict:
            #     if word in allwordcounts:
            #         allwordcounts[word] += my_dict[word]
            #     else:
            #         allwordcounts[word] = my_dict[word]

            ########################################
            # this loads the files
            f = open(file+".txt","r")
            rawtext = f.read()
            f.close()
            # add to the full dict
            dictify_general(rawtext,allwordcounts)

        for j in range(len(wordsRE)):
            scores[j][i] = wordsRE[j].score(allwordcounts,center=1000.0)
    
    if len(test) > 0:
        f = open("output/{0}-{1}-{2:.0f}-{3:.0f}.csv".format(test,prefix,numReviews,numSamples),"w")
        csv_writer = csv.writer(f)
        for row in scores:
            csv_writer.writerow(row)
        f.close()
    
    return scores

In [3]:
def classifier_perf(conf_mat,v=True):
    """Reduce a confusion matrix to its mean per-class F1 score.

    Rows of ``conf_mat`` are the actual classes, columns the predicted
    ones. Per-class recall, precision and F1 are computed and, when ``v``
    is true, printed in that order. The unweighted mean of the F1 values
    is returned; a class whose row or column sums to zero yields ``nan``.
    """
    n_classes = conf_mat.shape[0]
    hits = np.diagonal(conf_mat)[:n_classes]
    # recall divides by the row totals, precision by the column totals
    recall = hits/conf_mat.sum(axis=1)[:n_classes]
    precision = hits/conf_mat.sum(axis=0)[:n_classes]
    f1 = 2*recall*precision/(recall+precision)

    if v:
        for per_class in (recall,precision,f1):
            print(per_class)
    return f1.mean()

In [4]:
# Paths of the positive reviews, with the ".txt" extension stripped.
flip = "pos"
pos_files = []
for x in listdir("../data/moviereviews/txt_sentoken/{0}/".format(flip)):
    if ".txt" not in x:
        continue
    pos_files.append("../data/moviereviews/txt_sentoken/{0}/{1}".format(flip,x.replace(".txt","")))

# Same for the negative reviews.
flip = "neg"
neg_files = []
for x in listdir("../data/moviereviews/txt_sentoken/{0}/".format(flip)):
    if ".txt" not in x:
        continue
    neg_files.append("../data/moviereviews/txt_sentoken/{0}/{1}".format(flip,x.replace(".txt","")))

# One instance of every sentiment dictionary. The position in this list is
# the row index used for the score lists computed from it.
all_senti_dicts = [
    LabMT(),
    ANEW(),
    WK(),
    MPQA(),
    LIWC01(),
    LIWC07(),
    LIWC15(),
    OL(),
    PANASX(),
    Pattern(),
    SentiWordNet(),
    AFINN(),
    GI(),
    WDAL(),
    Sent140Lex(),
    MaxDiff(),
    HashtagSent(),
    EmoLex(),
    SOCAL(),
    SenticNet(),
    Emoticons(),
    SentiStrength(),
    VADER(),
    Umigon(),
    USent(),
    EmoSenticNet(),
]

In [5]:
# One review per sample: with numReviews == 1, sample i is file i of the
# list, so this scores the first 1000 files of each class with every
# dictionary. Result: one row of 1000 scores per dictionary.
pos_scores = sampleReviewsDict(1,1000,pos_files,all_senti_dicts)
neg_scores = sampleReviewsDict(1,1000,neg_files,all_senti_dicts)

In [6]:
def _conf_mat_at(threshold,pos,neg):
    """Build the 2x2 confusion matrix for a decision boundary.

    Row 0 is the positive set and row 1 the negative set; column 0 counts
    scores strictly above ``threshold`` and column 1 scores strictly below
    it. Scores equal to the threshold land in neither column.
    """
    return np.array([[(pos>threshold).sum(),(pos<threshold).sum()],
                     [(neg>threshold).sum(),(neg<threshold).sum()]],dtype=float)


# Per-dictionary results for the three decision boundaries.
F1_scores_mean = np.zeros(len(all_senti_dicts))
F1_scores_center = np.zeros(len(all_senti_dicts))
F1_scores_trained = np.zeros(len(all_senti_dicts))
perc_scored = np.zeros(len(all_senti_dicts))
# Each row: [title, % scored, conf_mat (mean), F1 (mean),
#            conf_mat (center), F1 (center), conf_mat (trained),
#            F1 (trained), best untrained F1]
all_data = []
for i,x in enumerate(all_senti_dicts):
    data = [x.title]
    pos = np.array(pos_scores[i])
    neg = np.array(neg_scores[i])
    # 1000.0 is the `center` value handed to score(); such samples are
    # excluded from every confusion matrix below.
    pos_scored = pos != 1000.0
    neg_scored = neg != 1000.0
    total_n_samples = len(pos)+len(neg)

    # Boundary 1: the mean of the dictionary's word scores.
    conf_mat = _conf_mat_at(x.scorelist.mean(),pos[pos_scored],neg[neg_scored])
    print(conf_mat)
    F1_scores_mean[i] = classifier_perf(conf_mat)
    perc_scored[i] = 100*conf_mat.sum()/total_n_samples
    data.append(perc_scored[i])
    data.append(conf_mat)
    data.append(F1_scores_mean[i])

    # Boundary 2: the dictionary's center.
    conf_mat = _conf_mat_at(x.center,pos[pos_scored],neg[neg_scored])
    print(conf_mat)
    F1_scores_center[i] = classifier_perf(conf_mat)
    data.append(conf_mat)
    data.append(F1_scores_center[i])

    # Boundary 3 ("trained"): the mean score of a random training subset,
    # evaluated on the remaining samples. From each class we draw 5% of
    # the total sample count. `size` must be an integer; a float is
    # rejected by current numpy.
    # NOTE(review): np.random.choice samples with replacement by default,
    # so the subset can hold fewer distinct items than n_train.
    n_train = int(.05*total_n_samples)
    pos_train = np.zeros(len(pos),dtype=bool)
    pos_train[np.random.choice(np.arange(len(pos)),size=n_train)] = True
    neg_train = np.zeros(len(neg),dtype=bool)
    neg_train[np.random.choice(np.arange(len(neg)),size=n_train)] = True
    avg = np.concatenate((pos[pos_scored & pos_train],neg[neg_scored & neg_train])).mean()
    conf_mat = _conf_mat_at(avg,pos[pos_scored & ~pos_train],neg[neg_scored & ~neg_train])
    print(conf_mat)
    F1_scores_trained[i] = classifier_perf(conf_mat)
    data.append(conf_mat)
    data.append(F1_scores_trained[i])

    # Best of the two untrained boundaries; the trailing 0.0 guarantees a
    # value even when both F1 scores are nan.
    untrained = np.array([F1_scores_center[i],F1_scores_mean[i],0.0])
    print(untrained)
    usable = untrained[~np.isnan(untrained)]
    print(usable)
    data.append(np.max(usable))
    print(x.title,F1_scores_mean[i],F1_scores_center[i],F1_scores_trained[i],perc_scored[i])
    all_data.append(data)

[[ 739.  261.]
 [ 471.  529.]]
[ 0.739  0.529]
[ 0.6107438   0.66962025]
[ 0.66877828  0.59106145]
[[ 1000.     0.]
 [ 1000.     0.]]
[ 1.  0.]
[ 0.5  nan]
[ 0.66666667         nan]
[[ 595.  307.]
 [ 347.  556.]]
[ 0.65964523  0.61572536]
[ 0.63163482  0.64426419]
[ 0.64533623  0.62967157]
[        nan  0.62991987  0.        ]
[ 0.62991987  0.        ]
labMT 0.629919866528 nan 0.637503899888 100.0
[[ 955.   45.]
 [ 962.   38.]]
[ 0.955  0.038]
[ 0.49817423  0.45783133]
[ 0.65478231  0.07017544]
[[ 974.   26.]
 [ 975.   25.]]
[ 0.974  0.025]
[ 0.49974346  0.49019608]
[ 0.6605629   0.04757374]
[[ 551.  358.]
 [ 425.  481.]]
[ 0.60616062  0.53090508]
[ 0.56454918  0.57330155]
[ 0.58461538  0.5512894 ]
[ 0.35406832  0.36247887  0.        ]
[ 0.35406832  0.36247887  0.        ]
ANEW 0.362478874595 0.354068320987 0.567952391448 100.0
[[ 1000.     0.]
 [  998.     2.]]
[ 1.     0.002]
[ 0.5005005  1.       ]
[ 0.66711141  0.00399202]
[[ 1000.     0.]
 [  999.     1.]]
[ 1.     0.001]
[ 0.5002

  ret = ret.dtype.type(ret / rcount)


In [7]:
# Inspect the first row: title, % scored, then a confusion matrix and F1
# for each of the mean / center / trained boundaries, then the best
# untrained F1.
all_data[0]

['labMT', 100.0, array([[ 739.,  261.],
        [ 471.,  529.]]), 0.62991986652847642, array([[ 1000.,     0.],
        [ 1000.,     0.]]), nan, array([[ 595.,  307.],
        [ 347.,  556.]]), 0.63750389988773248, 0.62991986652847642]

In [8]:
# Write the whole-review results as a LaTeX table, sorted by the last entry
# of each row (best untrained F1), highest first.
with open("tables/movie-review-accuracy-untrained-perf-sorted.tex","w") as f:
    f.write(r"\begin{tabular}{l | l | c | c | c |}")
    f.write("\n")
    f.write(r"Rank & Title & \% Scored & F1 Trained & F1 Untrained\\")
    f.write("\n")
    f.write(r"\hline")
    f.write("\n")
    for i,x in enumerate(sorted(all_data,key=lambda x: x[-1],reverse=True)):
        print(x[0],x[1],x[-2],x[-1])
        # Format into locals rather than overwriting x[-2]/x[-1]: the rows
        # are the same list objects held by all_data, and replacing their
        # numbers with strings breaks re-running this cell and any later
        # numeric use of all_data.
        f1_trained = "--" if np.isnan(x[-2]) else "{0:.2f}".format(x[-2])
        f1_untrained = "--" if x[-1] == 0 else "{0:.2f}".format(x[-1])
        f.write(r"{0}. & {1} & {2:.0f} & {3} & {4}\\".format(i+1,x[0],x[1],f1_trained,f1_untrained))
        print(r"{0} & {1} & {2:.0f} & {3} & {4}\\".format(i+1,x[0],x[1],f1_trained,f1_untrained))
        f.write("\n")
    f.write(r"\end{tabular}")

OL 100.0 0.699781320057 0.705065204738
1 & OL & 100 & 0.70 & 0.71\\
HashtagSent 100.0 0.665536836052 0.663998655995
2 & HashtagSent & 100 & 0.67 & 0.66\\
MPQA 100.0 0.665380710101 0.658181208777
3 & MPQA & 100 & 0.67 & 0.66\\
SentiWordNet 100.0 0.650734596721 0.650261839477
4 & SentiWordNet & 100 & 0.65 & 0.65\\
labMT 100.0 0.637503899888 0.629919866528
5 & labMT & 100 & 0.64 & 0.63\\
AFINN 100.0 0.66768085522 0.625528058889
6 & AFINN & 100 & 0.67 & 0.63\\
Umigon 100.0 0.649815396481 0.621613114218
7 & Umigon & 100 & 0.65 & 0.62\\
GI 100.0 0.652727020429 0.605447902394
8 & GI & 100 & 0.65 & 0.61\\
SOCAL 100.0 0.714559506545 0.601253000571
9 & SOCAL & 100 & 0.71 & 0.60\\
VADER 100.0 0.672703267686 0.599666318733
10 & VADER & 100 & 0.67 & 0.60\\
WDAL 100.0 0.601152324771 0.590346760158
11 & WDAL & 100 & 0.60 & 0.59\\
SentiStrength 100.0 0.630811212872 0.580727106681
12 & SentiStrength & 100 & 0.63 & 0.58\\
EmoLex 100.0 0.653740550145 0.560225623373
13 & EmoLex & 100 & 0.65 & 0.56\\
LIWC1

In [9]:
def sampleReviewsDictSentences(numReviews,numSamples,filelist,wordsRE):
    """Score every line of the first ``numSamples`` reviews separately.

    Each non-empty line of a review file is treated as one sentence,
    turned into its own word-count dict and scored by every entry of
    ``wordsRE``.

    Args:
        numReviews: Unused; kept so the signature parallels
            ``sampleReviewsDict``.
        numSamples: Number of files to read, taken in order from the start
            of ``filelist`` (must not exceed ``len(filelist)``).
        filelist: Review paths without the ".txt" extension.
        wordsRE: Scorer objects exposing ``score(word_counts, center=...)``.

    Returns:
        A list with one row per scorer; row ``j`` holds the scores of all
        sentences, file by file, from ``wordsRE[j]``.
    """
    scores = [[] for _ in wordsRE]
    for i in range(numSamples):
        # `with` closes the handle even if reading fails.
        with open(filelist[i]+".txt","r") as f:
            rawtext = f.read()

        # one word-count dict per non-empty line
        sentence_dicts = [dictify(listify(line))
                          for line in rawtext.split("\n") if len(line) > 0]

        for j,scorer in enumerate(wordsRE):
            for sentence_dict in sentence_dicts:
                scores[j].append(scorer.score(sentence_dict,center=1000.0))

    return scores

In [10]:
# Re-score at sentence level: every non-empty line of the first 1000 files
# of each class is scored on its own. This rebinds pos_scores/neg_scores,
# replacing the whole-review scores.
pos_scores = sampleReviewsDictSentences(1,1000,pos_files,all_senti_dicts)
neg_scores = sampleReviewsDictSentences(1,1000,neg_files,all_senti_dicts)

In [11]:
# Number of sentence-level scores collected from the positive reviews for
# the first dictionary.
len(pos_scores[0])

32937

In [40]:
def _conf_mat_at(threshold,pos,neg):
    """Build the 2x2 confusion matrix for a decision boundary.

    Row 0 is the positive set and row 1 the negative set; column 0 counts
    scores strictly above ``threshold`` and column 1 scores strictly below
    it. Scores equal to the threshold land in neither column.
    """
    return np.array([[(pos>threshold).sum(),(pos<threshold).sum()],
                     [(neg>threshold).sum(),(neg<threshold).sum()]],dtype=float)


# Allocated here so this cell does not depend on arrays left over from an
# earlier cell; every entry is overwritten in the loop below.
F1_scores_mean = np.zeros(len(all_senti_dicts))
F1_scores_center = np.zeros(len(all_senti_dicts))
F1_scores_trained = np.zeros(len(all_senti_dicts))
perc_scored = np.zeros(len(all_senti_dicts))
# Each row: [title, % scored, conf_mat (mean), F1 (mean),
#            conf_mat (center), F1 (center), conf_mat (trained),
#            F1 (trained), best untrained F1, best untrained F1 * % scored]
all_data = []
for i,x in enumerate(all_senti_dicts):
    data = [x.title]
    pos = np.array(pos_scores[i])
    neg = np.array(neg_scores[i])
    # 1000.0 is the `center` value handed to score(); such sentences are
    # excluded from every confusion matrix below.
    pos_scored = pos != 1000.0
    neg_scored = neg != 1000.0
    total_n_samples = len(pos)+len(neg)

    # Boundary 1: the mean of the dictionary's word scores.
    conf_mat = _conf_mat_at(x.scorelist.mean(),pos[pos_scored],neg[neg_scored])
    print(conf_mat)
    F1_scores_mean[i] = classifier_perf(conf_mat)
    perc_scored[i] = 100*conf_mat.sum()/total_n_samples
    data.append(perc_scored[i])
    data.append(conf_mat)
    data.append(F1_scores_mean[i])

    # Boundary 2: the dictionary's center.
    conf_mat = _conf_mat_at(x.center,pos[pos_scored],neg[neg_scored])
    print(conf_mat)
    F1_scores_center[i] = classifier_perf(conf_mat)
    data.append(conf_mat)
    data.append(F1_scores_center[i])

    # Boundary 3 ("trained"): the mean score of a random training subset,
    # evaluated on the remaining sentences. From each class we draw 5% of
    # the total sentence count. `size` must be an integer; a float is
    # rejected by current numpy.
    # NOTE(review): np.random.choice samples with replacement by default,
    # so the subset can hold fewer distinct items than n_train.
    n_train = int(.05*total_n_samples)
    pos_train = np.zeros(len(pos),dtype=bool)
    pos_train[np.random.choice(np.arange(len(pos)),size=n_train)] = True
    neg_train = np.zeros(len(neg),dtype=bool)
    neg_train[np.random.choice(np.arange(len(neg)),size=n_train)] = True
    avg = np.concatenate((pos[pos_scored & pos_train],neg[neg_scored & neg_train])).mean()
    conf_mat = _conf_mat_at(avg,pos[pos_scored & ~pos_train],neg[neg_scored & ~neg_train])
    print(conf_mat)
    F1_scores_trained[i] = classifier_perf(conf_mat)
    data.append(conf_mat)
    data.append(F1_scores_trained[i])

    # Best of the two untrained boundaries; the trailing 0.0 guarantees a
    # value even when both F1 scores are nan.
    untrained = np.array([F1_scores_center[i],F1_scores_mean[i],0.0])
    print(untrained)
    usable = untrained[~np.isnan(untrained)]
    print(usable)
    data.append(np.max(usable))
    # Scale the best untrained F1 by the fraction of sentences scored.
    weighted_best = data[-1]*perc_scored[i]/100
    data.append(weighted_best)
    print(x.title,F1_scores_mean[i],F1_scores_center[i],F1_scores_trained[i],perc_scored[i])
    all_data.append(data)

[[ 19210.  13595.]
 [ 15699.  15870.]]
[ 0.58558147  0.50270835]
[ 0.55028789  0.53860512]
[ 0.56738636  0.52003801]
[[ 30832.   1956.]
 [ 28833.   2717.]]
[ 0.94034403  0.08611727]
[ 0.51675186  0.58142521]
[ 0.66697673  0.15001518]
[[ 16281.  13449.]
 [ 13201.  15326.]]
[ 0.54762866  0.53724542]
[ 0.55223526  0.53261512]
[ 0.54992231  0.53492025]
[ 0.40849596  0.54371219  0.        ]
[ 0.40849596  0.54371219  0.        ]
labMT 0.54371218594 0.408495958924 0.542421280078 99.4653893696
[[ 17054.   4502.]
 [ 14963.   4577.]]
[ 0.79114864  0.23423746]
[ 0.53265453  0.50413041]
[ 0.63666399  0.31985744]
[[ 17540.   4001.]
 [ 15444.   4088.]]
[ 0.81426118  0.20929756]
[ 0.53177298  0.50537767]
[ 0.6433746   0.29600666]
[[ 12028.   7487.]
 [ 10133.   7499.]]
[ 0.6163464   0.42530626]
[ 0.54275529  0.50040037]
[ 0.5772147   0.45980747]
[ 0.46969063  0.47826071  0.        ]
[ 0.46969063  0.47826071  0.        ]
ANEW 0.478260714278 0.469690630203 0.518511086088 63.4981458591
[[ 28124.   4018.]



[[ 13691.   8218.]
 [ 10396.  10107.]]
[ 0.62490301  0.49295225]
[ 0.56839789  0.55154161]
[ 0.59531264  0.52060369]
[[ 12271.   8820.]
 [  9235.  10488.]]
[ 0.58181215  0.53176494]
[ 0.57058495  0.54319453]
[ 0.57614386  0.53741897]
[ 0.55795816  0.54228956  0.        ]
[ 0.55795816  0.54228956  0.        ]
AFINN 0.542289562355 0.557958161971 0.556781417279 69.7017923362
[[ 18459.   8274.]
 [ 15181.   9924.]]
[ 0.69049489  0.39529974]
[ 0.54872176  0.54533465]
[ 0.61149852  0.45835162]
[[ 14146.   8278.]
 [ 10856.   9928.]]
[ 0.63084196  0.47767513]
[ 0.56579474  0.54531473]
[ 0.59655042  0.50925878]
[[ 12812.  11414.]
 [  9770.  12887.]]
[ 0.5288533   0.56878669]
[ 0.56735453  0.53030739]
[ 0.54742779  0.54887346]
[ 0.5529046   0.53492507  0.        ]
[ 0.5529046   0.53492507  0.        ]
GI 0.534925066455 0.552904599844 0.548150625756 80.0957972806
[[ 17732.  14995.]
 [ 15159.  16254.]]
[ 0.54181563  0.51742909]
[ 0.53911404  0.52014464]
[ 0.54046146  0.51878331]
[[ 32585.    125.]


  ret = ret.dtype.type(ret / rcount)


In [41]:
# Inspect the first sentence-level row; compared with the whole-review rows
# it carries one extra trailing entry, the best untrained F1 scaled by the
# fraction scored.
all_data[0]

['labMT', 99.465389369592089, array([[ 19210.,  13595.],
        [ 15699.,  15870.]]), 0.54371218594004644, array([[ 30832.,   1956.],
        [ 28833.,   2717.]]), 0.40849595892370738, array([[ 16281.,  13449.],
        [ 13201.,  15326.]]), 0.54242128007822332, 0.54371218594004644, 0.54080544279518772]

In [42]:
# Write the sentence-level results as a LaTeX table, sorted by the last
# entry of each row (best untrained F1 scaled by % scored), highest first.
with open("tables/movie-review-sentence-accuracy-untrained-perf-sorted.tex","w") as f:
    f.write(r"\begin{tabular}{l | l | c | c | c | c }")
    f.write("\n")
    f.write(r"Rank & Title & \% Scored & F1 Trained of Scored & F1 Untrained of Scored & F1 Untrained, All\\")
    f.write("\n")
    f.write(r"\hline")
    f.write("\n")
    for i,x in enumerate(sorted(all_data,key=lambda x: x[-1],reverse=True)):
        # Format into locals rather than overwriting x[-3:]: the rows are
        # the same list objects held by all_data, and replacing their
        # numbers with strings breaks re-running this cell and any later
        # numeric use of all_data (e.g. float(x[-1]) on a "--" entry).
        f1_trained = "--" if np.isnan(x[-3]) else "{0:.2f}".format(x[-3])
        f1_untrained = "--" if x[-2] == 0 else "{0:.2f}".format(x[-2])
        f1_untrained_all = "--" if x[-1] == 0 else "{0:.2f}".format(x[-1])
        row = r"{0}. & {1} & {2:.0f} & {3} & {4} & {5}\\".format(i+1,x[0],x[1],f1_trained,f1_untrained,f1_untrained_all)
        f.write(row)
        print(row)
        f.write("\n")
    f.write(r"\end{tabular}")

1. & HashtagSent & 100 & 0.55 & 0.55 & 0.55\\
2. & LIWC15 & 99 & 0.53 & 0.55 & 0.55\\
3. & LIWC07 & 99 & 0.53 & 0.55 & 0.54\\
4. & LIWC01 & 99 & 0.52 & 0.55 & 0.54\\
5. & labMT & 99 & 0.54 & 0.54 & 0.54\\
6. & Sent140Lex & 100 & 0.55 & 0.54 & 0.54\\
7. & SentiWordNet & 99 & 0.54 & 0.53 & 0.53\\
8. & WDAL & 99 & 0.53 & 0.53 & 0.52\\
9. & EmoLex & 95 & 0.54 & 0.55 & 0.52\\
10. & MPQA & 93 & 0.54 & 0.55 & 0.52\\
11. & SenticNet & 97 & 0.53 & 0.52 & 0.50\\
12. & SOCAL & 88 & 0.56 & 0.55 & 0.49\\
13. & EmoSenticNet & 98 & 0.52 & 0.46 & 0.45\\
14. & Pattern & 81 & 0.55 & 0.55 & 0.45\\
15. & GI & 80 & 0.55 & 0.55 & 0.44\\
16. & WK & 97 & 0.54 & 0.45 & 0.44\\
17. & OL & 76 & 0.56 & 0.57 & 0.44\\
18. & VADER & 79 & 0.56 & 0.55 & 0.43\\
19. & SentiStrength & 77 & 0.54 & 0.54 & 0.41\\
20. & MaxDiff & 83 & 0.54 & 0.49 & 0.41\\
21. & AFINN & 70 & 0.56 & 0.56 & 0.39\\
22. & ANEW & 63 & 0.52 & 0.48 & 0.30\\
23. & Umigon & 53 & 0.56 & 0.56 & 0.30\\
24. & PANAS-X & 1 & 0.53 & 0.53 & 0.01\\
25. & Emotic

In [31]:
# Collect the last entry of every row of all_data as floats. Every x[-1]
# must be numeric or float-convertible for this to succeed.
all_results = np.array([float(x[-1]) for x in all_data])

In [32]:
# Display the collected values, one per dictionary, in all_data order.
all_results

array([ 0.54080544,  0.30368669,  0.43995263,  0.51582074,  0.54216007,
        0.54294317,  0.54727151,  0.43527349,  0.00764126,  0.44679473,
        0.52982797,  0.38890684,  0.44285335,  0.52487608,  0.53763906,
        0.40947576,  0.55050922,  0.52281758,  0.48905555,  0.50065881,
        0.        ,  0.4137908 ,  0.43325069,  0.29768802,  0.        ,
        0.45247927])

In [33]:
# Mean of the collected values over all dictionaries.
np.mean(all_results)

0.41600687391234187

In [34]:
# Median of the collected values over all dictionaries.
np.median(all_results)

0.44963699751497788