# Evaluation of Results of "TUD-MMC at MediaEval 2016: Context of Experience task" by Wang & Liem

## Necessary imports

In [1]:
import pandas as pd
import numpy as np
import os.path
import xml.etree.ElementTree as ET

train_path = "res/coe_dataset_icpr/dev_set/"
test_path = "res/coe_dataset_icpr/test_set/"

audio_folder = "audio_descriptors/"
text_folder = "text_descriptors/"
vis_folder = "vis_descriptors/"
metadata_folder = "XML/"

train_entries_path = "res/CoeTraining.csv"

## Datasets

Features are built in the manner described in the paper of Wang & Liem or "Right Inflight? A Dataset for Exploring the Automatic
Prediction of Movies Suitable for a Watching Situation" (https://mmsys2016.itec.aau.at/papers/MMSYS/a45-riegler.pdf), if Wang & Liem do not provide any information.

This leads to following set-up:

Metadata: (language, year published, genre, country, runtime and age rating) - from XML -- 1-Hot Encoding for all categorical values<br>
Text: as is td-idf <br>
Audio: Averaged of all Frames (NaN to 0) - Mel-Frequency Cepstral Coefficients<br>
Visual: as is - Histogram of Oriented Gradients (HOG) gray, Color Moments, local binary patterns (LBP) and Gray Level Run Length Matrix

NOTE: Training data - invalid entry (2_states, also in test set), (Moulin_Rouge!.mp4, should be Moulin_Rouge! --> fixed)

In [2]:
df_base_train = pd.read_csv(train_entries_path)
df_base_train = df_base_train[df_base_train['file_name'] != '2_States'] # remove invalid entry
df_base_train.sort_values(by='file_name', inplace=True)
df_base_train.reset_index(inplace=True, drop=True)
df_targets_train = df_base_train['goodforairplanes'].astype(int)
df_base_train.head(5)

Unnamed: 0,movie_name,file_name,goodforairplanes
0,A Fish Called Wanda,A_Fish_Called_Wanda,1
1,A Goofy Movie,A_Goofy_Movie,0
2,A Million Ways to Die in the West,A_Million_Ways_to_Die_in_the_West,1
3,A Single Man,A_Single_Man,1
4,American Gangster,American_Gangster,1


## Feature extractors
As the dataset was built in a manner that would have been considered dirty already in 2002 a lot of feature extraction is done

In [3]:
def get_audio_features(file_name, use_train=True):
    """
        returns 1x14 dataframe, with averaged Mel-Frequency Cepstral Coefficients + file_name
    """
    base_path = train_path if use_train else test_path
    file_path = os.path.join(base_path, audio_folder, file_name + ".csv")
    if not os.path.isfile(file_path):
        print(file_name, " does not exist!")
        return pd.DataFrame(columns=[str(x) for x in range(13)] + ['file_name'])
    df_audio = pd.read_csv(file_path, header=None).T # transpose (columns are rows)
    df_audio = df_audio.fillna(0) # nan values are treated as 0
    df_audio = pd.DataFrame(df_audio.mean(axis=0)).T # average accross columns
    df_audio['file_name'] = file_name
    return df_audio

def get_all_audio_features(df, use_train=True):
    """
        returns nx14 dataframe, containing audio features for all movies
    """
    dfs = []
    for file_name in df['file_name']:
        dfs.append(get_audio_features(file_name, use_train))
    
    return pd.concat(dfs).reset_index(drop=True)

def get_all_text_features(df, use_train=True):
    """
        returns nx3284 dataframe, containing tf-idf features for all movies
        the dataset creators messed up - contains several terms multiple times
        ordered alphabetically (?) - Live_Nude_Girls and Transformers__Age_of_Extinction where switched (detected perchance)
    """
    base_path = train_path if use_train else test_path
    file_path = os.path.join(base_path, text_folder, "tdf_idf_dev.csv")
    df_txt = pd.read_csv(file_path)
    # the creators of the dataset missed how csv-files work - so we transpose and drop empty rows to get the correct format
    cols = df_txt.columns 
    df_txt = df_txt.T.dropna()
    df_txt.columns = cols
    df_txt.reset_index(inplace=True, drop=True)
    df_txt['file_name'] = sorted(df['file_name']) # we assume the info to be order alphabetically, as we do not have more info
    return df_txt.reset_index(drop=True)

def get_vis_features(file_name, use_train=True):
    """
        returns 1x1653 dataframe, with unspecified visual features + file_name
        we assume that every single value in the csv is one feature
        this may be wrong, as there are two rows and no documentation (again)
    """
    base_path = train_path if use_train else test_path
    file_path = os.path.join(base_path, vis_folder, file_name + ".csv")
    if not os.path.isfile(file_path):
        print(file_name, " does not exist!")
        return pd.DataFrame(columns=[str(x) for x in range(1652)] + ['file_name'])
    df_vis = pd.read_csv(file_path, header=None)
    df_vis = pd.DataFrame(pd.concat([df_vis.loc[0,:], df_vis.loc[1,:]])).reset_index(drop=True).T # treat each value as single feature (-> no aggregation)
    df_vis['file_name'] = file_name
    return df_vis

def get_all_vis_features(df, use_train=True):
    """
        returns nx1653 dataframe, containing visual features for all movies
    """
    dfs = []
    for file_name in df['file_name']:
        dfs.append(get_vis_features(file_name, use_train))
    
    return pd.concat(dfs).reset_index(drop=True)

def get_meta_features(file_name, use_train=True):
    """
        returns 1x7 dataframe, with metadata features + file_name
        One Hot Encoding is not applied here, this should happen later
    """
    base_path = train_path if use_train else test_path
    file_path = os.path.join(base_path, metadata_folder, file_name + ".xml")
    if not os.path.isfile(file_path):
        print(file_name, " does not exist!")
        return pd.DataFrame(columns=['country', 'genre', 'language', 'rated', 'runtime', 'year', 'file_name'])
    etree = ET.parse(file_path)
    movie = etree.getroot().find('movie')
    mv = {}
    mv['language'] = [movie.get('language')]
    mv['year'] = [int(movie.get('year'))]
    mv['genre'] = [movie.get('genre')]
    mv['country'] = [movie.get('country')]
    mv['runtime'] = [int(movie.get('runtime')[:-4])]
    mv['rated'] = [movie.get('rated')]

    df_meta = pd.DataFrame.from_dict(mv)
    df_meta['file_name'] = file_name
    
    return df_meta

def get_all_meta_features(df, use_train=True):
    """
        returns nx7 dataframe, containing metadata features for all movies
    """
    dfs = []
    for file_name in df['file_name']:
        dfs.append(get_meta_features(file_name, use_train))
    
    df_meta = pd.concat(dfs)
    
    df_country = df_meta.country.str.replace(' ','').str.get_dummies(sep=',')
    df_country.columns = ['country_' + x for x in df_country.columns]

    df_genre = df_meta.genre.str.replace(' ','').str.get_dummies(sep=',')
    df_genre.columns = ['genre_' + x for x in df_genre.columns]

    df_language = df_meta.language.str.replace(' ','').str.get_dummies(sep=',')
    df_language.columns = ['language_' + x for x in df_language.columns]

    df_rated = df_meta.rated.str.get_dummies(sep=',')
    df_rated.columns = ['rated_' + x for x in df_rated.columns]
    
    return pd.concat([df_country, df_genre, df_language, df_rated, df_meta[['runtime', 'year', 'file_name']]], axis=1).reset_index(drop=True)

In [4]:
df_audio_train = get_all_audio_features(df_base_train)
df_audio_train.head(5)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,file_name
0,66.828814,-5.674521,1.670346,1.143263,-0.635255,1.269376,0.633811,0.012407,0.164257,-1.494813,-0.292583,0.086875,-0.914582,A_Fish_Called_Wanda
1,43.908715,-6.380989,1.36179,-1.483354,-0.67004,-1.626537,0.466197,-1.888169,0.848654,-0.990286,0.673634,-0.972964,-0.141539,A_Goofy_Movie
2,3.390978,-6.725758,0.579762,-0.271885,-0.17564,-0.84569,-0.699064,-0.578434,0.537249,-1.387373,0.747223,-0.88758,-0.205273,A_Million_Ways_to_Die_in_the_West
3,57.743484,-3.722123,2.780418,0.756402,0.043743,-0.960622,-0.435575,-0.176729,1.665236,-2.068548,1.211791,-0.358194,0.738827,A_Single_Man
4,65.354709,-5.609515,-1.303409,-0.831993,-0.518848,-0.019373,-0.500203,-0.897985,0.148561,-0.666728,0.033135,0.383797,-0.209412,American_Gangster


In [5]:
df_txt_train = get_all_text_features(df_base_train)
df_txt_train.head(5)

Unnamed: 0,24000,baby,baseball,big,doc,escort,frozen,heroes,high,huck,...,york,yorks,young,young.1,younger,youngja,zebra,zellweger,zoologists,file_name
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,A_Fish_Called_Wanda
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,A_Goofy_Movie
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,A_Million_Ways_to_Die_in_the_West
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,A_Single_Man
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.051657,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,American_Gangster


In [6]:
df_vis_train = get_all_vis_features(df_base_train)
df_vis_train.head(5)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,1643,1644,1645,1646,1647,1648,1649,1650,1651,file_name
0,0.43031,0.38101,0.34082,0.31642,0.41465,0.38599,0.32938,0.31212,0.35067,0.34246,...,362.83,8.5923,9.1427,8.4101,8.7924,1483.3,417.21,892.59,435.28,A_Fish_Called_Wanda
1,0.002031,0.0,0.0,0.07302,0.027533,0.005346,0.006015,0.11824,0.026991,0.005171,...,20278.0,0.97201,1.3654,1.8032,1.4634,168740.0,20896.0,34434.0,20967.0,A_Goofy_Movie
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,119950.0,1e-06,0.002466,4e-06,0.002466,729320.0,119950.0,230400.0,119950.0,A_Million_Ways_to_Die_in_the_West
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,9550.7,5.9433,6.0215,4.3839,5.4288,32242.0,10313.0,22308.0,9850.0,A_Single_Man
4,0.23164,0.28629,0.30068,0.28118,0.24489,0.27849,0.29076,0.2938,0.16104,0.16685,...,109.62,17.237,15.543,13.671,15.231,53559.0,8637.4,18597.0,8679.0,American_Gangster


In [7]:
df_meta_train = get_all_meta_features(df_base_train)
df_meta_train.head(5)

Unnamed: 0,country_Argentina,country_Australia,country_Bahamas,country_Canada,country_China,country_CzechRepublic,country_Egypt,country_France,country_Germany,country_India,...,rated_G,rated_N/A,rated_NOT RATED,rated_PG,rated_PG-13,rated_R,rated_TV-MA,runtime,year,file_name
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,108,1988,A_Fish_Called_Wanda
1,0,0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,78,1995,A_Goofy_Movie
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,116,2014,A_Million_Ways_to_Die_in_the_West
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,99,2009,A_Single_Man
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,157,2007,American_Gangster


## Implementation fun

In [101]:
#warnings.filterwarnings(action='ignore')

#dw = DataWrapper(df_audio_train, df_vis_train, df_txt_train, df_meta_train, df_targets_train)
#dw.generate_subspace()

0.5974691974691975
best features:  [4, 6, 7, 8, 10, 12]
0.6913208360267185
best features:  [1, 6]
0.6983193277310924
best features:  [11]
0.7078431372549019
best features:  [1]
0.5489482739482738
best features:  [4, 10]
0.5629292929292931
best features:  [1, 4, 5, 8, 9, 11, 12]
0.6536507936507935
best features:  [1]
0.6644949494949495
best features:  [10, 11]
0.6661643911643911
best features:  [1, 2, 9, 10]
0.6986002886002887
best features:  [1, 4, 5]
0.5888111888111889
best features:  [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 37, 38, 39, 40, 41, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 1

0.654109224109224
best features:  [6, 7, 11, 13, 17, 20, 22, 27, 32, 34, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 54, 56, 58, 73, 77, 92, 97, 106, 113, 121, 122, 127, 133, 134, 140, 143, 146, 148, 150, 154, 161, 163, 166, 168, 174, 177, 184, 186, 188, 190, 192, 196, 201, 204, 205, 206, 209, 212, 213, 216, 220, 225, 227, 232, 234, 238, 239, 240, 245, 248, 250, 253, 261, 262, 266, 268, 270, 272, 276, 278, 289, 293, 297, 300, 301, 302, 304, 306, 312, 313, 315, 320, 321, 324, 326, 328, 329, 330, 331, 332, 336, 338, 339, 343, 344, 345, 350, 355, 356, 363, 366, 369, 373, 378, 387, 391, 395, 399, 403, 407, 410, 411, 413, 416, 417, 418, 420, 421, 422, 424, 426, 434, 435, 439, 440, 449, 451, 454, 455, 457, 470, 476, 477, 483, 492, 494, 495, 497, 499, 506, 510, 513, 518, 520, 522, 525, 526, 530, 533, 538, 539, 540, 545, 555, 558, 559, 561, 562, 563, 567, 569, 570, 573, 576, 577, 579, 583, 584, 585, 587, 595, 596, 597, 601, 602, 605, 608, 611, 615, 617, 618, 620, 623, 624, 625, 628, 632, 636, 637,

0.7019813519813519
best features:  [12, 39, 43, 49, 50, 63, 64, 72, 75, 78, 86, 87, 88, 95, 112, 134, 135, 140, 155, 175, 184, 189, 190, 205, 216, 220, 224, 233, 235, 245, 256, 258, 280, 291, 293, 306, 323, 338, 339, 343, 344, 360, 362, 367, 375, 378, 382, 384, 401, 406, 409, 413, 425, 448, 476, 482, 490, 492, 493, 497, 505, 512, 521, 534, 544, 552, 555, 557, 560, 563, 572, 582, 587, 593, 626, 634, 641, 644, 645, 653, 666, 685, 689, 691, 696, 697, 699, 701, 720, 722, 725, 739, 740, 743, 755, 758, 765, 775, 785, 788, 796, 826, 837, 847, 849, 854, 855, 863, 864, 876, 883, 894, 900, 904, 908, 910, 915, 925, 940, 945, 946, 950, 952, 956, 962, 966, 967, 972, 983, 1005, 1010, 1022, 1028, 1037, 1038, 1050, 1054, 1073, 1083, 1103, 1111, 1113, 1121, 1132, 1133, 1144, 1148, 1175, 1191, 1194, 1195, 1199, 1207, 1208, 1238, 1239, 1245, 1246, 1256, 1263, 1268, 1273, 1280, 1281, 1301, 1302, 1305, 1313, 1324, 1342, 1346, 1357, 1361, 1365, 1366, 1375, 1409, 1412, 1414, 1429, 1442, 1447, 1456, 1459, 147

0.7751398601398601
best features:  [0, 2, 4, 6, 11, 14, 16, 19, 25, 26, 32, 33, 36, 40, 42, 43, 46, 48, 50, 56, 57, 59, 65, 66, 67, 68, 69, 77, 79, 86, 91, 93, 94, 95, 96, 99, 101, 102, 103, 104, 105, 106, 108, 109, 110, 112, 120, 122, 125, 126, 127, 132, 137, 140, 144, 148, 150, 151, 153, 154, 155, 156, 158, 159, 160, 162, 174, 175, 176, 178, 179, 185, 186, 191, 196, 202, 208, 212, 215, 221, 227, 228, 229, 237, 239, 241, 244, 248, 251, 253, 254, 257, 262, 265, 281, 285, 289, 291, 293, 295, 297, 298, 301, 310, 312, 316, 317, 318, 319, 322, 324, 326, 331, 334, 335, 336, 338, 343, 344, 346, 348, 350, 355, 365, 367, 372, 374, 375, 381, 385, 386, 395, 397, 398, 399, 401, 404, 408, 409, 412, 414, 415, 416, 420, 421, 422, 424, 429, 432, 438, 440, 442, 443, 445, 447, 454, 455, 457, 462, 463, 465, 467, 468, 469, 477, 479, 480, 487, 491, 492, 493, 494, 496, 497, 498, 499, 501, 503, 507, 509, 515, 519, 527, 528, 529, 530, 533, 540, 551, 553, 554, 555, 556, 559, 562, 563, 567, 579, 580, 581, 585,

0.6704584304584303
best features:  [4, 5, 8, 9, 12, 13, 16, 19, 20, 23, 24, 27, 28, 29, 30, 34, 36, 38, 39, 42, 49, 51, 54, 55, 57, 59, 63, 64, 65, 69, 73, 75, 77, 82, 85, 87, 88, 101, 102, 106, 112, 116, 119, 125, 130, 131, 137, 142, 144, 145, 148, 150, 151, 155, 156, 157, 159, 161, 163, 168, 177, 183, 184, 187, 195, 197, 209, 213, 221, 222, 227, 229, 237, 238, 241, 248, 249, 251, 265, 266, 267, 270, 271, 273, 276, 277, 278, 279, 283, 284, 287, 289, 291, 293, 295, 300, 302, 304, 307, 311, 318, 322, 324, 329, 331, 334, 335, 337, 340, 344, 345, 346, 349, 351, 352, 353, 354, 360, 365, 368, 375, 379, 383, 392, 401, 404, 410, 413, 419, 429, 431, 432, 435, 436, 437, 442, 443, 444, 457, 458, 463, 471, 472, 477, 479, 491, 497, 499, 501, 503, 508, 513, 516, 518, 527, 530, 531, 534, 536, 539, 541, 543, 546, 551, 552, 558, 560, 568, 572, 583, 585, 586, 587, 588, 590, 592, 596, 600, 604, 606, 607, 608, 611, 613, 616, 617, 618, 619, 624, 625, 633, 635, 640, 646, 651, 652, 653, 658, 663, 665, 668, 

0.6103807303807305
best features:  [0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 19, 20, 21, 22, 24, 25, 26, 27, 28, 29, 30, 31, 32, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 84, 85, 86, 88, 89, 90, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 103, 104, 105, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 157, 158, 159, 160, 161, 162, 163, 165, 166, 168, 169, 170, 172, 173, 174, 175, 176, 177, 178, 179, 180, 182, 183, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 

0.627016317016317
best features:  [0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 18, 20, 21, 22, 24, 25, 26, 27, 28, 29, 30, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 43, 44, 45, 46, 47, 49, 52, 53, 54, 55, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 82, 84, 86, 87, 88, 90, 92, 93, 94, 95, 98, 99, 100, 102, 103, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 139, 140, 141, 142, 143, 144, 145, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 163, 168, 169, 171, 172, 173, 175, 176, 177, 179, 180, 181, 183, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 203, 204, 206, 207, 209, 210, 211, 212, 216, 217, 218, 219, 220, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 251, 252, 253,

0.7221212121212123
best features:  [2, 3, 7, 8, 9, 12, 13, 17, 20, 26, 27, 28, 29, 31, 38, 41, 42, 43, 46, 47, 49, 55, 59, 67, 68, 71, 75, 79, 89, 90, 92, 97, 98, 99, 104, 105, 112, 113, 116, 117, 118, 121, 122, 123, 124, 126, 127, 130, 131, 133, 135, 141, 142, 153, 156, 157, 159, 162, 163, 164, 169, 170, 173, 174, 176, 180, 181, 183, 184, 185, 187, 189, 192, 194, 195, 197, 198, 207, 209, 210, 219, 221, 223, 231, 243, 248, 250, 251, 260, 263, 265, 266, 267, 270, 271, 273, 274, 277, 282, 283, 295, 297, 302, 305, 309, 314, 322, 323, 324, 325, 327, 328, 331, 335, 346, 351, 356, 362, 363, 364, 369, 373, 375, 382, 384, 386, 387, 389, 390, 394, 397, 399, 401, 403, 404, 405, 406, 408, 409, 410, 412, 417, 423, 425, 427, 428, 429, 431, 435, 438, 439, 441, 444, 447, 450, 451, 458, 463, 466, 476, 477, 480, 483, 484, 486, 492, 499, 501, 502, 503, 508, 509, 510, 511, 514, 520, 522, 523, 524, 525, 529, 531, 534, 536, 537, 538, 542, 544, 545, 546, 547, 548, 549, 550, 553, 555, 556, 558, 560, 570, 574

0.743993783993784
best features:  [2, 3, 4, 5, 7, 10, 12, 16, 18, 20, 23, 30, 31, 34, 37, 38, 39, 45, 52, 54, 58, 59, 62, 66, 68, 73, 75, 76, 82, 85, 86, 88, 92, 93, 95, 99, 102, 105, 107, 108, 109, 110, 111, 114, 121, 125, 128, 129, 135, 136, 137, 139, 142, 148, 150, 153, 156, 158, 159, 160, 162, 163, 165, 166, 169, 170, 174, 177, 178, 181, 186, 189, 191, 195, 196, 198, 201, 203, 204, 205, 206, 207, 208, 209, 210, 213, 214, 219, 223, 234, 237, 238, 242, 245, 248, 260, 266, 268, 273, 274, 279, 282, 287, 293, 294, 297, 304, 305, 306, 313, 315, 316, 322, 324, 330, 335, 336, 337, 342, 344, 345, 352, 360, 369, 376, 377, 380, 384, 386, 393, 396, 397, 399, 400, 401, 416, 422, 423, 425, 426, 429, 430, 431, 434, 440, 444, 445, 450, 452, 454, 455, 458, 461, 466, 471, 478, 482, 483, 484, 485, 491, 493, 497, 499, 502, 504, 506, 507, 512, 518, 519, 523, 531, 537, 540, 542, 543, 552, 553, 554, 561, 567, 569, 590, 592, 602, 603, 604, 605, 609, 611, 612, 615, 616, 617, 618, 620, 623, 626, 630, 632, 6

better features:  [0, 9, 13, 19, 23, 24, 28, 31, 32, 33, 35, 36, 38, 40, 41, 46, 49, 51, 53, 59, 61, 68, 70, 72, 76, 78, 79, 81, 83, 88, 90, 91, 94, 96, 100, 101, 105, 107, 109, 111, 113, 116, 124, 125, 126, 127, 128, 131, 132, 133, 137, 140, 141, 143, 144, 147, 149, 151, 152, 154, 158, 159, 160, 164, 167, 171, 174, 175, 177, 178, 181, 183, 185, 186, 189, 193, 194, 196, 198, 199, 214, 215, 219, 222, 223, 224, 227, 229, 235, 240, 243, 244, 251, 254, 257, 260, 261, 268, 270, 275, 277, 279, 285, 287, 291, 295, 296, 300, 301, 307, 308, 309, 311, 314, 320, 321, 322, 324, 325, 329, 330, 332, 335, 339, 345, 346, 347, 348, 351, 352, 353, 357, 359, 360, 362, 364, 368, 369, 371, 372, 373, 374, 375, 376, 377, 378, 379, 380, 382, 386, 389, 391, 392, 395, 399, 400, 401, 409, 412, 416, 418, 419, 424, 426, 431, 432, 434, 436, 438, 439, 440, 441, 442, 449, 450, 452, 455, 463, 467, 469, 470, 471, 472, 473, 475, 476, 477, 482, 485, 488, 490, 491, 492, 497, 504, 507, 508, 510, 514, 516, 517, 521, 522, 52

better features:  [2, 7, 34, 38, 44, 52, 56, 61, 65, 70, 86, 100, 115, 125, 142, 143, 146, 170, 180, 183, 190, 205, 212, 221, 228, 233, 257, 272, 285, 303, 307, 321, 326, 340, 341, 347, 374, 392, 402, 405, 416, 428, 434, 438, 451, 453, 458, 459, 462, 470, 473, 480, 483, 519, 521, 540, 541, 556, 572, 575, 588, 608, 611, 623, 631, 643, 661, 663, 679, 703, 710, 719, 729, 735, 768, 824, 825, 826, 832, 835, 846, 847, 849, 858, 876, 880, 900, 907, 914, 931, 950, 964, 969, 982, 993, 1031, 1045, 1048, 1049, 1071, 1081, 1086, 1116, 1136, 1142, 1148, 1157, 1168, 1175, 1179, 1180, 1190, 1210, 1218, 1229, 1234, 1245, 1249, 1251, 1253, 1254, 1262, 1280, 1290, 1294, 1330, 1335, 1349, 1364, 1381, 1390, 1409, 1416, 1426, 1429, 1441, 1455, 1463, 1474, 1504, 1512, 1519, 1536, 1537, 1539, 1547, 1563, 1566, 1576, 1577, 1595, 1608, 1611, 1616, 1630, 1637, 1641]
0.7096611721611723
best features:  [4, 21, 113, 118, 122, 186, 211, 228, 240, 286, 330, 346, 348, 357, 364, 422, 475, 484, 500, 509, 619, 631, 635,

0.5987878787878789
best features:  [6, 7, 9, 11, 15, 18, 21, 24, 25, 29, 30, 32, 34, 36, 37, 38, 39, 40, 41, 43, 45, 47, 48, 51, 52, 56, 57, 58, 61, 63, 64, 65, 66, 67, 68, 70, 71, 73, 77, 79, 81, 83, 84, 86, 87, 89, 90, 91, 92, 93, 95, 97, 99, 104, 106, 107, 110, 112, 114, 115, 116, 117, 119, 120, 121, 122, 124, 126, 127, 129, 130, 131, 132, 134, 135, 137, 138, 140, 142, 144, 145, 147, 149, 152, 153, 154, 155, 156, 157, 159, 163, 165, 167, 169, 171, 172, 174, 180, 181, 182, 183, 185, 186, 187, 188, 189, 191, 192, 194, 195, 197, 198, 204, 206, 208, 209, 210, 211, 213, 214, 215, 216, 217, 218, 221, 226, 227, 228, 230, 234, 235, 236, 237, 239, 240, 241, 243, 245, 246, 247, 251, 252, 253, 254, 255, 256, 258, 259, 260, 262, 263, 264, 265, 272, 276, 278, 286, 287, 293, 295, 296, 297, 299, 300, 304, 307, 308, 310, 312, 313, 314, 316, 317, 319, 320, 323, 324, 325, 326, 329, 330, 331, 336, 341, 343, 344, 346, 348, 351, 352, 353, 354, 356, 358, 360, 364, 366, 367, 368, 371, 372, 374, 375, 376, 

0.6235286935286936
best features:  [1, 3, 4, 6, 7, 10, 12, 13, 14, 15, 16, 17, 18, 20, 21, 22, 23, 24, 26, 28, 30, 32, 33, 37, 38, 41, 43, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 59, 61, 62, 64, 67, 71, 74, 75, 77, 83, 84, 88, 89, 92, 93, 96, 99, 101, 102, 104, 108, 109, 111, 115, 121, 123, 125, 127, 128, 129, 136, 137, 142, 145, 150, 155, 158, 160, 161, 162, 164, 167, 169, 170, 171, 174, 179, 180, 184, 185, 186, 188, 190, 194, 195, 198, 199, 200, 203, 204, 205, 206, 207, 208, 213, 215, 216, 217, 218, 222, 224, 226, 228, 229, 235, 236, 238, 240, 241, 242, 243, 250, 253, 255, 256, 259, 260, 261, 263, 264, 265, 266, 269, 270, 274, 275, 276, 278, 279, 283, 284, 286, 288, 289, 291, 293, 294, 295, 300, 302, 303, 304, 307, 310, 314, 320, 321, 323, 328, 333, 335, 341, 342, 344, 345, 347, 351, 353, 355, 358, 361, 365, 366, 367, 368, 369, 370, 378, 380, 381, 383, 384, 385, 386, 387, 389, 393, 397, 399, 402, 404, 406, 407, 410, 411, 413, 417, 418, 421, 422, 424, 425, 430, 431, 433, 434, 436, 437

0.6667443667443667
best features:  [0, 4, 10, 11, 13, 14, 20, 26, 27, 29, 30, 31, 34, 36, 37, 39, 41, 42, 43, 44, 45, 49, 50, 53, 54, 55, 57, 60, 63, 64, 68, 69, 73, 75, 76, 79, 80, 82, 83, 84, 85, 86, 88, 89, 90, 91, 92, 93, 95, 99, 101, 104, 106, 110, 112, 113, 118, 119, 120, 122, 129, 132, 135, 136, 139, 141, 142, 143, 146, 150, 151, 152, 154, 161, 162, 163, 166, 167, 183, 185, 188, 192, 197, 198, 200, 203, 204, 206, 210, 216, 219, 220, 222, 225, 227, 232, 233, 234, 235, 237, 238, 239, 240, 241, 242, 243, 245, 247, 250, 251, 252, 253, 254, 256, 257, 258, 259, 262, 263, 264, 266, 267, 268, 271, 274, 275, 276, 282, 284, 289, 294, 299, 302, 304, 305, 308, 309, 311, 312, 315, 317, 319, 320, 322, 327, 329, 330, 332, 335, 338, 340, 341, 342, 343, 347, 349, 350, 351, 352, 354, 357, 358, 360, 361, 364, 365, 366, 367, 372, 373, 379, 381, 383, 387, 393, 395, 398, 399, 402, 406, 407, 408, 409, 410, 412, 413, 414, 415, 416, 422, 424, 425, 427, 432, 434, 440, 441, 444, 445, 446, 447, 448, 449, 4

0.7078431372549019
best features:  [213, 637]
0.7101087801087801
best features:  [14, 28, 36, 37, 42, 44, 50, 53, 55, 56, 60, 65, 67, 69, 82, 83, 84, 89, 95, 109, 112, 113, 119, 123, 126, 127, 134, 136, 137, 141, 142, 146, 150, 155, 163, 183, 184, 187, 188, 189, 190, 193, 194, 199, 202, 209, 213, 215, 226, 231, 233, 248, 256, 263, 268, 269, 275, 282, 283, 300, 301, 310, 313, 316, 329, 330, 334, 337, 343, 345, 346, 350, 370, 374, 375, 380, 388, 391, 392, 393, 404, 405, 425, 435, 438, 456, 475, 476, 485, 492, 495, 511, 517, 529, 533, 536, 544, 552, 562, 564, 567, 568, 575, 576, 577, 587, 589, 592, 596, 599, 603, 613, 620, 631, 640, 641, 645, 646, 650, 651, 652, 653, 659, 661, 663, 669, 672, 676, 677, 679, 689, 697, 700, 705, 708, 720, 721, 724, 726, 729, 741, 742, 746, 756, 758, 760, 772, 781, 786, 795, 798, 804, 811, 821, 825, 829, 831, 832, 833, 836, 856, 862, 864, 882, 884, 891, 894, 903, 904, 905, 906, 910, 933, 940, 941, 944, 947, 948, 961, 976, 978, 982, 984, 996, 1002, 1008, 1015,

0.5550777000777001
best features:  [1, 10, 13, 19, 29, 30, 40, 52, 58, 63, 79, 80, 85, 87, 93, 94, 100, 102, 114, 115, 119, 123, 132, 141, 151, 166, 188, 189, 193, 198, 200, 207, 220, 236, 258, 278, 295, 296, 301, 305, 307, 319, 322, 329, 333, 339, 352, 365, 367, 393, 400, 403, 413, 414, 434, 440, 455, 456, 459, 462, 466, 475, 487, 493, 506, 512, 541, 554, 560, 564, 568, 570, 586, 594, 599, 624, 628, 629, 635, 639, 640, 645, 657, 658, 659, 671, 674, 676, 692, 694, 695, 698, 719, 721, 741, 752, 760, 761, 763, 765, 767, 775, 790, 801, 813, 821, 824, 829, 832, 833, 845, 857, 860, 880, 884, 887, 898, 916, 927, 928, 930, 932, 934, 936, 938, 947, 958, 970, 971, 973, 985, 987, 1003, 1021, 1030, 1034, 1035, 1045, 1069, 1071, 1080, 1107, 1110, 1113, 1117, 1125, 1130, 1133, 1142, 1154, 1168, 1173, 1187, 1193, 1207, 1214, 1215, 1216, 1224, 1226, 1239, 1249, 1252, 1263, 1265, 1269, 1270, 1276, 1277, 1280, 1290, 1302, 1309, 1319, 1328, 1330, 1336, 1346, 1353, 1366, 1371, 1372, 1399, 1424, 1429, 143

0.5958080808080808
best features:  [8, 9, 11, 16, 17, 18, 21, 27, 29, 30, 31, 33, 37, 38, 40, 41, 42, 53, 60, 66, 67, 68, 69, 71, 72, 73, 77, 79, 80, 82, 85, 91, 94, 96, 98, 100, 104, 106, 107, 108, 109, 112, 114, 117, 121, 124, 126, 129, 133, 134, 135, 137, 141, 143, 144, 145, 146, 147, 152, 157, 165, 166, 167, 169, 171, 174, 177, 178, 182, 183, 185, 188, 192, 193, 196, 202, 206, 207, 210, 211, 214, 219, 220, 221, 223, 227, 241, 248, 251, 252, 254, 256, 263, 265, 268, 272, 274, 276, 277, 278, 284, 285, 286, 290, 293, 296, 300, 303, 304, 307, 308, 309, 313, 317, 318, 320, 325, 326, 327, 328, 333, 334, 337, 344, 352, 360, 362, 365, 366, 368, 371, 374, 375, 376, 377, 380, 382, 383, 389, 390, 391, 394, 402, 403, 407, 410, 412, 414, 416, 421, 422, 424, 426, 428, 431, 432, 433, 435, 436, 437, 439, 441, 443, 446, 450, 451, 455, 458, 461, 465, 466, 467, 470, 471, 474, 480, 482, 483, 486, 488, 490, 491, 495, 498, 500, 502, 508, 509, 511, 516, 519, 521, 522, 525, 526, 528, 536, 538, 540, 543, 5

0.6538111888111887
best features:  [4, 5, 6, 9, 10, 16, 19, 20, 22, 24, 26, 30, 31, 36, 43, 44, 47, 48, 51, 60, 62, 63, 72, 86, 87, 88, 90, 94, 95, 97, 105, 108, 112, 113, 115, 116, 118, 119, 133, 136, 140, 142, 147, 150, 151, 154, 158, 166, 168, 169, 176, 180, 186, 189, 192, 196, 203, 204, 206, 207, 210, 213, 216, 226, 229, 230, 231, 234, 238, 241, 242, 244, 247, 255, 257, 258, 259, 265, 266, 276, 278, 279, 283, 286, 290, 292, 295, 298, 309, 310, 311, 318, 327, 330, 331, 344, 349, 350, 354, 357, 358, 359, 361, 362, 365, 368, 370, 373, 374, 375, 376, 378, 388, 389, 392, 394, 398, 404, 407, 410, 418, 419, 423, 424, 425, 427, 432, 435, 440, 445, 447, 448, 451, 452, 454, 457, 460, 466, 469, 475, 484, 489, 492, 493, 494, 503, 505, 506, 508, 509, 510, 513, 516, 518, 519, 520, 523, 527, 530, 532, 533, 538, 541, 544, 545, 546, 549, 550, 552, 554, 567, 569, 574, 575, 577, 578, 579, 580, 581, 582, 584, 586, 589, 598, 600, 605, 607, 610, 612, 613, 615, 617, 621, 625, 630, 632, 634, 637, 641, 642

0.7279020979020979
best features:  [14, 15, 16, 25, 30, 34, 66, 79, 81, 84, 85, 90, 125, 137, 142, 149, 171, 181, 197, 205, 213, 214, 227, 232, 247, 259, 261, 273, 276, 284, 292, 312, 324, 326, 341, 343, 346, 347, 391, 393, 398, 412, 425, 446, 447, 452, 453, 456, 460, 466, 492, 500, 515, 517, 554, 556, 559, 616, 619, 628, 631, 656, 666, 683, 705, 716, 718, 722, 734, 741, 747, 749, 757, 762, 763, 767, 770, 774, 782, 790, 802, 823, 850, 858, 865, 867, 880, 882, 884, 888, 892, 918, 922, 937, 939, 956, 957, 966, 977, 980, 985, 988, 1001, 1003, 1006, 1008, 1012, 1017, 1026, 1034, 1044, 1045, 1046, 1052, 1054, 1065, 1093, 1101, 1119, 1139, 1140, 1142, 1144, 1147, 1156, 1160, 1163, 1177, 1179, 1181, 1201, 1215, 1224, 1228, 1245, 1248, 1250, 1261, 1279, 1296, 1304, 1313, 1331, 1340, 1356, 1364, 1379, 1407, 1411, 1422, 1428, 1433, 1436, 1449, 1459, 1464, 1472, 1486, 1489, 1497, 1499, 1520, 1524, 1533, 1535, 1551, 1556, 1557, 1563, 1573, 1579, 1615, 1617, 1642]
0.7318326118326117
best features: 

0.7084382284382285
best features:  [183, 188, 202, 241, 306, 339, 362, 376, 434, 578, 736, 757, 765, 790, 847, 908, 966, 1098, 1106, 1140, 1202, 1225, 1283, 1299, 1476, 1485, 1521, 1554, 1633]
0.7855594405594405
best features:  [72, 772, 816, 1259, 1377, 1499, 1520, 1524, 1538, 1639]
0.6166666666666666
best features:  ['12s', '16yearold', '19', '1974', '2007', '21yearold', '24', '6.1', '600000', '60s', '70s', '74th', 'abandon', 'academic', 'access', 'acclaimed', 'accumulate', 'accustomed', 'achieve', 'acrimony', 'activists', 'adaptation', 'adrenaline', 'adult', 'affair', 'affect', 'affections', 'african', 'agency', 'agent', 'aging', 'agriculture', 'ahkmenrah.1', 'aided', 'ailment', 'akhrot', 'alberts', 'alexei', 'alike', 'alliance', 'allow', 'alter', 'amar', 'amid', 'animals', 'apartment', 'apologize', 'approve', 'archer', 'areas', 'aristocratic', 'armands', 'arranges', 'artistic', 'ask', 'assembly', 'attempted', 'attention', 'attitudes', 'autobots', 'awakens', 'babadook', 'bad', 'ball

0.6382617382617383
best features:  ['00', '12s', '15year', '16yearold', '1874', '1882', '1899', '19', '1960s', '1974', '1980s', '22', '24', '24000', '30', '30.1', '34', '3d', '4th2014', '7', '70s', '74th', 'aaron', 'abandon', 'abandoned', 'abortion', 'abortion.1', 'abroad', 'absolute', 'accused', 'acme', 'acrimony', 'act', 'acting', 'actress.1', 'actually.1', 'adams', 'adapt', 'admirer', 'adolescent', 'adrenaline', 'adventure.1', 'advice', 'affair', 'affair.2', 'affect', 'affecting', 'affections', 'afraid', 'african', 'age', 'age.1', 'agent.1', 'agent.2', 'agents', 'ago', 'ago.1', 'agree', 'agriculture', 'ahead', 'ahead.1', 'ailment', 'alan', 'alanadale', 'alaska', 'alberts', 'alibi', 'alice', 'alien', 'allgirl', 'alliance', 'ally', 'alongside', 'alter', 'ambushed', 'america', 'amid', 'amidst', 'anderson', 'angeles', 'angeles.1', 'animation', 'annie', 'antics', 'anymore', 'apart', 'appearing', 'apprehended', 'approval', 'approve', 'archer', 'area', 'areas', 'arends', 'armands', 'armed'

0.6664269064269064
best features:  ['118th', '12', '16yearold', '1874', '1895', '19', '1950', '1960s', '1962', '2007', '21', '21yearold', '22', '24', '24000', '30', '30.1', '34', '5', '6', '6.1', '600000', '7', '74th', 'Unnamed: 43', 'aaron', 'abandon', 'abandoned', 'abilities', 'able', 'absolute', 'academic', 'accept', 'accident', 'acclaimed', 'account', 'accumulate', 'accused', 'accuseds', 'accustomed', 'acerbic', 'achieve', 'acme', 'acting', 'action', 'activists', 'actor', 'actorgigolo', 'actors', 'actually', 'adapt', 'admirer', 'adrenaline', 'adult', 'affair', 'affair.2', 'afraid', 'africa', 'african', 'agency', 'agent', 'agents', 'ago', 'agrees', 'agrees.1', 'agriculture', 'ahead', 'ahead.1', 'ahkmenrah', 'ahkmenrah.1', 'aid', 'aided', 'ailment', 'akhrot', 'alan', 'alice', 'alien', 'alike', 'alive', 'alive.1', 'allgirl', 'alliance', 'allows', 'ally.1', 'alter', 'ambushed', 'amelia', 'amelia.1', 'american', 'amid', 'amidst', 'animated', 'animation', 'anna', 'annie', 'announce', 'an

0.7078431372549019
best features:  ['dangerous', 'furnish', 'marian', 'optimus', 'reconciliation', 'volatile', 'werewolves']
0.7153535353535354
best features:  ['1882', '1895', '19', '24000', '30', '30yearold', '6.1', '600000', '74th', 'abilities', 'able', 'account', 'actually.1', 'administrator', 'adventure.1', 'affair', 'affecting', 'affections', 'afraid', 'africa', 'age.1', 'agency', 'agent.2', 'ahead', 'ahead.1', 'aided', 'alaska', 'alberts', 'alexei', 'alice', 'alien', 'allows', 'altered', 'alzheimers', 'amelia.1', 'amid', 'amidst', 'angeles.1', 'animals', 'anna', 'announce', 'anymore', 'apprentice', 'archer', 'archie', 'area', 'arrest', 'asia', 'ask', 'asks', 'assigning', 'attitudes', 'attorney', 'available', 'awakens', 'aware', 'away', 'awesome', 'babadook.1', 'badges', 'bajaj', 'ball.1', 'band', 'bartender', 'batman', 'battle', 'beach.1', 'beautiful', 'begin', 'big', 'bigfoot', 'biggest', 'billionaire', 'birth', 'blair', 'blamed', 'blinded', 'blue', 'blurs', 'body.1', 'boggarts

better features:  ['118th', '12', '12s', '15', '1874', '1882', '1895', '1950', '1962', '1963', '1974', '2007', '21', '21yearold', '22', '24', '24000', '34', '6.1', '60', '600000', '60s', '7', '74th', 'Unnamed: 43', 'aaron', 'abandon', 'abortion', 'abroad', 'absolute', 'accept', 'access', 'accidentally', 'acclaimed', 'accustomed', 'achieve', 'acme', 'acquisitions', 'acting', 'action', 'actor', 'actorgigolo', 'actually', 'adapt', 'adolescent', 'adrenaline', 'advancement', 'adventure.1', 'affair', 'affairs', 'affect', 'afraid', 'african', 'age', 'age.1', 'aging', 'ago', 'agriculture', 'ahead', 'ahead.1', 'aid', 'ailment', 'alanadale', 'albert.1', 'alberts', 'alexei', 'alien', 'alike', 'alive', 'alliance', 'allies', 'alongside', 'altered', 'amar', 'amelia', 'amelia.1', 'american', 'amid', 'angeles', 'angeles.1', 'angeles.2', 'animals', 'animated', 'animation', 'anna', 'antics', 'anymore', 'apart', 'apologize', 'appearing', 'apprentice', 'approve', 'archer', 'archie', 'armands', 'army', 'ar

better features:  ['118th', '12', '15', '1882', '1976', '2007', '21yearold', '24', '24000', '30', '30.1', '34', '6', '600000', '60s', '70s', '74th', 'Unnamed: 43', 'abandon', 'abortion', 'access', 'accident', 'accident.1', 'account', 'accumulate', 'accused', 'accustomed', 'acrimony', 'acting', 'action', 'actionpacked', 'actor', 'actors', 'actress.1', 'actually', 'actually.1', 'adams', 'adapt', 'administrator', 'admirer', 'adult', 'advancement', 'adventure.1', 'affect', 'afraid', 'africa', 'african', 'agency', 'ago', 'agree', 'agrees.1', 'ahead.1', 'aid', 'ailment', 'alaska', 'albert', 'albert.1', 'alibi', 'alike', 'allgirl', 'allies', 'altered', 'alzheimers', 'amar', 'american', 'announce', 'apart', 'apologize', 'appearing', 'apprehended', 'archer', 'areas', 'arends', 'argentinean', 'armed', 'arrest', 'art', 'artistic', 'asap', 'asks', 'assume', 'attempted', 'attempts', 'attend', 'attention', 'augustus', 'autobots', 'away', 'away.1', 'awful', 'babadook.1', 'background', 'bad', 'badges'

0.7133376427494074
best features:  ['118th', '12', '12s', '15', '1874', '1882', '1960s', '1962', '1963', '1974', '1976', '2007', '21', '21yearold', '22', '24', '24000', '30', '30yearold', '4th2014', '5', '6.1', '60', '600000', '60s', '7', '70s', '74th', 'Unnamed: 43', 'aaron', 'abandon', 'abandoned', 'abilities', 'able', 'abortion.1', 'absolute', 'accept', 'access', 'accidentally', 'account', 'accumulate', 'accused', 'accustomed', 'acerbic', 'acme', 'acquisitions', 'acrimony', 'act', 'acting', 'action', 'activists', 'actor', 'actorgigolo', 'actors', 'actress', 'actress.1', 'actually', 'adams', 'adapt', 'administrator', 'adolescent', 'adult', 'advancement', 'advice', 'affair', 'affair.1', 'affair.2', 'affairs', 'affecting', 'affections', 'africa', 'african', 'age', 'age.1', 'agent', 'agent.1', 'agent.2', 'agents', 'aging', 'agree', 'agrees', 'agriculture', 'ahead', 'ahkmenrah.1', 'aid', 'aided', 'ailment', 'akhrot', 'alan', 'alanadale', 'alaska', 'albert', 'albert.1', 'alberts', 'alexei

0.7218272740331564
best features:  ['00', '12s', '15', '15year', '1882', '1895', '1899', '19', '1960s', '1974', '1976', '21', '24', '24000', '30.1', '30yearold', '34', '3d', '6.1', '60s', '7', '70s', 'abortion', 'absolute', 'accept', 'accidentally', 'acclaimed', 'account', 'accumulate', 'accused', 'accuseds', 'accustomed', 'achieve', 'acrimony', 'acting', 'action', 'activists', 'actorgigolo', 'actress', 'actually', 'actually.1', 'adams', 'administrator', 'admirer', 'adolescent', 'adult', 'advancement', 'adventure', 'adventure.1', 'affair', 'affair.1', 'affect', 'affecting', 'affections', 'africa', 'age', 'age.1', 'agent.1', 'agent.2', 'ago', 'agree', 'agrees.1', 'agriculture', 'ahead', 'ahkmenrah', 'alanadale', 'alaska', 'alexei', 'alice', 'alien', 'alike', 'alliance', 'ally', 'ally.1', 'alongside', 'altered', 'amelia', 'amelia.1', 'american', 'amid', 'amidst', 'angeles', 'angeles.1', 'animated', 'anna', 'annie', 'announce', 'antics', 'anymore', 'apart', 'apartment', 'appearing', 'appe

0.7252424046541692
best features:  ['16yearold', '1874', '1882', '19', '1950', '1963', '1974', '1976', '1980s', '21', '24', '30', '3d', '5', '60', '60s', '70s', 'abandon', 'abilities', 'able', 'abortion', 'accept', 'acclaimed', 'accumulate', 'acerbic', 'achieve', 'acme', 'acquisitions', 'act', 'actionpacked', 'actor', 'actorgigolo', 'actress.1', 'actually.1', 'adapt', 'administrator', 'admirer', 'adult', 'advancement', 'adventure', 'adventure.1', 'affair', 'affair.2', 'affect', 'affecting', 'affections', 'africa', 'african', 'age.1', 'agency', 'agent', 'aging', 'agrees', 'ahead.1', 'ahkmenrah.1', 'aided', 'ailment', 'akhrot', 'alaska', 'albert.1', 'alibi', 'alice', 'alive.1', 'allies', 'allow', 'ally', 'ally.1', 'altered', 'ambushed', 'amelia', 'amelia.1', 'american', 'ancestors', 'angeles', 'angeles.1', 'animals', 'animated', 'animation', 'anna', 'apologize', 'approval', 'arends', 'army', 'arranges', 'arrive', 'art', 'artificial', 'ask', 'asked', 'asks', 'assembly', 'assigning', 'asso

0.7293040293040294
best features:  ['annie', 'bartender', 'cabin', 'christian.1', 'corporation', 'greeted', 'guy', 'luxurious', 'originally', 'perfectly', 'program', 'report', 'serving', 'sixteen', 'stripped', 'tales', 'took', 'using']
0.7344160741219564
best features:  ['00', '118th', '12s', '16yearold', '1874', '1895', '19', '1950', '1960s', '1963', '1980s', '2007', '21', '24', '30.1', '30yearold', '4th2014', '5', '6', '600000', '7', 'aaron', 'abandon', 'abandoned', 'abroad', 'absolute', 'acclaimed', 'accumulate', 'accustomed', 'acme', 'actress', 'actress.1', 'actually', 'adapt', 'adrenaline', 'adventure', 'adventure.1', 'advice', 'affair', 'affair.1', 'affair.2', 'affecting', 'agency', 'agent.1', 'agent.2', 'aging', 'ahead.1', 'akhrot', 'alanadale', 'albert.1', 'allgirl', 'ally.1', 'alongside', 'alter', 'ancestors', 'anderson', 'angeles.2', 'animals', 'animated', 'antics', 'anymore', 'apart', 'appears', 'apprehended', 'apprentice', 'arends', 'argentinean', 'armand', 'army', 'arrest'

0.7382255979314802
best features:  ['12', '12s', '15', '1874', '1895', '1950', '1962', '1963', '1974', '1976', '1980s', '2007', '21', '21yearold', '30', '30.1', '30yearold', '3d', '4th2014', '5', '600000', '7', '70s', '74th', 'abandon', 'able', 'absolute', 'access', 'accident', 'accident.1', 'accidentally', 'acclaimed', 'account', 'accumulate', 'accuseds', 'achieve', 'acme', 'acquisitions', 'act', 'acting', 'actionpacked', 'activists', 'actor', 'actorgigolo', 'actress', 'actually', 'adams', 'administrator', 'admirer', 'adult', 'advancement', 'adventure', 'affect', 'affecting', 'affections', 'age.1', 'agency', 'agent', 'agents', 'aging', 'ago', 'ago.1', 'agree', 'agrees', 'agriculture', 'ahead', 'ahead.1', 'ahkmenrah', 'ahkmenrah.1', 'aid', 'aided', 'akhrot', 'alan', 'alaska', 'albert.1', 'alberts', 'alexei', 'alice', 'alive.1', 'ally', 'ally.1', 'alongside', 'alter', 'altered', 'amar', 'amelia.1', 'america', 'amid', 'amidst', 'anderson', 'angeles.2', 'animals', 'anna', 'annie', 'antics

0.7506215353274177
best features:  ['118th', '15', '15year', '1895', '1950', '24000', '30', '3d', '6', '70s', 'abandon', 'academic', 'accident.1', 'accused', 'accuseds', 'actionpacked', 'activists', 'actorgigolo', 'adolescent', 'advancement', 'adventure.1', 'affairs', 'affections', 'agent', 'ago', 'agree', 'ahkmenrah', 'albert', 'alberts', 'alice', 'ally', 'alongside', 'amelia', 'america', 'ancestors', 'anderson', 'appearing', 'armed', 'army', 'arrest', 'assembly', 'astounding', 'attention', 'attila', 'attitudes', 'attorney', 'auditions', 'avoid', 'awesome', 'baby.1', 'background', 'bajaj', 'ball.1', 'baronial', 'baseball.1', 'batman', 'battling', 'baymax', 'beach.1', 'befalls', 'befriend', 'belief', 'believes', 'best.1', 'bhoot', 'bhoothnath', 'bhoothnaths', 'big.1', 'bigfoot.1', 'birthday', 'black', 'blessings', 'bloom', 'blue', 'blurs', 'bobby', 'body.1', 'bohemian', 'boil', 'bond.1', 'book', 'boot', 'born', 'bourne', 'boxer', 'boy.1', 'boyfriend.2', 'boys', 'boys.2', 'brass', 'brea

better features:  ['12s', '15year', '16yearold', '1882', '19', '1962', '1976', '2007', '21yearold', '22', '4th2014', '6', '6.1', '600000', '70s', 'abandon', 'able', 'abortion', 'abortion.1', 'abroad', 'academic', 'accident.1', 'account', 'acme', 'acquisitions', 'act', 'action', 'activists', 'actor', 'actorgigolo', 'actually.1', 'adapt', 'adolescent', 'adrenaline', 'adult', 'affair', 'affair.1', 'affairs', 'affecting', 'affections', 'afraid', 'africa', 'african', 'agent', 'agent.1', 'agent.2', 'agents', 'aging', 'ago', 'agrees.1', 'agriculture', 'alanadale', 'alaska', 'albert.1', 'alive', 'alive.1', 'ally.1', 'altered', 'amelia.1', 'america', 'american', 'amid', 'angeles', 'angeles.2', 'animals', 'antics', 'anymore', 'apartment', 'appearing', 'apprentice', 'archie', 'area', 'areas', 'argentinean', 'army', 'arrives', 'artistic', 'asia', 'ask', 'association', 'assortment', 'atrocities', 'attempt', 'attempts', 'attend', 'attention', 'attinger', 'attractive', 'auditions', 'augustus', 'autob

better features:  ['00', '12', '15', '1895', '1950', '1962', '1974', '1976', '21yearold', '22', '24', '3d', '4th2014', '5', '60', 'Unnamed: 43', 'aaron', 'abandoned', 'absolute', 'accident', 'accident.1', 'accumulate', 'accustomed', 'acquisitions', 'act', 'acting', 'action', 'actor', 'adams', 'adaptation', 'adolescent', 'adventure.1', 'affair.1', 'affecting', 'africa', 'age', 'ago.1', 'agriculture', 'akhrot', 'alaska', 'albert', 'alike', 'alive', 'allgirl', 'alliance', 'allow', 'allows', 'ally.1', 'alter', 'altered', 'alzheimers', 'amelia', 'amelia.1', 'amid', 'ancestors', 'angeles.1', 'angeles.2', 'animated', 'anymore', 'apartment', 'appearing', 'appears', 'approval', 'approve', 'archer', 'arends', 'arent', 'armands', 'arranges', 'arrest', 'arrives', 'artificial', 'arts', 'asia', 'assigning', 'attempt', 'attila', 'attinger', 'attracted', 'augustus', 'available', 'average', 'avoid', 'await', 'away', 'babadook.1', 'babar', 'baby', 'bacterial', 'baggins', 'balan', 'ballerina', 'ballet', 

better features:  ['118th', '12', '1963', '21yearold', '22', '24', '30.1', '34', '60', 'able', 'abortion', 'abortion.1', 'abroad', 'accept', 'accident.1', 'acme', 'acrimony', 'actionpacked', 'actorgigolo', 'actually', 'adaptation', 'admirer', 'adolescent', 'advancement', 'adventure', 'affair.1', 'affair.2', 'affect', 'affecting', 'affections', 'afraid', 'africa', 'agrees.1', 'agriculture', 'ahkmenrah.1', 'aided', 'akhrot', 'albert', 'albert.1', 'alien', 'allow', 'ally.1', 'alongside', 'alter', 'amelia.1', 'animals', 'announce', 'appearing', 'approval', 'area', 'areas', 'arent', 'armands', 'armed', 'army', 'arrest', 'arrive', 'arrived', 'arrives', 'arts', 'asked', 'assigning', 'assume', 'athlete', 'attempt', 'attend', 'attention', 'attorney', 'available', 'await', 'awards', 'aware', 'awesome', 'babadook', 'background', 'bad', 'ball.1', 'base', 'bass', 'battling', 'baymax', 'beach.1', 'bear', 'beat', 'beautiful', 'beautiful.1', 'begun', 'bhoot', 'bilbo', 'billionaire', 'binoche', 'black'

better features:  ['19', '21yearold', '3d', 'academic', 'acclaimed', 'acme', 'act', 'actionpacked', 'affect', 'afraid', 'agent.1', 'aging', 'ago.1', 'ally', 'altered', 'anna', 'apologize', 'appearing', 'approve', 'archer', 'areas', 'arrives', 'ask', 'assigning', 'atrocities', 'bacterial', 'bans', 'bat', 'began.1', 'best', 'bhau', 'big.1', 'bigger.1', 'binds', 'bit', 'blair', 'blamed', 'blessings', 'bobby.1', 'bourne.1', 'brass', 'broadcast', 'brother', 'bugs.1', 'bullied', 'busboy', 'capture', 'case', 'challenge', 'changed', 'child.1', 'choice.1', 'christian.1', 'chungsu', 'cia', 'city.2', 'claims', 'clients.1', 'clubs', 'clubs.1', 'code', 'codebreakers', 'community', 'compiled', 'condemnation', 'corps', 'costello', 'covering', 'crashes', 'critic', 'curse', 'cursed', 'deal', 'decepticons', 'decision', 'degenerative', 'denial', 'dent', 'desperately', 'destruction', 'detective', 'devastated', 'develops', 'devoted', 'diamond', 'diggler', 'discovered.2', 'disdain', 'districts.1', 'dog.2', 

0.6360805860805862
best features:  ['country_Bahamas', 'country_China', 'country_Egypt', 'country_Spain', 'country_UK', 'country_USA', 'genre_Biography', 'genre_Fantasy', 'genre_Music', 'genre_Western', 'language_Arabic', 'language_English', 'language_Greek', 'language_Inuktitut', 'language_Navajo', 'language_Vietnamese', 'rated_G', 'rated_R']
0.7312021312021313
best features:  ['country_France', 'genre_Crime', 'genre_Horror', 'language_Dutch', 'language_German', 'language_Hindi', 'language_Korean', 'language_Latin', 'language_Vietnamese']
0.7395354645354646
best features:  ['country_Argentina', 'country_USA', 'genre_Crime', 'genre_Family', 'genre_Fantasy', 'language_Dutch', 'language_English', 'language_Hungarian', 'language_Urdu']
better features:  ['country_Bahamas', 'genre_Crime', 'genre_Musical', 'language_Navajo', 'language_Swahili']
better features:  ['genre_Crime', 'language_Inuktitut']
0.7415501165501165
best features:  ['country_Argentina', 'country_Australia', 'country_Baham

0.7078431372549019
best features:  ['language_Greek']
0.712987502693385
best features:  ['country_Bahamas', 'country_China', 'country_France', 'country_Italy', 'country_NewZealand', 'genre_Mystery', 'genre_Sport', 'genre_Western', 'language_Filipino', 'language_Hungarian', 'language_Korean', 'language_Navajo', 'language_Urdu', 'language_Yiddish', 'rated_TV-MA']
0.7395354645354646
best features:  ['genre_Crime']
0.767978687978688
best features:  ['country_Egypt', 'country_France', 'country_Spain', 'genre_Crime', 'genre_Romance', 'language_Greek', 'language_Japanese', 'language_Latin', 'language_Urdu', 'rated_N/A']
0.626007326007326
best features:  ['country_China', 'country_France', 'country_Japan', 'country_SouthAfrica', 'genre_Action', 'genre_Biography', 'genre_Family', 'genre_Musical', 'genre_Romance', 'genre_Sport', 'language_English', 'language_French', 'language_Hindi', 'language_Inuktitut', 'language_Japanese', 'language_Latin', 'language_Serbian', 'language_Yiddish', 'rated_G', 

0.7292790542790544
best features:  ['country_Netherlands', 'genre_Crime', 'genre_Horror', 'genre_War', 'language_AmericanSignLanguage', 'language_Filipino', 'language_Korean', 'language_Urdu', 'language_Yiddish']
0.7446053946053947
best features:  ['country_Bahamas', 'country_France', 'country_India', 'country_Netherlands', 'genre_Crime', 'genre_Family', 'genre_Musical', 'genre_Sport', 'genre_War', 'language_Inuktitut', 'language_Mandarin', 'language_Serbian', 'rated_PG']
0.7500299700299701
best features:  ['country_Argentina', 'country_Ireland', 'country_Italy', 'country_Netherlands', 'genre_Crime', 'genre_Music', 'language_Danish', 'language_Greek', 'language_Mandarin', 'rated_NOT RATED']
0.4994172494172494
best features:  ['country_Argentina', 'country_China', 'country_CzechRepublic', 'country_Germany', 'country_Ireland', 'country_SouthAfrica', 'country_SouthKorea', 'genre_Adventure', 'genre_Biography', 'genre_Drama', 'genre_Horror', 'genre_Sport', 'genre_Thriller', 'language_Americ

0.6334212356271178
best features:  ['country_Egypt', 'language_Arabic', 'language_German', 'rated_PG-13']
0.7240592740592742
best features:  ['country_India', 'country_Italy', 'genre_Crime', 'language_French']
0.7395354645354646
best features:  ['country_SouthKorea', 'genre_Crime', 'genre_Musical', 'language_Croatian', 'language_Greek']
0.7400849150849151
best features:  ['country_Bahamas', 'country_Egypt', 'country_France', 'genre_Crime', 'genre_Drama', 'genre_Musical', 'genre_War', 'genre_Western', 'language_AmericanSignLanguage', 'language_Danish', 'language_Greek', 'language_Inuktitut', 'language_Swahili']
0.7452680652680652
best features:  ['country_Spain', 'genre_Crime', 'language_Dutch', 'language_Serbian', 'rated_TV-MA']
0.7500299700299701
best features:  ['country_Argentina', 'country_Egypt', 'genre_Crime', 'genre_Horror', 'language_Hungarian']


In [137]:
dw.f_audio[1]

[1, 4, 5]

In [173]:
import warnings
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neighbors import NearestCentroid
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import BaggingClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.naive_bayes import GaussianNB # put NOTE into paper - not sure if correct bayes
from sklearn.model_selection import StratifiedKFold # put NOTE into paper - better as common kfold sampling
from sklearn.model_selection import cross_validate
from sklearn.base import clone
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from functools import reduce

CI_95_FACTOR = 1.96

class BaseClassifier:
    
    def __init__(self, clf, clf_name, modality):
        self.clf = clf
        self.clf_name = clf_name
        self.modality = modality
        
    def fit(self, df_features, df_targets, verbose=True):
        self.clf = clone(self.clf) # reset any previously trained model
        np.random.seed(32143421)
        if verbose:
            print(f"Starting training for classifier {self.clf_name} and modality {self.modality}")
        self.clf.fit(df_features, df_targets)
        
    def predict(self, df_features, verbose=True):
        if verbose:
            print(f"Starting prediction for classifier {self.clf_name} and modality {self.modality}")
        return self.clf.predict(df_features)    
        
class ClassifierFactory:
    
    @staticmethod
    def get_metadata_classifiers():
        return [BaseClassifier(KNeighborsClassifier(), 'k-Nearest neighbors', 'Metadata'),
                BaseClassifier(NearestCentroid(), 'Nearest mean classifier', 'Metadata'),
                BaseClassifier(DecisionTreeClassifier(), 'Decision tree', 'Metadata'),
                BaseClassifier(LogisticRegression(), 'Logistic regression', 'Metadata'),
                BaseClassifier(SVC(gamma='auto'), 'SVM (Gaussian Kernel)', 'Metadata'),
                BaseClassifier(BaggingClassifier(),'Bagging', 'Metadata'),
                BaseClassifier(RandomForestClassifier(n_estimators=10),'Random Forest', 'Metadata'),
                BaseClassifier(AdaBoostClassifier(), 'AdaBoost', 'Metadata'),
                BaseClassifier(GradientBoostingClassifier(), 'Gradient Boosting Tree', 'Metadata')]
    
    @staticmethod
    def get_text_classifiers():
        return [BaseClassifier(GaussianNB(), 'Naive Bayes', 'Textual'),
                BaseClassifier(KNeighborsClassifier(), 'k-Nearest neighbors', 'Textual'),
                BaseClassifier(SVC(gamma='auto'), 'SVM (Gaussian Kernel)', 'Textual')]
    
    @staticmethod
    def get_visual_classifiers():
        return [BaseClassifier(KNeighborsClassifier(), 'k-Nearest neighbors', 'Visual'),
                BaseClassifier(DecisionTreeClassifier(), 'Decision tree', 'Visual'),
                BaseClassifier(LogisticRegression(), 'Logistic regression', 'Visual'),
                BaseClassifier(SVC(gamma='auto'), 'SVM (Gaussian Kernel)', 'Visual'),
                BaseClassifier(RandomForestClassifier(n_estimators=10), 'Random Forest', 'Visual'),
                BaseClassifier(AdaBoostClassifier(), 'AdaBoost', 'Visual'),
                BaseClassifier(GradientBoostingClassifier(), 'Gradient Boosting Tree', 'Visual')]
    
    @staticmethod
    def get_audio_classifiers():
        return [BaseClassifier(LogisticRegression(), 'Logistic regression', 'Audio'),
                BaseClassifier(GradientBoostingClassifier(), 'Gradient Boosting Tree', 'Audio')]                
                

class DataWrapper:
    
    def __init__(self, df_audio, df_vis, df_txt, df_meta, df_targets):
        self.df_audio = df_audio
        self.df_vis = df_vis
        self.df_txt = df_txt
        self.df_meta = df_meta
        self.df_targets = df_targets
        
        self.f_audio=[]
        self.f_vis=[]
        self.f_txt = []
        self.f_meta = []
        
    def generate_subspace(self):
        # TODO insert LVW functionality
        # N = len(df_meta)
        # MAX_TRIES = 77*N^5
        # print(lvf(MAX_TRIES,df_meta, df_targets_train,5))
        self.df_audio = self.df_audio.drop('file_name', axis=1)
        self.df_vis = self.df_vis.drop('file_name', axis=1)
        self.df_txt = self.df_txt.drop('file_name', axis=1)
        self.df_meta = self.df_meta.drop('file_name', axis=1)       
        

        #self.f_audio = self.getMBestFeatures(self.df_audio, self.df_targets, ClassifierFactory.get_audio_classifiers())
        #self.f_vis   = self.getMBestFeatures(self.df_vis, self.df_targets, ClassifierFactory.get_visual_classifiers())
        #self.f_txt   = self.getMBestFeatures(self.df_txt, self.df_targets, ClassifierFactory.get_text_classifiers())
        #self.f_meta  = self.getMBestFeatures(self.df_meta, self.df_targets, ClassifierFactory.get_metadata_classifiers())
        self.f_audio = [[1], [1, 4, 5]]
        self.f_vis = [[0,  2,  4,  6,  11,  14,  16,  19,  25,  26,  32,  33,  36,  40,  42,  43,  46,  48,  50,  56,  57,  59,  65,  66,  67,  68,  69,  77,  79,  86,  91,  93,  94,  95,  96,  99,  101,  102,  103,  104,  105,  106,  108,  109,  110,  112,  120,  122,  125,  126,  127,  132,  137,  140,  144,  148,  150,  151,  153,  154,  155,  156,  158,  159,  160,  162,  174,  175,  176,  178,  179,  185,  186,  191,  196,  202,  208,  212,  215,  221,  227,  228,  229,  237,  239,  241,  244,  248,  251,  253,  254,  257,  262,  265,  281,  285,  289,  291,  293,  295,  297,  298,  301,  310,  312,  316,  317,  318,  319,  322,  324,  326,  331,  334,  335,  336,  338,  343,  344,  346,  348,  350,  355,  365,  367,  372,  374,  375,  381,  385,  386,  395,  397,  398,  399,  401,  404,  408,  409,  412,  414,  415,  416,  420,  421,  422,  424,  429,  432,  438,  440,  442,  443,  445,  447,  454,  455,  457,  462,  463,  465,  467,  468,  469,  477,  479,  480,  487,  491,  492,  493,  494,  496,  497,  498,  499,  501,  503,  507,  509,  515,  519,  527,  528,  529,  530,  533,  540,  551,  553,  554,  555,  556,  559,  562,  563,  567,  579,  580,  581,  585,  589,  592,  594,  595,  598,  599,  602,  610,  611,  615,  620,  621,  625,  626,  627,  630,  632,  633,  638,  642,  647,  649,  653,  654,  656,  658,  666,  671,  676,  681,  682,  686,  688,  694,  704,  710,  712,  716,  717,  718,  721,  722,  723,  727,  733,  741,  742,  744,  751,  754,  759,  761,  764,  766,  767,  772,  782,  792,  794,  798,  800,  801,  807,  808,  810,  811,  816,  817,  820,  821,  826,  828,  829,  830,  833,  840,  845,  846,  851,  856,  858,  865,  867,  868,  869,  870,  871,  874,  875,  877,  880,  882,  884,  891,  895,  898,  899,  901,  905,  907,  908,  910,  917,  918,  920,  922,  927,  928,  929,  930,  932,  937,  938,  949,  952,  954,  959,  960,  963,  965,  972,  973,  975,  976,  977,  981,  984,  987,  989,  990,  995,  996,  999,  1000,  1005,  1007,  1008,  1016,  1017,  1019,  1020,  1025,  1031,  1035,  1038,  1039,  1041,  1042,  1049,  1050,  1051,  1056,  1060,  1061,  1063,  1068,  1069,  1070,  1071,  1072,  1074,  1075,  1080,  1085,  1091,  1092,  1093,  1094,  1095,  1097,  1100,  1103,  1112,  1114,  1117,  1122,  1124,  1125,  1126,  1127,  1134,  1135,  1136,  1137,  1139,  1150,  1157,  1158,  1160,  1162,  1163,  1167,  1172,  1175,  1176,  1178,  1179,  1184,  1191,  1196,  1198,  1203,  1206,  1207,  1209,  1213,  1214,  1220,  1221,  1227,  1229,  1230,  1235,  1236,  1238,  1239,  1240,  1253,  1256,  1261,  1265,  1268,  1272,  1276,  1280,  1286,  1291,  1298,  1299,  1309,  1311,  1317,  1320,  1323,  1324,  1326,  1328,  1333,  1334,  1335,  1337,  1340,  1344,  1350,  1352,  1359,  1363,  1367,  1369,  1374,  1381,  1383,  1385,  1393,  1397,  1401,  1403,  1404,  1406,  1407,  1415,  1417,  1418,  1419,  1421,  1424,  1431,  1432,  1434,  1435,  1439,  1442,  1443,  1447,  1448,  1451,  1452,  1456,  1458,  1460,  1468,  1469,  1470,  1473,  1475,  1484,  1485,  1488,  1489,  1490,  1493,  1494,  1497,  1498,  1509,  1514,  1517,  1521,  1530,  1533,  1537,  1547,  1551,  1552,  1556,  1557,  1559,  1561,  1562,  1564,  1569,  1571,  1579,  1580,  1582,  1583,  1584,  1586,  1592,  1593,  1596,  1600,  1604,  1605,  1606,  1612,  1614,  1616,  1620,  1624,  1628,  1631,  1633,  1638,  1644,  1645,  1647], 
                        [1,  2,  3,  5,  11,  12,  17,  18,  19,  22,  24,  25,  27,  28,  29,  31,  34,  35,  42,  43,  44,  46,  47,  50,  52,  55,  56,  57,  59,  60,  62,  63,  64,  65,  66,  68,  70,  75,  78,  80,  82,  84,  85,  87,  88,  89,  90,  95,  97,  100,  101,  102,  104,  105,  106,  107,  108,  113,  119,  120,  121,  122,  123,  124,  125,  126,  127,  131,  132,  133,  134,  138,  139,  140,  142,  144,  145,  146,  148,  151,  153,  156,  160,  163,  164,  165,  167,  168,  169,  170,  173,  175,  176,  177,  178,  179,  180,  181,  182,  183,  187,  189,  191,  194,  196,  197,  202,  204,  205,  206,  207,  208,  210,  211,  212,  213,  214,  215,  217,  219,  220,  222,  223,  224,  225,  226,  227,  230,  232,  234,  235,  237,  238,  239,  240,  242,  243,  244,  245,  247,  248,  250,  252,  255,  256,  258,  259,  260,  261,  263,  264,  265,  266,  267,  268,  269,  271,  272,  274,  278,  280,  281,  283,  286,  287,  288,  289,  294,  295,  298,  300,  303,  305,  310,  311,  312,  316,  317,  318,  320,  321,  322,  324,  326,  328,  329,  332,  334,  335,  337,  338,  342,  344,  345,  346,  347,  348,  350,  351,  353,  356,  358,  359,  360,  362,  363,  364,  365,  366,  369,  370,  375,  376,  377,  380,  381,  383,  384,  388,  389,  392,  393,  394,  395,  397,  399,  400,  401,  403,  406,  407,  408,  409,  411,  412,  413,  414,  416,  417,  418,  420,  423,  424,  425,  426,  429,  430,  431,  432,  433,  435,  436,  441,  444,  446,  447,  450,  451,  453,  454,  456,  457,  458,  460,  462,  463,  464,  466,  467,  469,  470,  475,  476,  480,  481,  482,  485,  486,  487,  489,  490,  491,  496,  497,  499,  502,  505,  508,  511,  512,  513,  514,  516,  517,  518,  521,  527,  529,  531,  532,  533,  536,  539,  541,  544,  546,  547,  549,  554,  555,  557,  558,  561,  563,  570,  573,  574,  577,  578,  579,  582,  583,  584,  586,  587,  588,  592,  595,  596,  597,  598,  601,  602,  604,  605,  606,  607,  608,  610,  612,  613,  615,  616,  617,  618,  619,  620,  622,  623,  625,  626,  627,  630,  631,  634,  640,  641,  644,  646,  649,  650,  651,  652,  653,  657,  659,  663,  664,  665,  667,  668,  669,  673,  674,  682,  683,  684,  686,  687,  690,  693,  694,  696,  697,  698,  699,  702,  703,  705,  706,  709,  710,  713,  714,  715,  716,  717,  719,  720,  724,  728,  729,  731,  732,  733,  734,  739,  740,  741,  742,  744,  748,  749,  751,  752,  753,  754,  755,  759,  762,  763,  764,  767,  771,  772,  774,  777,  778,  779,  781,  782,  788,  791,  793,  798,  799,  800,  804,  806,  807,  808,  810,  814,  816,  818,  821,  823,  824,  826,  827,  828,  831,  833,  836,  839,  844,  846,  847,  848,  851,  853,  854,  856,  858,  859,  860,  861,  864,  865,  866,  869,  871,  873,  876,  879,  880,  881,  885,  886,  888,  889,  897,  898,  899,  904,  907,  909,  911,  914,  917,  918,  920,  921,  922,  923,  924,  925,  927,  930,  931,  936,  938,  942,  943,  944,  946,  948,  949,  950,  951,  952,  953,  962,  963,  966,  967,  968,  973,  974,  975,  976,  977,  978,  979,  981,  982,  983,  986,  991,  992,  995,  996,  997,  998,  1000,  1001,  1002,  1005,  1008,  1011,  1013,  1016,  1017,  1020,  1022,  1024,  1026,  1027,  1032,  1034,  1036,  1039,  1040,  1041,  1044,  1045,  1046,  1049,  1050,  1051,  1052,  1054,  1057,  1059,  1060,  1061,  1062,  1065,  1067,  1068,  1069,  1070,  1073,  1074,  1075,  1078,  1080,  1081,  1085,  1086,  1087,  1088,  1091,  1092,  1093,  1094,  1096,  1098,  1099,  1100,  1102,  1103,  1104,  1106,  1107,  1108,  1112,  1113,  1114,  1116,  1117,  1118,  1122,  1123,  1125,  1126,  1128,  1129,  1130,  1131,  1132,  1134,  1137,  1138,  1139,  1142,  1143,  1145,  1147,  1151,  1152,  1153,  1155,  1159,  1160,  1162,  1164,  1165,  1166,  1167,  1168,  1169,  1170,  1171,  1174,  1176,  1178,  1179,  1181,  1182,  1184,  1185,  1186,  1187,  1188,  1189,  1190,  1193,  1195,  1196,  1198,  1199,  1202,  1203,  1205,  1206,  1208,  1209,  1210,  1211,  1212,  1213,  1214,  1215,  1216,  1217,  1218,  1219,  1220,  1223,  1224,  1225,  1226,  1228,  1231,  1233,  1237,  1238,  1239,  1242,  1243,  1250,  1252,  1253,  1254,  1256,  1257,  1260,  1262,  1264,  1265,  1267,  1268,  1270,  1271,  1272,  1273,  1275,  1278,  1280,  1281,  1286,  1290,  1291,  1292,  1296,  1297,  1298,  1303,  1305,  1308,  1309,  1310,  1313,  1314,  1316,  1317,  1319,  1320,  1321,  1322,  1323,  1324,  1325,  1329,  1330,  1331,  1332,  1333,  1335,  1337,  1338,  1346,  1348,  1350,  1352,  1357,  1358,  1361,  1362,  1365,  1366,  1367,  1369,  1370,  1371,  1372,  1374,  1377,  1379,  1381,  1382,  1384,  1385,  1387,  1388,  1390,  1391,  1393,  1394,  1395,  1396,  1397,  1398,  1400,  1403,  1404,  1406,  1407,  1408,  1411,  1412,  1413,  1414,  1416,  1420,  1421,  1422,  1424,  1425,  1426,  1430,  1431,  1432,  1433,  1435,  1437,  1438,  1440,  1443,  1444,  1445,  1446,  1449,  1450,  1451,  1452,  1455,  1457,  1458,  1460,  1463,  1465,  1470,  1471,  1473,  1474,  1475,  1479,  1480,  1481,  1483,  1484,  1487,  1491,  1493,  1495,  1497,  1498,  1500,  1501,  1503,  1506,  1507,  1508,  1510,  1512,  1514,  1515,  1516,  1518,  1519,  1520,  1522,  1523,  1524,  1525,  1526,  1528,  1529,  1530,  1531,  1532,  1534,  1535,  1536,  1537,  1538,  1539,  1540,  1542,  1543,  1545,  1549,  1553,  1555,  1556,  1557,  1558,  1559,  1562,  1563,  1564,  1566,  1572,  1573,  1574,  1575,  1576,  1578,  1579,  1584,  1585,  1588,  1591,  1592,  1593,  1595,  1598,  1600,  1603,  1605,  1606,  1607,  1609,  1611,  1612,  1614,  1615,  1616,  1617,  1619,  1621,  1622,  1623,  1627,  1628,  1631,  1632,  1636,  1637,  1638,  1639,  1640,  1641,  1643,  1644,  1646,  1647,  1650], 
                        [2,  3,  4,  5,  7,  10,  12,  16,  18,  20,  23,  30,  31,  34,  37,  38,  39,  45,  52,  54,  58,  59,  62,  66,  68,  73,  75,  76,  82,  85,  86,  88,  92,  93,  95,  99,  102,  105,  107,  108,  109,  110,  111,  114,  121,  125,  128,  129,  135,  136,  137,  139,  142,  148,  150,  153,  156,  158,  159,  160,  162,  163,  165,  166,  169,  170,  174,  177,  178,  181,  186,  189,  191,  195,  196,  198,  201,  203,  204,  205,  206,  207,  208,  209,  210,  213,  214,  219,  223,  234,  237,  238,  242,  245,  248,  260,  266,  268,  273,  274,  279,  282,  287,  293,  294,  297,  304,  305,  306,  313,  315,  316,  322,  324,  330,  335,  336,  337,  342,  344,  345,  352,  360,  369,  376,  377,  380,  384,  386,  393,  396,  397,  399,  400,  401,  416,  422,  423,  425,  426,  429,  430,  431,  434,  440,  444,  445,  450,  452,  454,  455,  458,  461,  466,  471,  478,  482,  483,  484,  485,  491,  493,  497,  499,  502,  504,  506,  507,  512,  518,  519,  523,  531,  537,  540,  542,  543,  552,  553,  554,  561,  567,  569,  590,  592,  602,  603,  604,  605,  609,  611,  612,  615,  616,  617,  618,  620,  623,  626,  630,  632,  635,  636,  639,  640,  642,  643,  644,  645,  646,  647,  652,  655,  656,  663,  665,  666,  670,  671,  672,  673,  674,  676,  680,  683,  693,  694,  698,  701,  704,  707,  711,  714,  716,  717,  721,  723,  726,  728,  729,  730,  736,  740,  742,  744,  745,  746,  749,  755,  760,  764,  768,  771,  773,  776,  779,  784,  785,  788,  793,  796,  799,  801,  803,  812,  816,  817,  820,  827,  828,  832,  837,  841,  843,  846,  849,  851,  853,  856,  863,  865,  869,  871,  876,  878,  880,  881,  882,  885,  886,  889,  890,  891,  894,  896,  898,  899,  900,  901,  904,  906,  910,  911,  917,  920,  930,  948,  950,  952,  956,  959,  960,  961,  962,  965,  967,  968,  973,  975,  977,  981,  982,  984,  988,  990,  992,  995,  996,  1000,  1004,  1005,  1010,  1015,  1019,  1020,  1022,  1023,  1029,  1032,  1035,  1036,  1043,  1048,  1049,  1050,  1054,  1055,  1056,  1059,  1065,  1066,  1068,  1069,  1070,  1072,  1073,  1074,  1076,  1081,  1083,  1084,  1087,  1089,  1093,  1095,  1099,  1104,  1119,  1121,  1122,  1123,  1135,  1136,  1147,  1153,  1154,  1156,  1157,  1160,  1161,  1162,  1169,  1173,  1174,  1178,  1180,  1184,  1189,  1191,  1203,  1210,  1214,  1215,  1217,  1225,  1227,  1229,  1236,  1240,  1242,  1243,  1244,  1248,  1249,  1259,  1260,  1263,  1266,  1270,  1275,  1276,  1277,  1278,  1280,  1283,  1284,  1285,  1290,  1297,  1300,  1306,  1307,  1308,  1311,  1312,  1317,  1320,  1322,  1325,  1326,  1327,  1330,  1331,  1332,  1335,  1337,  1342,  1345,  1346,  1349,  1351,  1353,  1355,  1356,  1361,  1363,  1364,  1365,  1367,  1370,  1371,  1372,  1385,  1391,  1392,  1394,  1397,  1398,  1399,  1401,  1403,  1410,  1418,  1431,  1432,  1433,  1436,  1438,  1439,  1440,  1441,  1445,  1450,  1451,  1453,  1456,  1460,  1462,  1463,  1465,  1467,  1469,  1471,  1474,  1479,  1490,  1496,  1498,  1499,  1503,  1505,  1506,  1513,  1516,  1517,  1518,  1521,  1523,  1526,  1527,  1528,  1530,  1531,  1535,  1536,  1537,  1539,  1543,  1548,  1552,  1554,  1555,  1558,  1559,  1562,  1564,  1566,  1567,  1569,  1571,  1572,  1573,  1574,  1575,  1579,  1591,  1592,  1596,  1598,  1602,  1613,  1622,  1624,  1626,  1630,  1634,  1635,  1636,  1640,  1643,  1644,  1647,  1650], 
                        [47,  65,  71,  72,  101,  135,  151,  161,  169,  181,  196,  222,  228,  240,  255,  263,  283,  327,  335,  375,  396,  400,  433,  449,  464,  466,  495,  505,  524,  531,  551,  582,  599,  666,  743,  751,  764,  774,  776,  849,  851,  856,  866,  873,  874,  881,  893,  895,  899,  915,  934,  936,  962,  963,  970,  1002,  1005,  1032,  1106,  1147,  1196,  1232,  1243,  1268,  1288,  1322,  1323,  1365,  1374,  1380,  1384,  1392,  1395,  1409,  1420,  1458,  1472,  1518,  1547,  1565,  1590,  1601,  1612,  1616,  1629,  1631], 
                        [0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10,  11,  12,  13,  14,  15,  16,  17,  18,  19,  20,  22,  23,  24,  25,  26,  27,  28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  38,  39,  40,  41,  42,  43,  45,  46,  48,  50,  51,  52,  53,  54,  55,  56,  57,  58,  59,  60,  61,  62,  63,  64,  65,  66,  67,  68,  69,  70,  71,  72,  73,  75,  76,  77,  78,  79,  80,  81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  92,  93,  94,  95,  96,  97,  98,  99,  100,  102,  103,  104,  105,  106,  107,  108,  109,  110,  111,  112,  113,  114,  116,  117,  118,  119,  120,  121,  123,  124,  125,  126,  127,  128,  129,  130,  131,  132,  133,  134,  135,  136,  137,  138,  139,  140,  142,  143,  144,  145,  146,  147,  148,  149,  151,  152,  153,  154,  155,  156,  157,  158,  159,  160,  161,  162,  163,  164,  165,  166,  167,  168,  169,  170,  171,  172,  173,  174,  175,  176,  177,  178,  179,  180,  181,  182,  183,  184,  185,  186,  187,  188,  189,  190,  191,  192,  193,  194,  195,  196,  197,  198,  199,  200,  201,  202,  203,  204,  205,  206,  207,  208,  209,  210,  211,  212,  213,  214,  215,  216,  217,  218,  220,  221,  222,  223,  224,  225,  226,  227,  228,  229,  230,  231,  232,  233,  234,  235,  236,  237,  238,  239,  240,  241,  242,  243,  244,  245,  246,  247,  248,  249,  250,  251,  252,  253,  254,  255,  256,  257,  258,  259,  260,  261,  262,  263,  264,  265,  266,  267,  268,  269,  270,  271,  272,  273,  274,  275,  276,  277,  278,  279,  280,  281,  282,  283,  284,  285,  286,  287,  288,  289,  290,  291,  292,  293,  294,  295,  296,  297,  298,  299,  300,  301,  302,  303,  304,  305,  306,  307,  308,  309,  310,  311,  312,  313,  314,  315,  316,  317,  318,  319,  320,  321,  322,  323,  324,  325,  326,  327,  328,  330,  331,  332,  333,  334,  335,  336,  337,  338,  339,  340,  341,  342,  343,  344,  346,  347,  348,  349,  350,  351,  352,  354,  355,  356,  357,  358,  359,  360,  361,  362,  363,  364,  365,  366,  368,  369,  370,  371,  372,  373,  374,  375,  376,  377,  378,  379,  380,  381,  382,  383,  384,  385,  386,  387,  389,  390,  391,  392,  393,  394,  395,  396,  397,  398,  399,  400,  401,  402,  403,  404,  406,  407,  408,  409,  410,  411,  412,  413,  414,  415,  416,  417,  418,  419,  420,  421,  422,  423,  424,  425,  426,  427,  428,  429,  430,  431,  432,  433,  434,  435,  436,  437,  438,  439,  440,  441,  442,  443,  444,  445,  446,  447,  448,  449,  450,  451,  452,  453,  454,  455,  456,  457,  458,  459,  460,  461,  462,  463,  464,  465,  466,  467,  468,  469,  470,  471,  472,  473,  474,  475,  476,  477,  478,  479,  481,  482,  483,  484,  485,  486,  487,  488,  489,  490,  491,  492,  493,  494,  495,  496,  497,  498,  499,  500,  501,  502,  503,  504,  505,  506,  507,  508,  509,  510,  511,  512,  513,  514,  515,  516,  517,  518,  519,  520,  521,  522,  523,  524,  525,  526,  527,  528,  529,  532,  533,  534,  535,  536,  537,  538,  539,  540,  541,  542,  543,  544,  545,  546,  547,  548,  549,  550,  551,  552,  553,  554,  555,  556,  557,  558,  559,  560,  561,  562,  563,  564,  565,  567,  568,  569,  570,  571,  572,  573,  574,  575,  576,  577,  578,  579,  580,  581,  582,  583,  584,  585,  586,  587,  588,  589,  590,  591,  592,  593,  594,  595,  596,  597,  598,  599,  600,  601,  602,  603,  604,  605,  606,  607,  608,  609,  610,  611,  612,  613,  614,  615,  616,  617,  618,  619,  620,  621,  622,  623,  624,  625,  626,  627,  628,  629,  630,  631,  632,  633,  634,  635,  636,  637,  638,  639,  640,  641,  642,  643,  644,  645,  646,  647,  648,  649,  650,  651,  652,  653,  654,  655,  656,  657,  658,  659,  660,  661,  662,  663,  664,  665,  666,  667,  668,  669,  670,  671,  672,  673,  674,  675,  676,  677,  678,  679,  680,  681,  682,  683,  684,  685,  686,  687,  688,  689,  690,  691,  692,  693,  694,  695,  696,  697,  698,  699,  700,  701,  702,  703,  704,  705,  706,  707,  708,  709,  710,  711,  712,  713,  714,  715,  716,  717,  718,  719,  720,  721,  722,  723,  724,  725,  726,  727,  728,  729,  730,  731,  732,  733,  734,  735,  736,  737,  738,  739,  740,  741,  742,  743,  744,  745,  746,  747,  748,  749,  750,  751,  752,  753,  754,  755,  756,  757,  758,  759,  760,  761,  762,  763,  764,  765,  766,  767,  768,  769,  770,  771,  772,  773,  774,  775,  776,  777,  778,  779,  780,  781,  782,  783,  784,  785,  786,  787,  788,  789,  790,  791,  792,  793,  794,  795,  796,  797,  798,  799,  800,  801,  802,  803,  804,  805,  806,  807,  808,  809,  810,  812,  813,  814,  815,  816,  817,  818,  820,  821,  822,  823,  824,  825,  826,  827,  828,  829,  830,  831,  832,  833,  834,  835,  836,  837,  838,  839,  840,  841,  842,  843,  844,  845,  846,  847,  848,  849,  850,  851,  852,  853,  854,  855,  856,  857,  858,  859,  860,  861,  862,  863,  864,  865,  866,  867,  868,  869,  870,  871,  872,  873,  874,  875,  876,  877,  878,  879,  880,  881,  882,  883,  884,  885,  886,  887,  888,  889,  890,  891,  892,  893,  894,  895,  896,  897,  898,  899,  900,  901,  902,  903,  904,  905,  906,  907,  908,  909,  910,  911,  912,  913,  914,  915,  917,  918,  919,  920,  921,  922,  923,  924,  925,  926,  927,  928,  929,  931,  932,  933,  934,  935,  936,  937,  938,  939,  940,  941,  942,  943,  944,  945,  946,  947,  948,  949,  950,  951,  952,  953,  954,  955,  956,  957,  958,  959,  960,  961,  962,  963,  964,  965,  966,  967,  968,  969,  970,  971,  972,  973,  974,  975,  976,  977,  978,  979,  980,  981,  982,  983,  985,  986,  987,  988,  989,  990,  991,  992,  993,  994,  995,  996,  997,  998,  999,  1000,  1001,  1002,  1003,  1004,  1005,  1006,  1007,  1008,  1009,  1010,  1011,  1012,  1013,  1014,  1015,  1016,  1017,  1019,  1020,  1021,  1022,  1023,  1025,  1026,  1027,  1028,  1029,1030,  1031, 1032, 1033, 1034, 1035, 1036, 1037, 1038, 1039, 1040, 1041, 1042, 1043, 1044, 1045, 1046, 1048, 1049, 1050, 1051, 1052, 1053, 1054, 1055, 1056, 1057, 1058, 1059, 1060, 1061, 1062, 1063,1064, 1065, 1066, 1067, 1068, 1069, 1070, 1071, 1072, 1073, 1074, 1075, 1076, 1077, 1078, 1080, 1081, 1082, 1083, 1084, 1085, 1086, 1087, 1088, 1090, 1091, 1092, 1093, 1094, 1095, 1096, 1097, 1098,1099, 1100, 1101, 1102, 1103, 1104, 1105, 1106, 1107, 1108, 1109, 1110, 1111, 1112, 1113, 1114, 1115, 1116, 1117, 1118, 1119, 1120, 1121, 1122, 1123, 1124, 1125, 1126, 1127, 1128, 1129, 1130, 1131,1132, 1133, 1134, 1135, 1136, 1137, 1138, 1139, 1140, 1142, 1143, 1144, 1145, 1146, 1147, 1148, 1149, 1150, 1151, 1152, 1153, 1154, 1155, 1156, 1157, 1158, 1159, 1160, 1161, 1162, 1163, 1164, 1165,1166, 1167, 1168, 1169, 1170, 1171, 1172, 1173, 1174, 1175, 1176, 1177, 1178, 1179, 1180, 1181, 1182, 1183, 1184, 1185, 1186, 1187, 1188, 1189, 1190, 1191, 1192, 1193, 1194, 1195, 1196, 1197, 1198,1199, 1200, 1201, 1202, 1203, 1204, 1205, 1206, 1207, 1208, 1209, 1210, 1211, 1212, 1213, 1214, 1215, 1216, 1217, 1218, 1219, 1220, 1221, 1222, 1223, 1224, 1225, 1226, 1227, 1228, 1229, 1230, 1231,1232, 1233, 1234, 1235, 1236, 1237, 1238, 1239, 1240, 1241, 1242, 1243, 1244, 1245, 1246, 1247, 1248, 1249, 1250, 1251, 1252, 1253, 1254, 1255, 1256, 1257, 1258, 1259, 1260, 1261, 1262, 1263, 1264,1265, 1266, 1267, 1268, 1269, 1270, 1271, 1272, 1273, 1275, 1276, 1277, 1278, 1279, 1280, 1281, 1282, 1283, 1284, 1285, 1286, 1287, 1288, 1289, 1290, 1291, 1292, 1293, 1294, 1295, 1296, 1297, 1298,1300, 1301, 1302, 1303, 1304, 1305, 1306, 1307, 1308, 1309, 1310, 1311, 1312, 1313, 1314, 1315, 1316, 1317, 1318, 1319, 1320, 1321, 1322, 1323, 1324, 1325, 1326, 1327, 1328, 1329, 1330, 1331, 1332,1333, 1334, 1335, 1336, 1337, 1338, 1339, 1340, 1341, 1342, 1343, 1344, 1345, 1346, 1347, 1348, 1349, 1350, 1351, 1352, 1353, 1354, 1355, 1356, 1357, 1358, 1359, 1360, 1361, 1362, 1363, 1364, 1365,1366, 1367, 1368, 1369, 1370, 1371, 1372, 1373, 1374, 1375, 1376, 1377, 1378, 1379, 1380, 1381, 1382, 1383, 1384, 1385, 1386, 1387, 1388, 1389, 1390, 1391, 1392, 1393, 1394, 1395, 1396, 1398, 1399,1400, 1401, 1402, 1403, 1404, 1405, 1406, 1407, 1408, 1409, 1410, 1411, 1412, 1413, 1414, 1416, 1417, 1418, 1419, 1420, 1421, 1422, 1423, 1424, 1425, 1426, 1427, 1428, 1429, 1430, 1431, 1432, 1433,1434, 1435, 1437, 1438, 1439, 1440, 1441, 1442, 1443, 1444, 1445, 1446, 1447, 1448, 1449, 1450, 1451, 1452, 1453, 1454, 1455, 1456, 1457, 1458, 1459, 1460, 1461, 1462, 1463, 1464, 1465, 1466, 1467,1468, 1469, 1470, 1471, 1472, 1473, 1474, 1475, 1476, 1477, 1478, 1479, 1480, 1481, 1482, 1483, 1484, 1485, 1486, 1487, 1488, 1489, 1490, 1491, 1492, 1493, 1494, 1495, 1496, 1497, 1498, 1499, 1500,1501, 1502, 1503, 1504, 1505, 1506, 1507, 1508, 1509, 1510, 1511, 1512, 1513, 1514, 1515, 1516, 1517, 1518, 1519, 1520, 1521, 1522, 1523, 1524, 1525, 1526, 1527, 1528, 1529, 1530, 1531, 1532, 1533,1534, 1536, 1537, 1538, 1539, 1540, 1541, 1542, 1543, 1544, 1545, 1546, 1547, 1548, 1549, 1550, 1552, 1553, 1554, 1555, 1556, 1557, 1558, 1559, 1560, 1561, 1562, 1563, 1564, 1565, 1566, 1567, 1568,1569, 1570, 1571, 1572, 1573, 1574, 1575, 1576, 1577, 1578, 1579, 1580, 1581, 1582, 1583, 1584, 1585, 1586, 1587, 1588, 1589, 1590, 1591, 1592, 1593, 1594, 1595, 1596, 1597, 1598, 1599, 1600, 1601,1602, 1603, 1604, 1605, 1606, 1607, 1608, 1609, 1610, 1611, 1612, 1613, 1614, 1615, 1616, 1617, 1618, 1619, 1620, 1621, 1622, 1623, 1624, 1625, 1626, 1627, 1628, 1629, 1630, 1631, 1632, 1633, 1634,1635, 1636, 1637, 1638, 1639, 1640, 1641, 1642, 1643, 1644, 1645, 1646, 1647, 1648, 1649, 1650], 
                        [2,  7,  9,  13,  22,  42,  54,  63,  68,  80,  91,  112,  116,  119,  129,  139,  143,  153,  166,  174,  183,  188,  194,  196,  220,  228,  257,  284,  286,  288,  330,  332,  367,  383,  392,  413,  416,  426,  436,  439,  442,  454,  481,  487,  492,  494,  528,  533,  537,  550,  557,  570,  583,  586,  595,  597,  613,  630,  653,  675,  690,  704,  707,  709,  721,  722,  751,  753,  769,  779,  780,  787,  803,  813,  817,  818,  825,  840,  847,  857,  876,  878,  881,  886,  899,  922,  934,  938,  940,  941,  963,  969,  985,  997,  1001,  1003,  1005,  1013,  1027,  1030,  1038,  1043,  1044,  1056,  1062,  1069,  1096,  1129,  1133,  1149,  1152,  1155,  1166,  1173,  1178,  1180,  1181,  1187,  1192,  1194,  1216,  1220,  1231,  1234,  1258,  1284,  1290,  1319,  1324,  1335,  1342,  1345,  1363,  1374,  1381,  1385,  1387,  1391,  1394,  1418,  1421,  1423,  1424,  1429,  1430,  1432,  1434,  1466,  1481,  1504,  1511,  1523,  1553,  1558,  1578,  1590,  1592,  1601,  1603,  1612,  1618,  1620,  1628,  1629], 
                        [72, 772, 816, 1259, 1377, 1499, 1520, 1524, 1538, 1639]]
        self.f_txt = [['1882',  '1895',  '19',  '24000',  '30',  '30yearold',  '6.1',  '600000',  '74th',  'abilities',  'able',  'account',  'actually.1',  'administrator',  'adventure.1',  'affair',  'affecting',  'affections',  		'afraid',  'africa',  'age.1',  'agency',  'agent.2',  'ahead',  'ahead.1',  'aided',  'alaska',  'alberts',  'alexei',  'alice',  'alien',  'allows',  'altered',  'alzheimers',  'amelia.1',  'amid',  'amidst',  		'angeles.1',  'animals',  'anna',  'announce',  'anymore',  'apprentice',  'archer',  'archie',  'area',  'arrest',  'asia',  'ask',  'asks',  'assigning',  'attitudes',  'attorney',  'available',  'awakens',  		'aware',  'away',  'awesome',  'babadook.1',  'badges',  'bajaj',  'ball.1',  'band',  'bartender',  'batman',  'battle',  'beach.1',  'beautiful',  'begin',  'big',  'bigfoot',  'biggest',  'billionaire',  		'birth',  'blair',  'blamed',  'blinded',  'blue',  'blurs',  'body.1',  'boggarts',  'boot',  'booth',  'born',  'bourne',  'boys.2',  'bradley',  'bringing',  'bros',  'bumpy',  'bury',  'busan',  'busboy',  		'buying',  'cade',  'california',  'called',  'came',  'capitol',  'captain',  'captures',  'car',  'care',  'career',  'career.1',  'cares',  'carry',  'case',  'celebrated',  'celebrity',  'center',  'certainly',  		'chairman',  'challenge',  'change',  'changes',  'chaos',  'charge',  'charity',  'charley',  'chicago',  'childhood.1',  'china',  'choice.1',  'chris',  'chungsu',  'cia',  'city',  'club',  'club.1',  'clubs',  		'cocaine',  'cofounder',  'coincidentally',  'collapses',  'collect',  'comes',  'comic',  'command',  'common',  'community',  'company',  'concert.1',  'conquer',  'consecutive',  'constant',  'contacted',  		'continuum',  'control',  'conventional',  'conversations',  'cool.1',  'cope',  'corroding',  'corrupt',  'course.1',  'courtship',  'coveted',  'crevice',  'crime',  'cross',  'crown',  'cryptanalyst',  'cuba',  		'cuca',  'culture',  'cypher',  'dad',  'dancer',  'dancer.1',  'dancers',  'daring',  'darkest',  'darko',  'daughterinlaw',  'death',  'death.1',  'decided',  'decides',  'decision',  'deepens',  'defeats',  		'defense',  'del',  'devastating',  'devoted',  'die.1',  'different',  'different.1',  'difficult',  'directed',  'dirty',  'disappears.1',  'disease',  'disguise',  'dishonorably',  'dismantle',  'divorced',  		'dj.1',  'doctor.1',  'dollars',  'donnies',  'doubts',  'dracula',  'drag',  'drawing',  'drawn',  'dread',  'drought',  'dumbest',  'dysfunctional',  'earlier',  'earth.1',  'einstein',  'elections',  'elsa',  		'eluded',  'embarks',  'endurance',  'enforcement',  'engaged',  'enraged',  'entertaining',  'entertainment',  'entertainment.1',  'entertainment.2',  'environment',  'epic',  'epidemic',  'escape',  'escape.1',  		'essential',  'ethan',  'evening',  'events.1',  'eventually',  'everdeen',  'expand',  'expect',  'extinction',  'extremely',  'exwife',  'faces',  'falling',  'falls',  'fame.1',  'famine',  'father',  'father.1',  		'fault',  'favorites',  'fellow',  'feuding',  'fight.1',  'finally',  'flamboyant',  'flames',  'fling',  'florida',  'foil',  'followed',  'follower',  'food.1',  'forestwithout',  'forget',  'formality',  		'fortyfive',  'forward',  'frances',  'frankenstein',  'fransokyo',  'frayed',  'freedom',  'french',  'friends',  'friendship',  'funeral',  'gal',  'games',  'games.2',  'garage',  'gatsby',  'generational',  		'georgina',  'ghostcommunity',  'ghosts',  'girl',  'girls',  'giving',  'good.1',  'government',  'graduated',  'grimm',  'ground',  'group.2',  'grown',  'guard',  'guests',  'gutierrez',  'guy',  'guy.1',  'haars',  		'hacker',  'halfstriped',  'hammond',  'hanna.1',  'happens',  'harrowing',  'harsh',  'having',  'hazel',  'heads',  'heart.1',  'heartland',  'heirloom',  'help',  'helping',  'herd.1',  'hero',  'hide',  'history',  		'hivaids',  'hoeks',  'home.1',  'homerun',  'honey',  'hopelessly',  'hoping',  'hotel',  'hotheaded',  'human.1',  'hunted',  'hunter',  'husband',  'hysterical',  'ill',  'imagination',  'immediately',  'immensely',  		'impaired',  'implanted',  'importer',  'importing',  'impresses',  'impromptu',  'incidents',  'incredible',  'inept',  'ineptness',  'infatuated',  'inner',  'innocent',  'inspire',  'instead',  'insults',  		'interaction',  'intricate',  'intrigued',  'isabella.1',  'islandset',  'italian',  'jack.1',  'james.1',  'japanese',  'jedediah',  'jim',  'jobs',  'john.1',  'johnnys',  'jorge',  'josephine',  'journey',  		'jules',  'jungle.1',  'junkie',  'kane',  'karenina',  'kate',  'katniss',  'katniss.1',  'kenai.1',  'khoobsurat',  'kidnapped',  'killed',  'killing',  'know',  'kodas',  'kunis',  'la.1',  'lame',  'land',  		'law',  'leach',  'lead',  'leading',  'lease',  'legendary',  'lemon',  'leopard',  'life.1',  'life.2',  'like',  'likes',  'lineaments',  'linglings',  'living',  'lizzie.2',  'locksley',  'loggerheads',  'london',  		'look',  'looks',  'lottery.1',  'love.2',  'madness',  'magnate',  'maiden',  'make',  'mammoths',  'man',  'manage',  'manages',  'manolo',  'mars.1',  'mars.2',  'marylouis',  'mastering',  'materialistic',  		'mathis',  'matters',  'mayhem.1',  'mechanic',  'medical',  'meet',  'meeting',  'merger',  'met.1',  'middleclass',  'military',  'mindset',  'mirkwood',  'mismatched',  'mobsters',  'modern',  'monroe',  		'montenegro',  'months',  'morpheus.1',  'movie.1',  'mrs',  'mushu',  'musicians',  'myriad',  'mysteriously',  'mystery',  'nailbiting',  'national',  'native',  'natural',  'needs',  'nefarious',  'neglects',  		'neighborhood',  'nerdy',  'newfound',  'news',  'nice',  'nina.1',  'northern',  'note',  'nouveau',  'novel.1',  'novels',  'november',  'obsession',  'odds',  'offensive',  'office',  'ones',  'ones.1',  'open',  		'opposition',  'opting',  'ordered',  'outcast',  'outcome.1',  'overcame',  'owner',  'owner.1',  'paid',  'pair',  'pals',  'pamela',  'pan',  'panem.1',  'paris',  'parker',  'partner',  'partner.1',  'past',  		'perfectionist',  'personification',  'peterson',  'pharaoh',  'physiotherapist',  'picture',  'places',  'play.3',  'plot',  'poet',  'policeman',  'politics',  'politics.1',  'pose',  'post',  'powerful',  'powers',  		'president',  'prey',  'prince',  'problem',  'production',  'project',  'promised',  'proper',  'provide',  'puts',  'quite',  'race.1',  'ralph.1',  'ralphs',  'ram.1',  'range',  'rationalizes',  'raucous',  		'ravaged',  'reacher.1',  'reader',  'realize',  'recklessness',  'recording',  'recovers',  'recovers.1',  'recurring',  'refugees',  'regina',  'regina.1',  'relationship',  'relief',  'remember',  'remembering',  		'remembers',  'remind',  'rene',  'renee',  'reporter',  'rescued',  'resolves',  'responsibilities',  'restaurant.1',  'retelling',  'retired.1',  'retribution.1',  'richard',  'riche',  'riding',  'rises',  'rivals',  		'riverine',  'robert',  'roberts',  'robotics',  'role',  'romance.1',  'romantic',  'rose',  'roxanne.1',  'rumors',  'ryder',  'safe',  'safely',  'salmon',  'samuels.1',  'sangha',  'sarah',  'school.1',  'scott',  		'scouted',  'scouting',  'secrets',  'seeks',  'seemingly',  'selfconfessed',  'send',  'sensation',  'sensuality',  'seriously',  'set',  'sexual',  'shape',  'shearing',  'shocked',  'sister',  'situation',  'slaves',  		'slum',  'sneaks',  'solar',  'solution.1',  'son',  'son.1',  'souls',  'spare',  'spies',  'spirit.1',  'spook',  'sportello',  'starina',  'stars',  'statement',  'states',  'status',  'stealing',  'stephen',  		'stephens',  'stir',  'stolen',  'stripes.1',  'stripped',  'student',  'students.1',  'studying',  'successful',  'suddenly',  'suffers',  'sulley',  'summary',  'sung',  'superiors',  'superstar',  'surprised',  		'survival',  'survive',  'sustain',  'sweat',  'sweeps',  'sweet',  'switch',  'sylvia',  'sympathetic',  'tablets',  'talent',  'teaching',  'team',  'televised',  'temperamental',  'thriller',  'thrown',  'time.2',  		'told',  'topsecret',  'torn',  'toro',  'tour',  'towns',  'trade.1',  'traitors',  'travels',  'triangle.1',  'trip',  'trouble',  'truest',  'truths',  'turned',  'twisted',  'ultimately',  'ultimatum',  		'unbeknownst',  'unit',  'universal',  'vain',  'valentines',  'various',  'villain',  'viralvideoed',  'wallace',  'ward',  'way',  'werewolves',  'wessex',  'west.1',  'wicked',  'widen',  'wife.1',  'wild',  		'wilde.1',  'windfall',  'winning',  'witch.2',  'woods.1',  'wooed',  'world',  'world.2',  'writer',  'writing.1',  'yeager',  'yeager.1',  'years.1',  'young.1',  'youngja',  'zoologists'], 
                        ['118th',  '15',  '15year',  '1895',  '1950',  '24000',  '30',  '3d',  '6',  '70s',  'abandon',  'academic',  'accident.1',  'accused',  'accuseds',  'actionpacked',  'activists',  'actorgigolo',  'adolescent',  		'advancement',  'adventure.1',  'affairs',  'affections',  'agent',  'ago',  'agree',  'ahkmenrah',  'albert',  'alberts',  'alice',  'ally',  'alongside',  'amelia',  'america',  'ancestors',  'anderson',  'appearing',  		'armed',  'army',  'arrest',  'assembly',  'astounding',  'attention',  'attila',  'attitudes',  'attorney',  'auditions',  'avoid',  'awesome',  'baby.1',  'background',  'bajaj',  'ball.1',  'baronial',  'baseball.1',  		'batman',  'battling',  'baymax',  'beach.1',  'befalls',  'befriend',  'belief',  'believes',  'best.1',  'bhoot',  'bhoothnath',  'bhoothnaths',  'big.1',  'bigfoot.1',  'birthday',  'black',  'blessings',  'bloom',  		'blue',  'blurs',  'bobby',  'body.1',  'bohemian',  'boil',  'bond.1',  'book',  'boot',  'born',  'bourne',  'boxer',  'boy.1',  'boyfriend.2',  'boys',  'boys.2',  'brass',  'break',  'british',  'brittle',  		'brutally',  'bumpy',  'burglar',  'busboy',  'business',  'buying',  'california',  'camp',  'capitol',  'captured',  'captures',  'career.1',  'carl',  'cassel',  'castle',  'catapults',  'celebrate',  'challenge',  		'chaos.1',  'charge',  'charity',  'cheer',  'chiffre',  'chiffre.1',  'chinese',  'choice.1',  'chose',  'city.2',  'citys',  'claims',  'clark',  'classmates',  'climate',  'coal',  'codebreakers',  'cold',  		'collapsed',  'collapses',  'colleagues',  'collection',  'colombo',  'comatose',  'comedyadventure',  'comes',  'committee',  'compassionate',  'comprising',  'conclusion',  'conservative',  'considerable',  		'consumed',  'control',  'converted',  'coolidge',  'corporation',  'corps',  'county',  'court',  'creating',  'crevice',  'crime.1',  'crossing',  'crude',  'cruise',  'cuba',  'curse',  'cutthroat.1',  'dads',  		'daley',  'dancer',  'day.2',  'days',  'days.1',  'deal',  'death.1',  'declared',  'defeats',  'defend',  'defense',  'deliberately',  'demands',  'descend',  'desperate',  'destroys',  'detective',  'develops',  		'devotes',  'diamond',  'died.1',  'difference',  'different',  'discovered.1',  'discovery',  'disenchanted',  'disgraced',  'dishonorably',  'dissuaded',  'divorce',  'divorced',  'dj',  'dj.1',  'doctor',  'doesnt',  		'doing',  'donnie',  'donnies',  'doubts',  'dracula',  'drag',  'dragon.1',  'dread',  'dream',  'dress',  'drives',  'drug',  'duksoo.1',  'dwarves',  'eager',  'earth',  'earthquake',  'east',  'egypt',  'elections',  		'elevator',  'eliminate',  'elite',  'emory',  'employer.1',  'encourages',  'ends',  'endurance',  'enjoying',  'entertaining',  'entertainment.1',  'entertainment.2',  'enthusiasm',  'equally',  'especially',  		'establishes',  'eternity',  'ethan',  'event',  'events.1',  'excavation',  'excited',  'exerts',  'exes',  'exhibit',  'expert.1',  'fabled',  'facing',  'fairy',  'fake',  'fame',  'family',  'family.1',  		'familyfriend',  'familys',  'fantasies',  'fantasies.1',  'fathers',  'favorites',  'fawkes',  'fears',  'ferocious',  'fighting',  'finn',  'fires',  'flames',  'fleeting',  'flight',  'flip',  'foil',  'follows',  		'footage',  'frank',  'french.1',  'friends',  'friends.1',  'friendship.1',  'fueling',  'fun',  'fund',  'funded',  'future',  'futuristic',  'gandalf',  'gangland',  'gangster',  'gastronomic',  'gatsby',  'gave',  		'gentle',  'georges',  'germany.1',  'getaway',  'gets',  'girls.3',  'gives',  'giving',  'glory',  'goal',  'goldman',  'good',  'gooding',  'grab',  'grace',  'grievers',  'group.1',  'group.2',  'grows',  'guard',  		'guile',  'gutierrez',  'gyllenhaal',  'haars',  'hand',  'hank',  'hannas',  'hard',  'harrowing',  'harsh',  'hazels',  'head',  'heading',  'heart',  'heartland',  'hed',  'helicopter',  'helps',  'henslow',  		'heroes.1',  'hit',  'hivaids',  'hoeks',  'holiday',  'honey',  'hoping',  'hotel',  'hotel.1',  'hotheaded',  'houghton',  'huge',  'humans.2',  'humor',  'humorist',  'huns',  'hunted',  'husband.1',  'ill',  		'illness.1',  'imperial',  'imprison',  'indictment',  'industrialist',  'ineptness',  'inflatable',  'innocent',  'inspire',  'instead',  'intrigue.1',  'invades',  'ironically',  'island',  'japanese',  'jay',  		'jill',  'jims',  'john',  'john.1',  'join',  'jokes',  'jonathan',  'josephine',  'jr',  'judge',  'juliet',  'juliette',  'karen',  'kates',  'katniss',  'katniss.1',  'kenny',  'kidnapped',  'kids.1',  'kills',  		'king',  'kitty.1',  'klieg',  'knee',  'know',  'known',  'koda.1',  'lack',  'lake',  'lands',  'large',  'laurence',  'learn',  'leave',  'leaves',  'leg',  'leopard',  'let',  'leveled',  'lifeand',  'lifeanddeath',  		'lights.1',  'like',  'likes',  'line.1',  'lineaments',  'lingling.2',  'linglings',  'linguistics',  'live',  'lived',  'lizzie.1',  'lok',  'london',  'long',  'long.1',  'looking',  'lookout',  'love',  		'lynd',  'machines.1',  'madness',  'maguire',  'manage',  'manages',  'manolo',  'mans',  'mantis',  'marian',  'marilyn',  'marines',  'marry',  'martins',  'mask',  'massive',  'masterpiece',  'mathis',  		'mayhem',  'mayhem.1',  'mccandless',  'mechanic',  'medications',  'meet',  'meet.1',  'mellark',  'memory',  'men',  'mia',  'miami.1',  'mighty',  'mines',  'mismatched',  'misunderstanding',  'mobsters',  		'money',  'monroe',  'monster',  'montenegro',  'moral',  'mothers',  'moulin',  'mr.1',  'mrgo',  'mummy',  'muscles',  'museum.1',  'music',  'mystery',  'named',  'narcotics',  'nation',  'native',  'near',  		'neighbor',  'neighborhood',  'nerdy',  'nina',  'nondescript',  'novels',  'november',  'oblonsky',  'odds',  'officers',  'operations',  'opposite',  'opposition',  'ops',  'optimus',  'orders',  'outcome.1',  		'outrace',  'overcome',  'owen',  'oxygen',  'pamela',  'parents.1',  'participants',  'partner.2',  'passionate',  'peace',  'peeta.2',  'pension',  'people',  'percys',  'perfectionist',  'perilous',  'perspective',  		'petersburg',  'petersen',  'pharoah',  'physiotherapist',  'pilot',  'place.1',  'plan',  'play.1',  'plot',  'politician',  'portrays',  'pose',  'possible',  'powers',  'prem.1',  'prep',  'prepares',  'present',  		'prey',  'prince',  'professional',  'program',  'promised',  'proper',  'prosthetic',  'protect',  'protected',  'providedriving',  'psychiatrist',  'pursue',  'puts',  'quality',  'questions',  'quickly.1',  'quits',  		'race.1',  'rachel',  'raises',  'rataxes',  'raucous',  'ravaged',  'read',  'readymade',  'realizes',  'recruits.1',  'redeem',  'regina',  'regina.2',  'rekall',  'releases',  'religion',  'renowned',  		'representative',  'resolves',  'result',  'retired',  'retrieve',  'rhinos',  'riding',  'rigours',  'ringmaster',  'rivalry',  'road',  'robbery',  'robert',  'roberts.1',  'robot.1',  'romantic',  'romp',  		'routine',  'royals',  'rules',  'runs',  'russia',  'sabha',  'samuels',  'satisfactorily',  'saved',  'saves',  'saxophone',  'says',  'scott',  'scraggily',  'secrecy',  'secret',  'secrets.1',  'seeing',  		'seeking',  'selfconfessed',  'senator.1',  'sends',  'sergeant',  'set',  'settlement',  'settling',  'sex',  'sexual',  'sexy',  'shakespeare',  'shakespeares',  'shanty',  'shape',  'short',  'simply',  'singing',  		'sings',  'sink',  'sixteen',  'skills',  'slamdunk',  'snow',  'soared',  'sole',  'sort',  'south',  'space',  'spacetime',  'spanish',  'spears',  'spent',  'spontaneity',  'spook.1',  'sports',  'spy',  'stage.1',  		'stand',  'starina',  'start.1',  'states',  'station',  'stepping',  'stooges',  'store',  'stories',  'story',  'strange',  'strangely',  'striking',  'stripes',  'struggle',  'students.1',  'subsequent',  'sudden',  		'suddenly',  'sulley',  'sums',  'sung',  'sung.1',  'superiors',  'sure',  'surrounds',  'survival.1',  'swan.2',  'sweet',  'swim',  'swing',  'sword',  'sympathetic',  'takeover',  'takes',  'talk',  'talks',  		'taunts',  'teacher',  'teachers',  'teams.1',  'teddy',  'televised',  'tell',  'temporal',  'terrorizes',  'tessa',  'tet',  'theres',  'things',  'thorin',  'tiny',  'told',  'toll',  'toro',  'tour.1',  'trade',  		'training',  'traitors',  'travels',  'trials',  'trip.1',  'truck',  'trusty',  'truth',  'twist',  'twisted',  'ultimatum',  'unbeknownst',  'unique',  'unit',  'universal',  'unlocks',  'unrelated',  'uprising',  		'usual',  'various',  'victims',  'victors',  'villain',  'voyage',  'vulnerable',  'wallace',  'wallace.1',  'walt',  'wanted',  'wants',  'war',  'ward',  'wasnt',  'watch',  'water',  'waterhole',  'ways',  		'wazowski',  'wealthy',  'web',  'west.1',  'whats',  'wife.1',  'wildebeest',  'winners',  'wit',  'witches',  'woods',  'words',  'words.1',  'world',  'world.1',  'worlds.1',  'writer',  'writes',  'wrong.1',  		'year',  'years',  'years.1',  'york',  'zebra',  'zellweger',  'zing'], 
                        ['states']]
        self.f_meta = [['country_UK',  'genre_Crime',  'language_Croatian',  'language_Hungarian',  'language_Italian'], 
                        ['country_China', 'country_Egypt', 'country_France', 'genre_Crime'], 
                        ['country_Egypt',  'country_France',  'country_Spain',  'genre_Crime',  'genre_Romance',  'language_Greek',  'language_Japanese',  'language_Latin',  'language_Urdu',  'rated_N/A'], 
                        ['country_Argentina',  'country_France',  'genre_Crime',  'genre_Mystery',  'language_Filipino',  'language_Japanese',  'language_Navajo',  'language_Yiddish'], 
                        ['genre_Crime'], 
                        ['country_Argentina',  'country_Ireland',  'country_Italy',  'country_Netherlands',  'genre_Crime',  'genre_Music',  'language_Danish',  'language_Greek',  'language_Mandarin',  'rated_NOT RATED'], 
                        ['country_France', 'genre_Crime', 'rated_TV-MA'], 
                        ['country_Bahamas',  'country_France',  'country_SouthAfrica',  'country_SouthKorea',  'genre_Animation',  'genre_Biography',  'genre_Crime',  'genre_Sci-Fi',  'genre_Sport',  'language_Croatian',  'language_Greek'], 
                        ['country_Argentina',  'country_Egypt',  'genre_Crime',  'genre_Horror',  'language_Hungarian']]
        
    
    def randomSet(self, S,n=0):
        Set = []
        if (n==0):
            n = len(S)
        elif (n==1):
            n = 2
        rand = random.randint(1,n-1)
        #print("randomset size: ",rand)
        return np.random.choice(S, rand ,replace=False)
    
    def getMBestFeatures(self, D,target, clfs, minf1=0):
        scoring = ['precision', 'recall', 'f1']
        f_arr=[]
        df_scores = pd.DataFrame()
        
        for clf in clfs:
            selected_features = []
            n = len(D)
            max_tries = 77*n^5
            S_best = D.columns.values
            C_best = len(S_best)
            old_f1 = minf1
            
            for i in range(1,max_tries):
                S = sorted(self.randomSet(D.columns.values, len(D.columns.values)))
                C = len(S)
                r = cross_validate(clf.clf,D[S],target, cv=10, scoring = scoring)
                new_f1 = r['test_f1'].mean()
                
                if (new_f1 > old_f1):
                    print(new_f1)
                    S_best = S
                    C_best = C
                    selected_features = S_best
                    old_f1 = new_f1
                    print("best features: ",S_best)
                elif (new_f1==old_f1 and C<C_best):
                     S_best = S
                     C_best = C
                     selected_features = S_best
                     print("better features: ", S_best)
            
            f_arr.append(selected_features)
            
        return f_arr
        
class OutputWrapper:
    
    def __init__(self, df_res_test, df_score_test, df_score_stats, df_res_all):
        self.df_res_test = df_res_test
        self.df_score_test = df_score_test
        self.df_score_stats = df_score_stats
        self.df_res_all = df_res_all
        
class Evaluator:
    # TODO add clf stacking method
    
    def __init__(self, data_wrapper, use_audio=True, use_visual=True, use_text=True, use_meta=True):
        self.data_wrapper = data_wrapper
        self.use_audio = use_audio
        self.use_visual = use_visual
        self.use_text = use_text
        self.use_meta = use_meta            
            
    def cv_modality(self, df_features, df_targets, clfs, cv=10, verbose=True, predict_all = False):
        kf = StratifiedKFold(n_splits=cv, random_state=9832432)
        df_cvs = []
        df_all_cvs = []
        i=1
        for train_index, test_index in kf.split(df_features, df_targets):
            if verbose:
                print("Performing CV fold {i}..")
            i += 1
            X_train, X_test = df_features.iloc[train_index,:], df_features.iloc[test_index,:]
            y_train, y_test = df_targets[train_index], df_targets[test_index]
            
            df_res = pd.DataFrame(y_test)
            df_res.columns = ['TARGET']
            
            # init df for prediction of all entries
            df_res_all = pd.DataFrame(df_targets)
            df_res_all.columns = ['TARGET']
            
            for clf in clfs:
                clf.fit(X_train, y_train, verbose)
                y_pred = clf.predict(X_test, verbose)
                df_res[clf.clf_name+"_"+clf.modality] = y_pred
                
                if predict_all:
                    y_pred_all = clf.predict(df_features, verbose)
                    df_res_all[clf.clf_name+"_"+clf.modality] = y_pred_all
                
            df_cvs.append(df_res)
            df_all_cvs.append(df_res_all)
        return df_cvs, df_all_cvs
            
    def cv(self, cv=10, verbose=True, predict_all=False):
        """
            set predict_all to True to also include predictions for all data
        """
        df_cvs = []
        df_all_cvs = []
        df_targets = self.data_wrapper.df_targets
        if (self.use_audio):
            df_test, df_all = self.cv_modality(
                self.data_wrapper.df_audio,
                df_targets,
                ClassifierFactory.get_audio_classifiers(),
                cv,
                verbose,
                predict_all)
            df_cvs.append(df_test)
            df_all_cvs.append(df_all)
        if (self.use_visual):
            df_test, df_all = self.cv_modality(
                self.data_wrapper.df_vis,
                df_targets,
                ClassifierFactory.get_visual_classifiers(),
                cv,
                verbose,
                predict_all)
            df_cvs.append(df_test)
            df_all_cvs.append(df_all)
        if (self.use_text):
            df_test, df_all = self.cv_modality(
                self.data_wrapper.df_txt,
                df_targets,
                ClassifierFactory.get_text_classifiers(),
                cv,
                verbose,
                predict_all)
            df_cvs.append(df_test)
            df_all_cvs.append(df_all)
        if (self.use_meta):
            df_test, df_all = self.cv_modality(
                self.data_wrapper.df_meta,
                df_targets,
                ClassifierFactory.get_metadata_classifiers(),
                cv,
                verbose,
                predict_all)
            df_cvs.append(df_test)
            df_all_cvs.append(df_all)
            
        df_c = []
        for i in range(len(df_cvs[0])):
            df_i = pd.concat([df_cvs[x][i] for x in range(len(df_cvs))], axis=1)
            df_i = df_i.loc[:,~df_i.columns.duplicated()]
            df_c.append(df_i)
            
        df_all_c = []
        if predict_all:
            for i in range(len(df_all_cvs[0])):
                df_i = pd.concat([df_all_cvs[x][i] for x in range(len(df_all_cvs))], axis=1)
                df_i = df_i.loc[:,~df_i.columns.duplicated()]
                df_all_c.append(df_i)
            
        df_res = self.evaluate(df_c)
        
        return OutputWrapper(df_c, df_res, self.overall_eval(df_res), df_all_c)

    def evaluate(self, df_res):
        """
        returns precision, recall and F1 in a DF
        returns list of DFs if df_res is list of DFs
        """
        if type(df_res) == type([]):
            return [self.evaluate_single(df_x) for df_x in df_res]
        else:
            return self.evaluate_single(df_res)

    def evaluate_single(self, df_res):
        cols = list(df_res.columns)
        cols.remove('TARGET')
        df_ev = pd.DataFrame(columns=['clf', 'precision', 'recall', 'f1'])
        for col in cols:
            prec = precision_score(df_res['TARGET'], df_res[col])
            recall = recall_score(df_res['TARGET'], df_res[col])
            f1 = f1_score(df_res['TARGET'], df_res[col])
            df_ev = df_ev.append({'clf':col, 'precision': prec, 'recall':recall, 'f1':f1}, ignore_index=True)
        return df_ev
    
    def overall_eval(self, df_results):
        df_score = pd.DataFrame(columns =['clf', 'mean_precision', 'mean_recall', 'mean_f1', 'var_precision', 'var_recall', 'var_f1', 'std_precision', 'std_recall', 'std_f1'])
        df_score['clf'] = df_results[0]['clf']
        # mean 
        df_res = reduce(lambda x, y: x.add(y, fill_value=0), df_results)
        df_score[['mean_precision', 'mean_recall', 'mean_f1']] = df_res[['precision', 'recall', 'f1']].div(len(df_results))

        # var
        df_mean = df_score[['mean_precision', 'mean_recall', 'mean_f1']]
        df_mean.columns = ['precision', 'recall', 'f1']
        df_dev = []
        for df in df_results:
            df_dev.append(np.square(df[['precision', 'recall', 'f1']].subtract(df_mean)))
        df_dev = reduce(lambda x, y: x.add(y, fill_value=0), df_dev)
        df_score[['var_precision', 'var_recall', 'var_f1']] = df_dev[['precision', 'recall', 'f1']].div(len(df_results))

        # std 
        df_score[['std_precision', 'std_recall', 'std_f1']] = np.power(df_score[['var_precision', 'var_recall', 'var_f1']], 0.5)
        
        # 95-ci
        for metric in ['precision', 'recall', 'f1']:
            l_ci = df_score['mean_'+metric] - df_score['std_'+metric]*CI_95_FACTOR/len(df_results)
            u_ci = df_score['mean_'+metric] + df_score['std_'+metric]*CI_95_FACTOR/len(df_results)
            df_score['l_95ci_'+metric] = l_ci
            df_score['u_95ci_'+metric] = u_ci

        return df_score

## Call Example

In [174]:
dw = DataWrapper(df_audio_train, df_vis_train, df_txt_train, df_meta_train, df_targets_train)
dw.generate_subspace()
ev = Evaluator(dw) # TODO preprocessing for meta needs to be done
ow = ev.cv(verbose=False, predict_all=True)

In [175]:
ow.df_res_test[0]

Unnamed: 0,TARGET,Logistic regression_Audio,Gradient Boosting Tree_Audio,k-Nearest neighbors_Visual,Decision tree_Visual,Logistic regression_Visual,SVM (Gaussian Kernel)_Visual,Random Forest_Visual,AdaBoost_Visual,Gradient Boosting Tree_Visual,...,SVM (Gaussian Kernel)_Textual,k-Nearest neighbors_Metadata,Nearest mean classifier_Metadata,Decision tree_Metadata,Logistic regression_Metadata,SVM (Gaussian Kernel)_Metadata,Bagging_Metadata,Random Forest_Metadata,AdaBoost_Metadata,Gradient Boosting Tree_Metadata
0,1,0,0,1,0,1,1,1,1,1,...,1,1,1,0,0,1,0,0,0,0
1,0,1,0,1,0,0,1,1,0,0,...,1,0,1,0,1,1,0,0,0,1
2,1,1,1,1,1,1,1,1,1,1,...,1,1,1,1,1,1,1,1,1,1
3,1,1,0,0,0,0,1,1,1,1,...,1,0,1,0,1,0,0,0,0,0
4,1,1,0,0,0,1,1,0,1,1,...,1,0,0,0,0,0,0,0,0,0
5,1,1,0,1,1,1,1,1,0,1,...,1,0,1,0,1,0,0,0,0,1
6,1,1,1,1,1,0,1,1,1,1,...,1,0,0,0,0,0,0,0,0,0
8,0,1,0,1,0,1,1,0,0,0,...,1,0,1,1,1,1,0,0,0,0
9,0,1,1,1,1,1,1,1,1,1,...,1,0,0,1,0,0,0,0,0,0
13,0,0,0,1,0,0,1,0,1,1,...,1,0,0,1,0,0,0,1,0,0


In [176]:
ow.df_score_test[0]        

Unnamed: 0,clf,precision,recall,f1
0,Logistic regression_Audio,0.555556,0.833333,0.666667
1,Gradient Boosting Tree_Audio,0.5,0.333333,0.4
2,k-Nearest neighbors_Visual,0.444444,0.666667,0.533333
3,Decision tree_Visual,0.6,0.5,0.545455
4,Logistic regression_Visual,0.571429,0.666667,0.615385
5,SVM (Gaussian Kernel)_Visual,0.545455,1.0,0.705882
6,Random Forest_Visual,0.714286,0.833333,0.769231
7,AdaBoost_Visual,0.625,0.833333,0.714286
8,Gradient Boosting Tree_Visual,0.666667,1.0,0.8
9,Naive Bayes_Textual,0.333333,0.333333,0.333333


In [13]:
ow.df_score_stats

Unnamed: 0,clf,mean_precision,mean_recall,mean_f1,var_precision,var_recall,var_f1,std_precision,std_recall,std_f1,l_95ci_precision,u_95ci_precision,l_95ci_recall,u_95ci_recall,l_95ci_f1,u_95ci_f1
0,Logistic regression_Audio,0.566508,0.57,0.547244,0.047184,0.048989,0.03009,0.217218,0.221334,0.173464,0.523933,0.609083,0.526618,0.613382,0.513245,0.581243
1,Gradient Boosting Tree_Audio,0.486429,0.416667,0.438236,0.050547,0.054944,0.045175,0.224826,0.234402,0.212543,0.442363,0.530494,0.370724,0.46261,0.396578,0.479895
2,k-Nearest neighbors_Visual,0.568492,0.633333,0.588811,0.015035,0.035778,0.017594,0.122618,0.18915,0.132641,0.544459,0.592525,0.59626,0.670407,0.562814,0.614809
3,Decision tree_Visual,0.550476,0.623333,0.581818,0.023185,0.043567,0.0302,0.152265,0.208726,0.173782,0.520632,0.58032,0.582423,0.664244,0.547757,0.615879
4,Logistic regression_Visual,0.613571,0.636667,0.608761,0.033807,0.0481,0.027895,0.183868,0.219317,0.167017,0.577533,0.649609,0.593681,0.679653,0.576025,0.641496
5,SVM (Gaussian Kernel)_Visual,0.543434,0.983333,0.699755,0.000481,0.0025,0.00082,0.021922,0.05,0.028633,0.539138,0.547731,0.973533,0.993133,0.694143,0.705367
6,Random Forest_Visual,0.619762,0.653333,0.629573,0.006054,0.0196,0.008414,0.077811,0.14,0.09173,0.604511,0.635013,0.625893,0.680773,0.611594,0.647552
7,AdaBoost_Visual,0.584643,0.61,0.593169,0.018714,0.025789,0.019354,0.136799,0.160589,0.139118,0.55783,0.611455,0.578525,0.641475,0.565902,0.620436
8,Gradient Boosting Tree_Visual,0.594524,0.726667,0.64641,0.009215,0.0324,0.012671,0.095997,0.18,0.112566,0.575708,0.613339,0.691387,0.761947,0.624347,0.668473
9,Naive Bayes_Textual,0.535,0.62,0.550276,0.036233,0.055156,0.022024,0.190351,0.234852,0.148407,0.497691,0.572309,0.573969,0.666031,0.521189,0.579364


In [14]:
ow.df_res_all[0]

Unnamed: 0,TARGET,Logistic regression_Audio,Gradient Boosting Tree_Audio,k-Nearest neighbors_Visual,Decision tree_Visual,Logistic regression_Visual,SVM (Gaussian Kernel)_Visual,Random Forest_Visual,AdaBoost_Visual,Gradient Boosting Tree_Visual,...,SVM (Gaussian Kernel)_Textual,k-Nearest neighbors_Metadata,Nearest mean classifier_Metadata,Decision tree_Metadata,Logistic regression_Metadata,SVM (Gaussian Kernel)_Metadata,Bagging_Metadata,Random Forest_Metadata,AdaBoost_Metadata,Gradient Boosting Tree_Metadata
0,1,0,0,1,0,1,1,1,1,1,...,1,1,1,0,0,1,0,0,0,0
1,0,1,0,1,0,0,1,1,0,0,...,1,0,1,0,1,1,0,0,0,1
2,1,1,1,1,1,1,1,1,1,1,...,1,1,1,1,1,1,1,1,1,1
3,1,1,0,0,0,0,1,1,1,1,...,1,0,1,0,1,0,0,0,0,0
4,1,1,0,0,0,1,1,0,1,1,...,1,0,0,0,0,0,0,0,0,0
5,1,1,0,1,1,1,1,1,0,1,...,1,0,1,0,1,0,0,0,0,1
6,1,1,1,1,1,0,1,1,1,1,...,1,0,0,0,0,0,0,0,0,0
7,1,0,1,1,1,1,1,1,1,1,...,1,0,0,1,1,1,1,1,1,1
8,0,1,0,1,0,1,1,0,0,0,...,1,0,1,1,1,1,0,0,0,0
9,0,1,1,1,1,1,1,1,1,1,...,1,0,0,1,0,0,0,0,0,0
