In [1]:
from __future__ import print_function, division
import os

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.image as im
from PIL import Image
import skimage
from tqdm import tqdm, tqdm_notebook
import re
import math

import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils, datasets
import torchvision.models as models
from torch.autograd import Variable
from sklearn import linear_model
from torch import nn


%load_ext autoreload
%autoreload 2

In [2]:
data_dir = 'dataset/broden1_227/'
df = pd.read_csv(data_dir + 'processed_index.csv')
df.drop(df.columns[df.columns.str.contains('unnamed',case = False)],axis = 1, inplace = True) # temp remove unnamed garbage col
df.insert(0, 'encoded', '')

In [3]:
def filter_by_category(df, category):
    pattern = "[%i]" % category
    return df[df['features'].str.contains(pattern, regex=False)]
    
def filter_by_not_category(df, category):
    pattern = "[%i]" % category
    return df[~df['features'].str.contains(pattern, regex=False)]

In [4]:
device = torch.device('cuda:0')

In [5]:
model = models.alexnet(pretrained=True)

model.to(device)

AlexNet(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU(inplace)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace)
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace)
    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace)
    (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace)
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (classifier): Sequential(
    (0): Dropout(p=0.5)
    (1): Linear(in_features=9216, out_features=4096, bias=True)
    (2): ReLU(inplace)
    (3): Dropout(p=0.5)
    (4): Linear(in_features=4096, out_feature

In [6]:
outputs = []

def hook(module, input, output):
    layer_output = output.data.cpu()
    outputs.append(layer_output)
        
model.features[6].register_forward_hook(hook) # register hook to access specific layer
model.eval()

AlexNet(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU(inplace)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace)
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace)
    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace)
    (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace)
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (classifier): Sequential(
    (0): Dropout(p=0.5)
    (1): Linear(in_features=9216, out_features=4096, bias=True)
    (2): ReLU(inplace)
    (3): Dropout(p=0.5)
    (4): Linear(in_features=4096, out_feature

In [7]:
class CategoryDataSet(Dataset):
    def __init__(self, df, img_dir, transform=None):
        self.dataframe = df
        self.root_dir = img_dir
        self.transform = transform
    
    def __len__(self):
        return len(self.dataframe)
    
    def __getitem__(self, idx):
        imgfile = os.path.join(self.root_dir,
                                self.dataframe.at[idx, 'image'])
        image = Image.open(imgfile)
        
        if self.transform:
            image = self.transform(image)
            
        sample = (image, idx)
        
        return sample

In [8]:
tf = transforms.Compose([transforms.Resize(224), transforms.ToTensor()])

dataset = CategoryDataSet(df, data_dir + 'images/', transform=tf)
dataloader = torch.utils.data.DataLoader(dataset, batch_size=30, shuffle=False, num_workers=1)

In [9]:
torch.cuda.empty_cache()

In [61]:
TRAIN_MAX_INDEX = 44403

In [10]:
outputs = []
predictions = []
# run dataset through model
for inputs, idx in tqdm(dataloader):
    inputs = inputs.to(device)
    model(inputs)

100%|██████████| 2111/2111 [04:10<00:00,  8.42it/s]


In [72]:
outputs[0].shape

torch.Size([30, 384, 13, 13])

In [11]:
stacked_outputs = torch.cat(outputs, dim=0)

In [12]:
m = nn.MaxPool2d(2)
max_pool_outputs = m(stacked_outputs)

In [13]:
max_pool_outputs.shape

torch.Size([63305, 384, 6, 6])

In [14]:
flattened_outputs = max_pool_outputs.reshape((max_pool_outputs.shape[0], np.prod(max_pool_outputs.shape[1:])))

In [15]:
# np.savetxt("flattened_pool_outputs.csv", flattened_outputs, delimiter=",")

In [16]:
flattened_outputs.shape

torch.Size([63305, 13824])

In [17]:
from sklearn.decomposition import PCA
from sklearn.preprocessing import normalize
from sklearn.preprocessing import StandardScaler
from sklearn.svm import LinearSVC

In [60]:
pca = PCA(n_components=100)
normalized_outputs = normalize(flattened_outputs)
pca.fit(normalized_outputs[:44403]) # just do it on training data
np.sum(pca.explained_variance_ratio_)

0.5401557038159224

In [62]:
transformed_outputs = pca.transform(normalized_outputs)

In [63]:
df['encoded'] = transformed_outputs.tolist() # insert encodings to list

In [64]:
# df.to_csv(path_or_buf=data_dir + 'processed_index_with_encodings.csv')

In [65]:
SGD_MAX_ITERATIONS = 400
SGD_LOSS = "hinge"
SGD_PENALTY = "l2"

def train_clf_for_category(df, category):
    # Filter out category data from dataframe
    pos_df = filter_by_category(df, category)
    negative_df = filter_by_not_category(df,category).sample(len(pos_df), replace=True)
    
    if(len(pos_df) == 0):
        print("Empty set found for category %i" % category)
        return(None, 0)
    
    # generate training and validation data for SGDClassifier
    train = pos_df[pos_df.split == 'train'].encoded.to_list() + negative_df.sample(len(pos_df[pos_df.split == 'train'])).encoded.to_list()
    val = pos_df[pos_df.split == 'val'].encoded.to_list() + negative_df.sample(len(pos_df[pos_df.split == 'val'])).encoded.to_list()
        
    # scale data
#     scaler = StandardScaler()
#     scaler.fit(train)  # Don't cheat - fit only on training data
#     train = scaler.transform(train)
#     val = scaler.transform(val)  # apply same transformation to test data
    
    train_labels = np.append(np.ones(len(train) // 2), np.zeros(len(train) // 2))
    val_labels = np.append(np.ones(len(val) // 2), np.zeros(len(val) // 2))
    clf = linear_model.SGDClassifier(max_iter=min(np.ceil(10**6 / len(train_labels)), SGD_MAX_ITERATIONS), loss=SGD_LOSS, early_stopping=True, penalty=SGD_PENALTY, average=True, eta0=1.5)
#     clf = LinearSVC(random_state=0, tol=1e-5)
    clf.fit(train, train_labels)
    return (clf, clf.score(val, val_labels))
    return None, 0


In [67]:
clf, score = train_clf_for_category(df, 1)
print(score)

0.8962228466443316


In [68]:
# chosen based on frequency in the dataset
MAX_CLASSES = 150
SCORE_THRESH = 0.75
sgd_classifiers = {}

In [69]:
for i in tqdm(range(1, MAX_CLASSES)):
    clf, score = train_clf_for_category(df, i)
    if(score > SCORE_THRESH):
        sgd_classifiers[i] = (clf, score)
    else:
        print("ignoring clf for category %i with score %i", (i, score))


  0%|          | 0/149 [00:00<?, ?it/s][A
  1%|          | 1/149 [00:02<06:42,  2.72s/it][A
  1%|▏         | 2/149 [00:05<06:30,  2.66s/it][A
  2%|▏         | 3/149 [00:07<06:22,  2.62s/it][A
  3%|▎         | 4/149 [00:10<06:22,  2.64s/it][A
  3%|▎         | 5/149 [00:13<06:35,  2.74s/it][A
  4%|▍         | 6/149 [00:16<06:32,  2.74s/it][A
  5%|▍         | 7/149 [00:18<06:28,  2.73s/it][A

ignoring clf for category %i with score %i (7, 0.7215756763343895)



  5%|▌         | 8/149 [00:21<06:25,  2.73s/it][A
  6%|▌         | 9/149 [00:24<06:14,  2.67s/it][A
  7%|▋         | 10/149 [00:26<06:01,  2.60s/it][A
  7%|▋         | 11/149 [00:29<05:57,  2.59s/it][A
  8%|▊         | 12/149 [00:30<05:22,  2.36s/it][A

ignoring clf for category %i with score %i (12, 0.6311242344706912)



  9%|▊         | 13/149 [00:32<04:45,  2.10s/it][A
  9%|▉         | 14/149 [00:34<04:24,  1.96s/it][A

ignoring clf for category %i with score %i (14, 0.6791631564167369)



 10%|█         | 15/149 [00:35<04:06,  1.84s/it][A

ignoring clf for category %i with score %i (15, 0.7033343720785291)



 11%|█         | 16/149 [00:37<03:54,  1.76s/it][A
 11%|█▏        | 17/149 [00:38<03:48,  1.73s/it][A

ignoring clf for category %i with score %i (17, 0.7204010184595799)



 12%|█▏        | 18/149 [00:40<03:42,  1.70s/it][A
 13%|█▎        | 19/149 [00:42<03:40,  1.70s/it][A

ignoring clf for category %i with score %i (19, 0.7239015817223199)



 13%|█▎        | 20/149 [00:43<03:33,  1.65s/it][A

ignoring clf for category %i with score %i (20, 0.7105459985041137)



 14%|█▍        | 21/149 [00:45<03:50,  1.80s/it][A

ignoring clf for category %i with score %i (21, 0.7462624584717608)



 15%|█▍        | 22/149 [00:47<03:53,  1.84s/it][A

ignoring clf for category %i with score %i (22, 0.674613987284287)



 15%|█▌        | 23/149 [00:49<03:43,  1.77s/it][A

ignoring clf for category %i with score %i (23, 0.6934147405146097)



 16%|█▌        | 24/149 [00:51<03:33,  1.71s/it][A

ignoring clf for category %i with score %i (24, 0.7035102381947347)



 17%|█▋        | 25/149 [00:52<03:39,  1.77s/it][A
 17%|█▋        | 26/149 [00:54<03:45,  1.84s/it][A
 18%|█▊        | 27/149 [00:56<03:34,  1.76s/it][A

ignoring clf for category %i with score %i (27, 0.6959161147902869)



 19%|█▉        | 28/149 [00:58<03:53,  1.93s/it][A

ignoring clf for category %i with score %i (28, 0.6925717852684145)



 19%|█▉        | 29/149 [01:00<03:38,  1.82s/it][A
 20%|██        | 30/149 [01:01<03:24,  1.72s/it][A

ignoring clf for category %i with score %i (30, 0.7115501519756839)



 21%|██        | 31/149 [01:03<03:15,  1.65s/it][A
 21%|██▏       | 32/149 [01:05<03:18,  1.70s/it][A
 22%|██▏       | 33/149 [01:06<03:19,  1.72s/it][A
 23%|██▎       | 34/149 [01:08<03:25,  1.79s/it][A

ignoring clf for category %i with score %i (34, 0.7142857142857143)



 23%|██▎       | 35/149 [01:10<03:21,  1.77s/it][A

ignoring clf for category %i with score %i (35, 0.7355898123324397)



 24%|██▍       | 36/149 [01:12<03:13,  1.71s/it][A

ignoring clf for category %i with score %i (36, 0.6878396739130435)



 25%|██▍       | 37/149 [01:13<03:04,  1.64s/it][A
 26%|██▌       | 38/149 [01:15<02:56,  1.59s/it][A
 26%|██▌       | 39/149 [01:16<02:57,  1.61s/it][A
 27%|██▋       | 40/149 [01:18<02:57,  1.63s/it][A

ignoring clf for category %i with score %i (40, 0.7335708630245448)



 28%|██▊       | 41/149 [01:19<02:52,  1.60s/it][A

ignoring clf for category %i with score %i (41, 0.7306122448979592)



 28%|██▊       | 42/149 [01:21<02:51,  1.60s/it][A
 29%|██▉       | 43/149 [01:23<02:46,  1.57s/it][A
 30%|██▉       | 44/149 [01:24<02:54,  1.66s/it][A
 30%|███       | 45/149 [01:26<02:47,  1.62s/it][A

ignoring clf for category %i with score %i (45, 0.7393278837420527)



 31%|███       | 46/149 [01:28<03:04,  1.79s/it][A

ignoring clf for category %i with score %i (46, 0.7192660550458716)



 32%|███▏      | 47/149 [01:30<03:09,  1.85s/it][A
 32%|███▏      | 48/149 [01:32<03:09,  1.87s/it][A

ignoring clf for category %i with score %i (48, 0.7072649572649573)



 33%|███▎      | 49/149 [01:34<03:11,  1.91s/it][A

ignoring clf for category %i with score %i (49, 0.6998050682261209)



 34%|███▎      | 50/149 [01:36<03:05,  1.87s/it][A
 34%|███▍      | 51/149 [01:38<02:57,  1.81s/it][A
 35%|███▍      | 52/149 [01:39<02:48,  1.74s/it][A
 36%|███▌      | 53/149 [01:41<02:41,  1.68s/it][A
 36%|███▌      | 54/149 [01:42<02:36,  1.64s/it][A
 37%|███▋      | 55/149 [01:44<02:29,  1.59s/it][A
 38%|███▊      | 56/149 [01:45<02:31,  1.63s/it][A
 38%|███▊      | 57/149 [01:47<02:33,  1.66s/it][A
 39%|███▉      | 58/149 [01:49<02:32,  1.68s/it][A
 40%|███▉      | 59/149 [01:49<01:48,  1.20s/it][A

ignoring clf for category %i with score %i (58, 0.7298578199052133)
Empty set found for category 59
ignoring clf for category %i with score %i (59, 0)



 40%|████      | 60/149 [01:50<01:50,  1.25s/it][A
 41%|████      | 61/149 [01:52<01:56,  1.32s/it][A
 42%|████▏     | 62/149 [01:53<01:54,  1.31s/it][A
 42%|████▏     | 63/149 [01:55<01:57,  1.37s/it][A

ignoring clf for category %i with score %i (63, 0.7186468646864687)



 43%|████▎     | 64/149 [01:57<02:14,  1.58s/it][A

ignoring clf for category %i with score %i (64, 0.7383512544802867)



 44%|████▎     | 65/149 [02:00<02:45,  1.97s/it][A

ignoring clf for category %i with score %i (65, 0.6978827361563518)



 44%|████▍     | 66/149 [02:01<02:32,  1.84s/it][A
 45%|████▍     | 67/149 [02:03<02:28,  1.82s/it][A
 46%|████▌     | 68/149 [02:05<02:27,  1.82s/it][A
 46%|████▋     | 69/149 [02:06<02:19,  1.74s/it][A

ignoring clf for category %i with score %i (69, 0.6711864406779661)



 47%|████▋     | 70/149 [02:08<02:22,  1.81s/it][A

ignoring clf for category %i with score %i (70, 0.6541353383458647)



 48%|████▊     | 71/149 [02:10<02:26,  1.88s/it][A

ignoring clf for category %i with score %i (71, 0.7005253940455342)



 48%|████▊     | 72/149 [02:12<02:15,  1.76s/it][A

ignoring clf for category %i with score %i (72, 0.7428884026258206)



 49%|████▉     | 73/149 [02:13<02:09,  1.71s/it][A
 50%|████▉     | 74/149 [02:15<02:12,  1.76s/it][A
 50%|█████     | 75/149 [02:17<02:03,  1.67s/it][A
 51%|█████     | 76/149 [02:18<01:58,  1.62s/it][A

ignoring clf for category %i with score %i (76, 0.7084257206208425)



 52%|█████▏    | 77/149 [02:20<01:52,  1.57s/it][A
 52%|█████▏    | 78/149 [02:21<01:52,  1.58s/it][A

ignoring clf for category %i with score %i (78, 0.7098393574297188)



 53%|█████▎    | 79/149 [02:23<01:47,  1.54s/it][A

ignoring clf for category %i with score %i (79, 0.7027310924369747)



 54%|█████▎    | 80/149 [02:24<01:50,  1.61s/it][A

ignoring clf for category %i with score %i (80, 0.6903765690376569)



 54%|█████▍    | 81/149 [02:26<01:52,  1.66s/it][A

ignoring clf for category %i with score %i (81, 0.7229038854805726)



 55%|█████▌    | 82/149 [02:28<01:57,  1.75s/it][A

Empty set found for category 83
ignoring clf for category %i with score %i (83, 0)



 56%|█████▋    | 84/149 [02:30<01:34,  1.45s/it][A

ignoring clf for category %i with score %i (84, 0.6904231625835189)



 57%|█████▋    | 85/149 [02:31<01:32,  1.45s/it][A
 58%|█████▊    | 86/149 [02:32<01:27,  1.39s/it][A

ignoring clf for category %i with score %i (86, 0.6898047722342733)



 58%|█████▊    | 87/149 [02:33<01:21,  1.31s/it][A
 59%|█████▉    | 88/149 [02:35<01:17,  1.27s/it][A
 60%|█████▉    | 89/149 [02:36<01:13,  1.23s/it][A

ignoring clf for category %i with score %i (89, 0.6520681265206812)



 60%|██████    | 90/149 [02:37<01:10,  1.20s/it][A
 61%|██████    | 91/149 [02:38<01:08,  1.18s/it][A
 62%|██████▏   | 92/149 [02:39<01:07,  1.18s/it][A

ignoring clf for category %i with score %i (92, 0.7389610389610389)



 62%|██████▏   | 93/149 [02:40<01:05,  1.16s/it][A
 63%|██████▎   | 94/149 [02:42<01:04,  1.17s/it][A
 64%|██████▍   | 95/149 [02:43<01:02,  1.16s/it][A
 64%|██████▍   | 96/149 [02:44<00:59,  1.13s/it][A
 65%|██████▌   | 97/149 [02:45<00:58,  1.13s/it][A
 66%|██████▌   | 98/149 [02:46<00:57,  1.13s/it][A
 66%|██████▋   | 99/149 [02:47<00:55,  1.10s/it][A
 67%|██████▋   | 100/149 [02:49<01:01,  1.26s/it][A

ignoring clf for category %i with score %i (100, 0.729106628242075)



 68%|██████▊   | 101/149 [02:50<00:56,  1.18s/it][A
 68%|██████▊   | 102/149 [02:51<00:52,  1.12s/it][A
 69%|██████▉   | 103/149 [02:52<00:50,  1.11s/it][A
 70%|██████▉   | 104/149 [02:53<00:49,  1.10s/it][A
 70%|███████   | 105/149 [02:54<00:46,  1.05s/it][A
 71%|███████   | 106/149 [02:55<00:43,  1.02s/it][A
 72%|███████▏  | 107/149 [02:56<00:40,  1.03it/s][A

ignoring clf for category %i with score %i (107, 0.7412587412587412)



 72%|███████▏  | 108/149 [02:57<00:40,  1.01it/s][A
 73%|███████▎  | 109/149 [02:58<00:42,  1.05s/it][A

ignoring clf for category %i with score %i (109, 0.7276264591439688)



 74%|███████▍  | 110/149 [02:59<00:44,  1.14s/it][A

ignoring clf for category %i with score %i (110, 0.710820895522388)



 74%|███████▍  | 111/149 [03:01<00:47,  1.24s/it][A

ignoring clf for category %i with score %i (111, 0.7106299212598425)



 75%|███████▌  | 112/149 [03:02<00:42,  1.15s/it][A

ignoring clf for category %i with score %i (112, 0.6902985074626866)



 76%|███████▌  | 113/149 [03:03<00:39,  1.10s/it][A

ignoring clf for category %i with score %i (113, 0.7222222222222222)



 77%|███████▋  | 114/149 [03:04<00:38,  1.09s/it][A
 77%|███████▋  | 115/149 [03:05<00:35,  1.06s/it][A
 78%|███████▊  | 116/149 [03:05<00:32,  1.03it/s][A
 79%|███████▊  | 117/149 [03:06<00:30,  1.05it/s][A
 79%|███████▉  | 118/149 [03:07<00:29,  1.04it/s][A
 80%|███████▉  | 119/149 [03:08<00:31,  1.04s/it][A

ignoring clf for category %i with score %i (119, 0.737410071942446)



 81%|████████  | 120/149 [03:09<00:29,  1.00s/it][A
 81%|████████  | 121/149 [03:11<00:30,  1.08s/it][A

ignoring clf for category %i with score %i (121, 0.6855895196506551)



 82%|████████▏ | 122/149 [03:12<00:27,  1.04s/it][A

ignoring clf for category %i with score %i (122, 0.7065637065637066)



 83%|████████▎ | 123/149 [03:13<00:26,  1.04s/it][A
 83%|████████▎ | 124/149 [03:14<00:24,  1.00it/s][A
 84%|████████▍ | 125/149 [03:14<00:23,  1.02it/s][A
 85%|████████▍ | 126/149 [03:15<00:16,  1.39it/s][A

Empty set found for category 126
ignoring clf for category %i with score %i (126, 0)



 85%|████████▌ | 127/149 [03:15<00:16,  1.35it/s][A
 86%|████████▌ | 128/149 [03:17<00:18,  1.13it/s][A
 87%|████████▋ | 129/149 [03:18<00:17,  1.11it/s][A

ignoring clf for category %i with score %i (129, 0.7401960784313726)
Empty set found for category 130
ignoring clf for category %i with score %i (130, 0)



 88%|████████▊ | 131/149 [03:19<00:14,  1.21it/s][A
 89%|████████▊ | 132/149 [03:20<00:14,  1.14it/s][A

ignoring clf for category %i with score %i (132, 0.6773504273504274)



 89%|████████▉ | 133/149 [03:21<00:14,  1.11it/s][A
 90%|████████▉ | 134/149 [03:21<00:10,  1.47it/s][A

Empty set found for category 134
ignoring clf for category %i with score %i (134, 0)



 91%|█████████ | 135/149 [03:22<00:09,  1.43it/s][A
 91%|█████████▏| 136/149 [03:23<00:09,  1.36it/s][A

ignoring clf for category %i with score %i (136, 0.7241379310344828)



 92%|█████████▏| 137/149 [03:23<00:09,  1.33it/s][A
 93%|█████████▎| 138/149 [03:24<00:08,  1.25it/s][A
 93%|█████████▎| 139/149 [03:25<00:08,  1.22it/s][A
 94%|█████████▍| 140/149 [03:26<00:07,  1.27it/s][A
 95%|█████████▍| 141/149 [03:27<00:06,  1.29it/s][A
 95%|█████████▌| 142/149 [03:27<00:05,  1.30it/s][A
 96%|█████████▌| 143/149 [03:29<00:06,  1.00s/it][A
 97%|█████████▋| 144/149 [03:30<00:05,  1.07s/it][A
 97%|█████████▋| 145/149 [03:31<00:04,  1.04s/it][A

ignoring clf for category %i with score %i (145, 0.6640211640211641)



 98%|█████████▊| 146/149 [03:32<00:02,  1.07it/s][A
 99%|█████████▊| 147/149 [03:33<00:01,  1.10it/s][A

ignoring clf for category %i with score %i (147, 0.6813186813186813)



 99%|█████████▉| 148/149 [03:33<00:00,  1.13it/s][A

ignoring clf for category %i with score %i (148, 0.7194444444444444)



100%|██████████| 149/149 [03:34<00:00,  1.20it/s][A

In [70]:
import pickle

# Store data (serialize)
with open('classifiers.pickle', 'wb') as handle:
    pickle.dump(sgd_classifiers, handle, protocol=pickle.HIGHEST_PROTOCOL)



In [71]:
len (sgd_classifiers)

88