In [1]:
%matplotlib inline

In [2]:
import numpy as np
import matplotlib.pyplot as plt
from msslib.utils import *

import random

from pystruct.models import GridCRF
import pystruct.learners as ssvm
from pystruct.utils import SaveLogger

from sklearn.decomposition import PCA

import datetime
import random
import pickle

In [13]:
# One single-letter identifier per independent train/test run.
test_runs = list("BCDEF")

In [14]:
# Identifiers to exclude: any path containing one of these substrings is dropped.
unwanted_recto = [
    "VA223RN", "VA327RN", "VA001RN", "VA002RN",
    "VA006RN", "VA007RN", "VA008RN", "VA009RN",
]


def get_set_paths(directory):
    """Return the sorted list of usable paths found under `directory`.

    A path is kept when it contains the substring "RN" and contains none of
    the identifiers listed in `unwanted_recto`. The candidate paths come from
    `listpaths(directory)`.
    """
    def is_wanted(path):
        if "RN" not in path:
            return False
        return all(tag not in path for tag in unwanted_recto)

    return sorted(path for path in listpaths(directory) if is_wanted(path))

In [15]:
# Directories holding the label arrays and the matching feature arrays.
label_dir = "/home/finlay/HMT/data/VenetusA/1200Hx900W/20blocks/labels/"
data_dir = "/home/finlay/HMT/data/VenetusA/1200Hx900W/20blocks/data/"

label_paths = get_set_paths(label_dir)
data_paths = get_set_paths(data_dir)

# Labels and data are paired purely by their position in the two sorted
# listings. zip() would silently truncate to the shorter listing, so a missing
# file on either side could mis-pair every later page without any error.
# Fail loudly instead.
if len(label_paths) != len(data_paths):
    raise ValueError(
        "label/data file count mismatch: %d labels vs %d data files"
        % (len(label_paths), len(data_paths))
    )

path_pairs = list(zip(label_paths, data_paths))
print(len(path_pairs))

150


In [16]:
# Run configuration (previously inlined as magic numbers).
N_TRAIN = 20          # pages used for training; all remaining pages are tested
N_COMPONENTS = 20     # number of PCA components kept per block
GRID_ROWS, GRID_COLS = 60, 45   # block grid each page is reshaped to
BLOCK_SCALE = 20      # factor handed to scale_matrix when saving predictions
                      # (presumably the block size in pixels -- TODO confirm)
OUT_ROOT = "/home/finlay/HMT/data/VenetusA/1200Hx900W/20blocks/PCA_20_Grid_CRF"

# For every run: draw a random train/test split, fit PCA + a grid CRF on the
# training pages, pickle the classifier, then predict and save every test page.
for letter in test_runs:
    # Shuffle a copy so the shared path_pairs list is not reordered in place.
    shuffled_pairs = list(path_pairs)
    random.shuffle(shuffled_pairs)
    train = shuffled_pairs[:N_TRAIN]
    test = shuffled_pairs[N_TRAIN:]

    oot_path = os.path.join(OUT_ROOT, letter)
    mkdir(oot_path)

    crf = GridCRF(neighborhood=4)
    clf = ssvm.OneSlackSSVM(model=crf, n_jobs=-1, C=100, inference_cache=100, tol=.1,
                            logger=SaveLogger(oot_path+'.pickle', save_every=100))

    pca = PCA(n_components=N_COMPONENTS)
    print(datetime.datetime.now())
    print("Training PCA %s" %letter)
    # PCA.fit() refits from scratch on every call, so fitting once per page
    # left the projection learned from the last training page only. Stack the
    # features of all training pages and fit a single time instead.
    train_features = [flatten_vector_matrix(np.load(data_path)) for _, data_path in train]
    pca.fit(np.vstack(train_features))

    print("Training %s" %letter)
    train_X = []
    train_y = []
    for (label_path, _), features in zip(train, train_features):
        print("Training on %s" %label_path)
        pre_X = pca.transform(features)
        # Same reshape rule as at test time: last axis follows the PCA output.
        train_X.append(pre_X.reshape(GRID_ROWS, GRID_COLS, pre_X.shape[1]))
        train_y.append(np.squeeze(np.load(label_path)).astype('int'))
    print(np.array(train_X).shape)
    clf.fit(train_X, train_y)
    print(datetime.datetime.now())

    # Context manager guarantees the pickle file is flushed and closed.
    with open(oot_path + "_clf.pkl", "wb") as clf_file:
        pickle.dump(clf, clf_file)

    print("Testing %s" %letter)
    test_results = []
    for label_path, data_path in test:
        pre_X = pca.transform(flatten_vector_matrix(np.load(data_path)))
        X = pre_X.reshape((GRID_ROWS, GRID_COLS, pre_X.shape[1]))
        pred = np.asarray(clf.predict([X]))
        test_results.append({
            'name': only_basename(label_path),
            'pred': pred.reshape(GRID_ROWS, GRID_COLS),
            })

    print("Saving %s" %letter)
    for result in test_results:
        path = format_path(oot_path, 'npy', result['name'])
        np.save(path, scale_matrix(result['pred'], BLOCK_SCALE))
    print(datetime.datetime.now())


2016-06-04 11:39:30.901720
Training PCA B
Training B
Training on /home/finlay/HMT/data/VenetusA/1200Hx900W/20blocks/labels/VA012RN-0013.npy
Training on /home/finlay/HMT/data/VenetusA/1200Hx900W/20blocks/labels/VA200RN-0201.npy
Training on /home/finlay/HMT/data/VenetusA/1200Hx900W/20blocks/labels/VA263RN-0433.npy
Training on /home/finlay/HMT/data/VenetusA/1200Hx900W/20blocks/labels/VA240RN-0241.npy
Training on /home/finlay/HMT/data/VenetusA/1200Hx900W/20blocks/labels/VA147RN-0148.npy
Training on /home/finlay/HMT/data/VenetusA/1200Hx900W/20blocks/labels/VA152RN-0324.npy
Training on /home/finlay/HMT/data/VenetusA/1200Hx900W/20blocks/labels/VA113RN-0285.npy
Training on /home/finlay/HMT/data/VenetusA/1200Hx900W/20blocks/labels/VA299RN-0469.npy
Training on /home/finlay/HMT/data/VenetusA/1200Hx900W/20blocks/labels/VA216RN-0217.npy
Training on /home/finlay/HMT/data/VenetusA/1200Hx900W/20blocks/labels/VA265RN-0435.npy
Training on /home/finlay/HMT/data/VenetusA/1200Hx900W/20blocks/labels/VA068RN

  self.w = np.zeros(self.model.size_joint_feature)
  constraints.append((np.zeros(self.model.size_joint_feature), 0))
  joint_feature_ = np.zeros(self.size_joint_feature)
  joint_feature_acc = np.zeros(self.model.size_joint_feature)


2016-06-04 21:17:17.172964
Testing B
Saving B
2016-06-04 21:22:38.406085
2016-06-04 21:22:38.425089
Training PCA C
Training C
Training on /home/finlay/HMT/data/VenetusA/1200Hx900W/20blocks/labels/VA022RN-0023.npy
Training on /home/finlay/HMT/data/VenetusA/1200Hx900W/20blocks/labels/VA120RN-0121.npy
Training on /home/finlay/HMT/data/VenetusA/1200Hx900W/20blocks/labels/VA192RN-0193.npy
Training on /home/finlay/HMT/data/VenetusA/1200Hx900W/20blocks/labels/VA220RN-0391.npy
Training on /home/finlay/HMT/data/VenetusA/1200Hx900W/20blocks/labels/VA023RN-0024.npy
Training on /home/finlay/HMT/data/VenetusA/1200Hx900W/20blocks/labels/VA275RN-0445.npy
Training on /home/finlay/HMT/data/VenetusA/1200Hx900W/20blocks/labels/VA274RN-0444.npy
Training on /home/finlay/HMT/data/VenetusA/1200Hx900W/20blocks/labels/VA175RN-0176.npy
Training on /home/finlay/HMT/data/VenetusA/1200Hx900W/20blocks/labels/VA238RN-0239.npy
Training on /home/finlay/HMT/data/VenetusA/1200Hx900W/20blocks/labels/VA248RN-0249.npy
Trai

Testing A
Saving A
2016-06-04 11:29:16.475315


In [10]:
import pickle

In [11]:
# Persist the classifier currently held in `clf`, using the `oot_path` and
# `letter` values left in the kernel by the training loop. The file goes
# inside the run directory; the training loop itself writes to
# oot_path + "_clf.pkl" instead.
# The context manager closes the file handle, which the bare open() leaked.
with open(os.path.join(oot_path, letter + "_clf.pkl"), "wb") as clf_file:
    pickle.dump(clf, clf_file)

