In [1]:
%matplotlib inline

In [2]:
import numpy as np
import matplotlib.pyplot as plt
from msslib.utils import *

import random

from pystruct.models import GridCRF
import pystruct.learners as ssvm
from pystruct.utils import SaveLogger

from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

import datetime
import random
import pickle

In [3]:
test_runs = ["A","B","C","D","E","F"]

In [4]:
unwanted_recto = ["VA223RN","VA327RN","VA001RN","VA002RN","VA006RN","VA007RN","VA008RN","VA009RN"]

def get_set_paths(directory):
    return sorted(list(filter(lambda x: "RN" in x and not any(s in x for s in unwanted_recto) , listpaths(directory))))

In [5]:
label_dir = "/home/finlay/HMT/data/VenetusA/1200Hx900W/20blocks/labels/"
data_dir = "/home/finlay/HMT/data/VenetusA/1200Hx900W/20blocks/data/"

label_paths = get_set_paths(label_dir)
data_paths = get_set_paths(data_dir)

path_pairs = list(zip(label_paths, data_paths))
print(len(path_pairs))

150


In [6]:
# I'm training LDA on this as well as the random test set to ensure I always get the same number of classes
lda_data_path = "/home/finlay/HMT/data/VenetusA/1200Hx900W/20blocks/data/VA013RN-0014.npy"
lda_label_path = "/home/finlay/HMT/data/VenetusA/1200Hx900W/20blocks/labels/VA013RN-0014.npy"
lda_X = flatten_vector_matrix(np.load(lda_data_path))
lda_y = np.squeeze(flatten_vector_matrix(np.load(lda_label_path)))

In [16]:
for letter in test_runs:
    split_at = 20
    random.shuffle(path_pairs)
    train = path_pairs[:split_at]
    test = path_pairs[split_at:]
    
    oot_path = os.path.join("/home/finlay/HMT/data/VenetusA/1200Hx900W/20blocks/LDA_20_Grid_CRF", letter)
    mkdir(oot_path)
 
    crf = GridCRF(neighborhood=4)
    clf = ssvm.OneSlackSSVM(model=crf,n_jobs=-1, C=100, inference_cache=100, tol=.1,
                           logger=SaveLogger(oot_path+'.pickle', save_every=100))
    
    lda = LinearDiscriminantAnalysis()
    print(datetime.datetime.now())
    print("Training LDA %s" %letter)
    lda.fit(lda_X, lda_y)
    for l, d in train:
        X = flatten_vector_matrix(np.load(d))
        y = np.squeeze(flatten_vector_matrix(np.load(l)))
        lda.fit(X, y)
  

    print("Training %s" %letter)
    train_X = []
    train_y = []
    for l, d in train:
        print("Training on %s" %l)
        pre_X = lda.transform(flatten_vector_matrix(np.load(d)))
        train_X.append(pre_X.reshape(60,45,4))
        train_y.append(np.squeeze(np.load(l)).astype('int'))
    print(np.array(train_X).shape)
    clf.fit(train_X, train_y)
    print(datetime.datetime.now())

    pickle.dump(clf, open(os.path.join(oot_path + "_clf.pkl"), "wb" ) )
    print("Testing %s" %letter)
    test_results = []
    for l, d in test:
        pre_X = lda.transform(flatten_vector_matrix(np.load(d)))
        X = pre_X.reshape((60,45, pre_X.shape[1]))
        y = np.squeeze(flatten_vector_matrix(np.load(l)))
        pred = np.asarray(clf.predict([X]))
        test_results.append({
            'name': only_basename(l),
            'pred': pred.reshape(60,45),
            })

    print("Saving %s" %letter)
    for result in test_results:
        path = format_path(oot_path, 'npy', result['name'])
        np.save(path, scale_matrix(result['pred'], 20))
    print(datetime.datetime.now())

2016-06-05 22:57:53.944735
Training LDA A




Training A
Training on /home/finlay/HMT/data/VenetusA/1200Hx900W/20blocks/labels/VA288RN-0458.npy
Training on /home/finlay/HMT/data/VenetusA/1200Hx900W/20blocks/labels/VA305RN-0475.npy
Training on /home/finlay/HMT/data/VenetusA/1200Hx900W/20blocks/labels/VA215RN-0216.npy
Training on /home/finlay/HMT/data/VenetusA/1200Hx900W/20blocks/labels/VA019RN-0020.npy
Training on /home/finlay/HMT/data/VenetusA/1200Hx900W/20blocks/labels/VA275RN-0445.npy
Training on /home/finlay/HMT/data/VenetusA/1200Hx900W/20blocks/labels/VA222RN-0223.npy
Training on /home/finlay/HMT/data/VenetusA/1200Hx900W/20blocks/labels/VA214RN-0215.npy
Training on /home/finlay/HMT/data/VenetusA/1200Hx900W/20blocks/labels/VA252RN-0253.npy
Training on /home/finlay/HMT/data/VenetusA/1200Hx900W/20blocks/labels/VA167RN-0168.npy
Training on /home/finlay/HMT/data/VenetusA/1200Hx900W/20blocks/labels/VA141RN-0142.npy
Training on /home/finlay/HMT/data/VenetusA/1200Hx900W/20blocks/labels/VA220RN-0391.npy
Training on /home/finlay/HMT/dat

  self.w = np.zeros(self.model.size_joint_feature)
  constraints.append((np.zeros(self.model.size_joint_feature), 0))
  joint_feature_ = np.zeros(self.size_joint_feature)
  joint_feature_acc = np.zeros(self.model.size_joint_feature)


2016-06-06 01:03:59.994805
Testing A
Saving A
2016-06-06 01:06:15.827618
2016-06-06 01:06:15.845432
Training LDA B




Training B
Training on /home/finlay/HMT/data/VenetusA/1200Hx900W/20blocks/labels/VA160RN-0161.npy
Training on /home/finlay/HMT/data/VenetusA/1200Hx900W/20blocks/labels/VA120RN-0121.npy
Training on /home/finlay/HMT/data/VenetusA/1200Hx900W/20blocks/labels/VA318RN-0488.npy
Training on /home/finlay/HMT/data/VenetusA/1200Hx900W/20blocks/labels/VA304RN-0474.npy
Training on /home/finlay/HMT/data/VenetusA/1200Hx900W/20blocks/labels/VA031RN-0032.npy
Training on /home/finlay/HMT/data/VenetusA/1200Hx900W/20blocks/labels/VA118RN-0119.npy
Training on /home/finlay/HMT/data/VenetusA/1200Hx900W/20blocks/labels/VA063RN-0064.npy
Training on /home/finlay/HMT/data/VenetusA/1200Hx900W/20blocks/labels/VA229RN-0400.npy
Training on /home/finlay/HMT/data/VenetusA/1200Hx900W/20blocks/labels/VA285RN-0455.npy
Training on /home/finlay/HMT/data/VenetusA/1200Hx900W/20blocks/labels/VA091RN-0092.npy
Training on /home/finlay/HMT/data/VenetusA/1200Hx900W/20blocks/labels/VA019RN-0020.npy
Training on /home/finlay/HMT/dat

In [10]:
pre_X.shape

(2700, 4)

In [11]:
lda.classes_

array([ 0.,  1.,  2.,  3.,  4.])

In [15]:
y.reshape(60,45).unique


AttributeError: 'numpy.ndarray' object has no attribute 'unique'