In [1]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

plt.rcParams['figure.figsize'] = (10, 10)        # large images
plt.rcParams['image.interpolation'] = 'nearest'  # don't interpolate: show square pixels
plt.rcParams['image.cmap'] = 'gray'  # use grayscale output rather than a (potentially misleading) color heatmap

import sys
caffe_root = '../'  # this file should be run from {caffe_root}/examples (otherwise change this line)
sys.path.insert(0, caffe_root + 'python')
import os
import caffe

caffe.set_device(0)  # if we have multiple GPUs, pick the first one
caffe.set_mode_gpu()

model_def = caffe_root + 'models/bvlc_reference_caffenet/deploy.prototxt'
model_weights = caffe_root + 'models/bvlc_reference_caffenet/bvlc_reference_caffenet.caffemodel'

In [2]:
net = caffe.Net(model_def,      # defines the structure of the model
                model_weights,  # contains the trained weights
                caffe.TEST)     # use test mode (e.g., don't perform dropout)

In [3]:
################################THESE CODES############################################
# load the mean ImageNet image (as distributed with Caffe) for subtraction
mu = np.load(caffe_root + 'python/caffe/imagenet/ilsvrc_2012_mean.npy')
mu = mu.mean(1).mean(1)  # average over pixels to obtain the mean (BGR) pixel values
#print 'mean-subtracted values:', zip('BGR', mu)

# create transformer for the input called 'data'
transformer = caffe.io.Transformer({'data': net.blobs['data'].data.shape})

transformer.set_transpose('data', (2,0,1))  # move image channels to outermost dimension
transformer.set_mean('data', mu)            # subtract the dataset-mean value in each channel
transformer.set_raw_scale('data', 255)      # rescale from [0, 1] to [0, 255]
transformer.set_channel_swap('data', (2,1,0))  # swap channels from RGB to BGR

In [4]:
# set the size of the input (we can skip this if we're happy
#  with the default; we can also change it later, e.g., for different batch sizes)
net.blobs['data'].reshape(50,        # batch size
                          3,         # 3-channel (BGR) images
                          227, 227)  # image size is 227x227

In [5]:
for i in range(1,7001):
    print('working on training image ',i)
    imageName = str(i).zfill(5)
    image = caffe.io.load_image(caffe_root + 'examples/images/joey/'+imageName+".jpg")
    transformed_image = transformer.preprocess('data', image)
    # copy the image data into the memory allocated for the net
    net.blobs['data'].data[...] = transformed_image

    ### perform classification
    output = net.forward()
    output_prob = output['pool5'][0]  # the output probability vector for the first image in the batch
    vector_out = output['pool5'].reshape(output['pool5'].shape[0]*output['pool5'].shape[1]*output['pool5'].shape[2]*output['pool5'].shape[3],1)
    if i==1:
        result = vector_out
    else:
        result = np.concatenate((result, vector_out), axis=1)
result = result.T
print (result.shape)

for i in range(1,971):
    print('working on test image ',i)
    imageName = str(i).zfill(5)
    image = caffe.io.load_image(caffe_root + 'examples/images/Test/'+imageName+".jpg")
    transformed_image = transformer.preprocess('data', image)
    # copy the image data into the memory allocated for the net
    net.blobs['data'].data[...] = transformed_image

    ### perform classification
    output = net.forward()
    output_prob = output['pool5'][0]  # the output probability vector for the first image in the batch
    vector_out = output['pool5'].reshape(output['pool5'].shape[0]*output['pool5'].shape[1]*output['pool5'].shape[2]*output['pool5'].shape[3],1)
    if i==1:
        result_test = vector_out
    else:
        result_test = np.concatenate((result_test, vector_out), axis=1)
result_test = result_test.T
print (result_test.shape)




('working on training image ', 1)
('working on training image ', 2)
('working on training image ', 3)
('working on training image ', 4)
('working on training image ', 5)
('working on training image ', 6)
('working on training image ', 7)
('working on training image ', 8)
('working on training image ', 9)
('working on training image ', 10)
('working on training image ', 11)
('working on training image ', 12)
('working on training image ', 13)
('working on training image ', 14)
('working on training image ', 15)
('working on training image ', 16)
('working on training image ', 17)
('working on training image ', 18)
('working on training image ', 19)
('working on training image ', 20)
('working on training image ', 21)
('working on training image ', 22)
('working on training image ', 23)
('working on training image ', 24)
('working on training image ', 25)
('working on training image ', 26)
('working on training image ', 27)
('working on training image ', 28)
('working on training image '

In [7]:
from numpy import genfromtxt
train_data = genfromtxt(caffe_root + 'examples/images/Files/train.csv', delimiter=',')
#clean_data = my_data[1:,1]
clean_data = train_data[1:7001,1]
#test_data = train_data[1:971,1]
clean_data = clean_data.reshape(-1)
#test_data = test_data.reshape(test_data.shape[0],1)
print ('result shape ',result.shape)
print ('clean data shape ',clean_data.shape)
#print (test_data.shape)

('result shape ', (7000, 460800))
('clean data shape ', (7000,))


In [8]:
# import tensorflow as tf
# print("1")
# # Set parameters
# learning_rate = 0.01
# training_iteration = 30
# batch_size = 100
# display_step = 2
# print("2")
# # TF graph input
# x = tf.placeholder("float", [None, 460800]) # mnist data image of shape 28*28=784
# y = tf.placeholder("float", [None, 7]) # 0-9 digits recognition => 10 classes

# # Set model weights
# W = tf.Variable(tf.zeros([460800, 7]))
# b = tf.Variable(tf.zeros([7]))
# print("3")
# # Construct a linear model
# model = tf.nn.softmax(tf.matmul(x, W) + b) # Softmax

# # Minimize error using cross entropy
# # Cross entropy
# cost_function = -tf.reduce_sum(y*tf.log(model)) 
# # Gradient Descent
# optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost_function)

# # Initializing the variables
# init = tf.initialize_all_variables()
# print("4")
# # Launch the graph
# with tf.Session() as sess:
#     sess.run(init)
#     print("5")
#     # Training cycle
#     for iteration in range(training_iteration):
#         avg_cost = 0.
#         total_batch = int(result.shape[0]/batch_size)
#         # Loop over all batches
#         for i in range(total_batch):
#             start = i * batch_size
#             end  = min(result.shape[0], (i + 1) * batch_size)
#             batch_xs = result[start:end]
#             batch_ys = clean_data[start:end]
#             # Fit training using batch data
#             sess.run(optimizer, feed_dict={x: batch_xs, y: batch_ys})
#             # Compute average loss
#             avg_cost += sess.run(cost_function, feed_dict={x: batch_xs, y: batch_ys})/total_batch
#         # Display logs per eiteration step
#         if iteration % display_step == 0:
#             print ("Iteration:", '%04d' % (iteration + 1), "cost=", "{:.9f}".format(avg_cost))

#     print ("Tuning completed!")

#     # Test the model
#     predictions = tf.equal(tf.argmax(model, 1), tf.argmax(y, 1))
#     # Calculate accuracy
#     accuracy = tf.reduce_mean(tf.cast(predictions, "float"))
#     print ("Accuracy:", accuracy.eval({x: mnist.test.images, y: mnist.test.labels}))

In [9]:
import pandas as pd
import sklearn
from sklearn import cross_validation
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.svm import SVC, LinearSVC
import matplotlib.pyplot as plt
%matplotlib inline
import pickle

clf = LinearSVC(C=1.0, loss='squared_hinge', penalty='l2',multi_class='ovr')
clf.fit(result, clean_data)
y_pred = clf.predict(result_test)

filename = "predict.csv"
f = open(filename, "w")
f.write('Id,Prediction\n')

if ((len(y_pred))<1000):
    zeros = np.zeros(2000)
    y_pred = np.append(y_pred, zeros).reshape(-1)

for i in range(0,len(y_pred)):
    d = '{0},{1}\n'.format(i+1,int(y_pred[i]))
    f.write(d)


#print("Accuracy: {0:0.1f}%".format(accuracy_score(test_data,y_pred)*100))



In [18]:
# from tempfile import TemporaryFile
# outfile = TemporaryFile()
# np.savez(outfile,result=result,result_test=result_test)

#code to load file
# npzfile = np.load(outfile)
# result = npzfile['result']
# result_test = npzfile['result_test']

resultName = "Input matrix"
np.savez_compressed(resultName,result)

resultName = "Test matrix"
np.savez_compressed(resultName,result_test)
#print (result.shape)


IOError: 3225600000 requested and 3183194092 written