Commit

adding the source code
edouardoyallon committed Nov 15, 2016
1 parent fed199c commit 7997501
Showing 10 changed files with 1,506 additions and 2 deletions.
14 changes: 12 additions & 2 deletions README.md
@@ -1,2 +1,12 @@
# deep_separation_contraction
This is the code for the CVPR17 paper "Building a Regular Classification Boundary with Deep Networks"
# Building a Regular Classification Boundary with Deep Networks
This is the code for the CVPR17 paper "Building a Regular Classification Boundary with Deep Networks" by Edouard Oyallon. A large part of the code is inspired by https://github.com/bgshih/tf_resnet_cifar, though it has been substantially modified.
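
The CIFAR python batches presumably have to be converted to TFRecords first with create_dataset.py; a minimal sketch using its data_dir and data_name flags (the CIFAR100 directory name below is an assumption):

python create_dataset.py --data_dir=./data_cifar10/cifar-10-batches-py/ --data_name=cifar10
python create_dataset.py --data_dir=./data_cifar100/cifar-100-python/ --data_name=cifar100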

To run all the experiments and reproduce the figures of the paper, simply run:

bash script_nonlinearity_alpha.bash
python build_figure_paper.py

The best accuracy should be 95.4% on CIFAR10 and 79.6% on CIFAR100, obtained with n_channel=512 and alpha=1.0.

# Acknowledgement
Code modified by Edouard Oyallon
242 changes: 242 additions & 0 deletions build_figure_paper.py
@@ -0,0 +1,242 @@
from __future__ import division

import os

import numpy as np
import tensorflow as tf
from matplotlib.ticker import ScalarFormatter
import matplotlib.pyplot as plt

import model_resnet as m
import model_utils as mu

FLAGS = tf.app.flags.FLAGS
tf.app.flags.DEFINE_string('load_dir', '', '')
tf.app.flags.DEFINE_integer('residual_net_n', 2, '')
tf.app.flags.DEFINE_string('dataset', 'cifar10', 'cifar10 or cifar100')
tf.app.flags.DEFINE_string('train_tf_path', 'data/train.tf', '')
tf.app.flags.DEFINE_string('val_tf_path', 'data/test.tf', '')
tf.app.flags.DEFINE_string('mean_std_path', 'data/meanstd.pkl', '')
tf.app.flags.DEFINE_integer('train_batch_size', 128, '')
tf.app.flags.DEFINE_integer('val_batch_size', 100, '')
tf.app.flags.DEFINE_float('weight_decay', 2e-4, 'Weight decay')  # 2e-4 : 85.5%
tf.app.flags.DEFINE_float('alpha', 0, 'Degree of non-linearity')
tf.app.flags.DEFINE_integer('summary_interval', 100, 'Interval for summary.')
tf.app.flags.DEFINE_integer('val_interval', 1000, 'Interval for evaluation.')
tf.app.flags.DEFINE_integer('max_steps', 121101, 'Maximum number of iterations.')
tf.app.flags.DEFINE_string('log_dir', 'logs/', '')
tf.app.flags.DEFINE_integer('save_interval', 5000, '')
tf.app.flags.DEFINE_integer('save_end_accuracy', 5000, '')
tf.app.flags.DEFINE_integer('file_save_acc', 5000, 'File where the accuracy, amount of non-linearity, etc., are saved')


def get_acc():
    FLAGS.log_dir = 'logs'
    FLAGS.save_fig = '/users/data/oyallon/Desktop/git_thigns/paperCVPR17'  # hardcoded output directory for the figures

    # Accuracy as a function of the non-linearity ratio alpha, for K=32 channels.
    acc = []
    alpha = [1.0, 0.9, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.2, 0.1, 0.05, 0.0]
    n_channel = 32
    for a in range(len(alpha)):
        DIR = os.path.join(FLAGS.log_dir, FLAGS.dataset, str(n_channel), str(alpha[a]))
        x = np.load(os.path.join(DIR, 'acc.npy'))
        acc.append(x)
        print('C10, alpha: %f, k: %f, acc: %f' % (alpha[a], n_channel, x))
    acc = np.array(acc)
    plt.plot(alpha, 100 * acc, '-o', color='black')

    # Same curve for K=128 channels.
    acc = []
    n_channel = 128
    for a in range(len(alpha)):
        DIR = os.path.join(FLAGS.log_dir, FLAGS.dataset, str(n_channel), str(alpha[a]))
        try:
            x = np.load(os.path.join(DIR, 'acc.npy'))
            acc.append(x)
            print('C10, alpha: %f, k: %f, acc: %f' % (alpha[a], n_channel, x))
        except IOError:
            acc.append(0.925)  # placeholder value when the run is missing
            print('C10, alpha: %f, k: %f, it failed' % (alpha[a], n_channel))
    acc = np.array(acc)
    plt.plot(alpha, 100 * acc, '-x', color='black')

    plt.xlabel('Ratio $\\frac{k}{K}$')
    plt.ylabel('% accuracy')
    plt.legend(['K=32', 'K=128'], loc=4)
    plt.ylim([60, 100])
    plt.savefig(os.path.join(FLAGS.save_fig, '32channels.eps'), format='eps', dpi=1000, bbox_inches='tight')

    # Accuracy as a function of the width K on CIFAR-10, with alpha=1.0.
    alpha = 1.0
    n_channel = [16, 32, 64, 128, 256, 512]
    acc = []
    for a in range(len(n_channel)):
        DIR = os.path.join(FLAGS.log_dir, FLAGS.dataset, str(n_channel[a]), str(alpha))
        try:
            x = np.load(os.path.join(DIR, 'acc.npy'))
            acc.append(x)
            print('C10, alpha: %f, k: %f, acc: %f' % (alpha, n_channel[a], x))
        except IOError:
            acc.append(0.1)  # placeholder value when the run is missing
            print('C10, alpha: %f, k: %f, it failed' % (alpha, n_channel[a]))
    acc = np.array(acc)
    plt.clf()
    fig, ax = plt.subplots()
    plt.plot(n_channel, 100 * acc, 'x-', color='black')
    plt.xscale('log')
    plt.xlabel('K')
    plt.ylabel('% accuracy')
    ax.xaxis.set_ticks(n_channel)
    for axis in [ax.xaxis, ax.yaxis]:
        axis.set_major_formatter(ScalarFormatter())
    plt.xlim([16, 512])
    plt.ylim([70, 100])
    plt.savefig(os.path.join(FLAGS.save_fig, 'C10_nchannel.eps'), format='eps', dpi=1000, bbox_inches='tight')

    # Same curve on CIFAR-100.
    plt.clf()
    fig, ax = plt.subplots()
    acc = []
    for a in range(len(n_channel)):
        DIR = os.path.join(FLAGS.log_dir, 'cifar100', str(n_channel[a]), str(alpha))
        try:
            x = np.load(os.path.join(DIR, 'acc.npy'))
            acc.append(x)
            print('C100, alpha: %f, k: %f, acc: %f' % (alpha, n_channel[a], x))
        except IOError:
            # Note: unlike the CIFAR-10 loops, no placeholder is appended here.
            print('C100, alpha: %f, k: %f, it failed' % (alpha, n_channel[a]))
    acc = np.array(acc)
    plt.plot(n_channel, 100 * acc, 'x-', color='black')
    plt.ylim([30, 100])
    plt.xlim([16, 512])
    plt.xscale('log')
    plt.xlabel('K')
    plt.ylabel('% accuracy')
    ax.xaxis.set_ticks(n_channel)
    for axis in [ax.xaxis, ax.yaxis]:
        axis.set_major_formatter(ScalarFormatter())
    plt.savefig(os.path.join(FLAGS.save_fig, 'C100_nchannel.eps'), format='eps', dpi=1000, bbox_inches='tight')

    # Cumulative distributions of distances, one curve per depth n (grayscale).
    plt.clf()
    bins = np.linspace(0, 2., 100)
    x = np.load('incorrect_renorm.npy')
    for p in range(12):
        values2, base2 = np.histogram(x[2 * p + 1], bins=bins)
        e = 0.7 / 12.0
        f = 0.1
        c = (e * p + f, e * p + f, e * p + f)  # lighter gray for deeper layers
        plt.plot(base2[0:-1], np.cumsum(values2) / np.sum(values2), color=c)
    for p in range(12):
        values, base = np.histogram(x[2 * p], bins=bins)
        e = 0.7 / 12.0
        f = 0.1
        c = (e * p + f, e * p + f, e * p + f)
        plt.plot(base[0:-1], np.cumsum(values) / np.sum(values), '--', color=c)
    plt.yscale('log')
    plt.ylabel('Cumulative distribution')
    plt.xlabel('Distance')
    d = ['n=%i' % (i + 2) for i in range(12)]
    plt.legend(d, loc=4)
    plt.savefig(os.path.join(FLAGS.save_fig, 'hist.eps'), format='eps', dpi=1000, bbox_inches='tight')

    # Nearest-neighbour and SVM accuracies per depth n, against the CNN baseline.
    plt.clf()
    x = np.load('acc_NN.npy')
    y = np.load('acc_SVM.npy')
    plt.xlabel('n')
    plt.xlim([2, 13])
    plt.ylim([40, 100])
    plt.ylabel('% accuracy')
    plt.plot(np.array(range(12)) + 2, np.ones(12) * 88.0, '-.', color='black')
    plt.plot(np.array(range(12)) + 2, 100 * x, 'x-', color='black')
    plt.plot(np.array(range(12)) + 2, 100 * y, 'o--', color='black')
    plt.legend(['Accuracy of the CNN', 'NN', 'SVM'], loc=4)
    plt.savefig(os.path.join(FLAGS.save_fig, 'acc.eps'), format='eps', dpi=1000, bbox_inches='tight')

    # Cumulated variance of the principal components, per depth n.
    plt.clf()
    x = np.load('spec.npy')
    plt.xlabel('Principal components')
    plt.ylabel('Cumulated variance')
    plt.xlim([1, 32])
    for i in range(12):
        a = x[i, 8, :]  # previously 5
        e = 0.7 / 12.0
        f = 0.1
        c = (e * i + f, e * i + f, e * i + f)
        plt.plot(np.array(range(32)) + 1, np.cumsum(a), color=c)
    d = ['n=%i' % (i + 2) for i in range(12)]
    plt.legend(d, loc=1)
    plt.savefig(os.path.join(FLAGS.save_fig, 'PCA.eps'), format='eps', dpi=1000, bbox_inches='tight')

    # Averaged distance per depth n, one curve per class c.
    plt.clf()
    x = np.load('torsion.npy')
    plt.xlim([2, 13])
    x = x / (5000 * 5000 / 2)  # normalize by the number of pairs (5000 samples per class)
    for i in range(10):
        a = x[:, i]
        e = 0.7 / 12.0
        f = 0.1
        c = (e * i + f, e * i + f, e * i + f)
        plt.plot(np.array(range(12)) + 2, a, color=c)
    d = ['c=%i' % i for i in range(10)]
    plt.legend(d, loc=3, ncol=3)
    plt.xlabel('n')
    plt.ylabel('Averaged distance')
    plt.savefig(os.path.join(FLAGS.save_fig, 'torsion.eps'), format='eps', dpi=1000, bbox_inches='tight')

    # Size of the sets Gamma_n^k, one curve per depth n.
    plt.clf()
    x = np.load('SVs.npy')
    for i in range(12):
        a = x[i, :]
        e = 0.7 / 12.0
        f = 0.1
        c = (e * i + f, e * i + f, e * i + f)
        plt.plot(a, color=c)
    plt.ylabel(r'$|\Gamma_n^k|$')
    plt.xlabel('k')
    d = ['n=%i' % (i + 2) for i in range(12)]
    plt.legend(d, loc=1, ncol=3)
    plt.savefig(os.path.join(FLAGS.save_fig, 'SVs.eps'), format='eps', dpi=1000, bbox_inches='tight')


if __name__ == '__main__':
    get_acc()
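
For reference, build_figure_paper.py only consumes files of the form logs/<dataset>/<n_channel>/<alpha>/acc.npy. A minimal sketch of writing a compatible file, assuming a training run reports its final accuracy as a fraction in [0, 1] (the values below are illustrative only, not outputs of this commit):

import os
import numpy as np

# Hypothetical example: store the final accuracy of one run so that
# build_figure_paper.py can load it from logs/<dataset>/<K>/<alpha>/acc.npy.
log_dir = os.path.join('logs', 'cifar10', '512', '1.0')
if not os.path.isdir(log_dir):
    os.makedirs(log_dir)
np.save(os.path.join(log_dir, 'acc.npy'), 0.954)  # accuracy as a fraction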
120 changes: 120 additions & 0 deletions create_dataset.py
@@ -0,0 +1,120 @@
from __future__ import division

import os
import pickle

import joblib
import numpy as np
import tensorflow as tf


def _int64_feature(value):
    return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))


def _bytes_feature(value):
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))


FLAGS = tf.app.flags.FLAGS
tf.app.flags.DEFINE_string('data_dir', '', 'Example: ./data_cifar10/cifar-10-batches-py/')
tf.app.flags.DEFINE_string('data_name', '', 'Example: cifar10')


def create_dataset(data_root, data_name):
    def save_to_records(save_path, images, labels):
        writer = tf.python_io.TFRecordWriter(save_path)
        for i in range(images.shape[0]):
            image_raw = images[i].tostring()
            example = tf.train.Example(features=tf.train.Features(feature={
                'height': _int64_feature(32),
                'width': _int64_feature(32),
                'depth': _int64_feature(3),
                'label': _int64_feature(int(labels[i])),
                'image_raw': _bytes_feature(image_raw)
            }))
            writer.write(example.SerializeToString())

    # Train set.
    train_images = np.zeros((50000, 3072), dtype=np.uint8)
    train_labels = np.zeros((50000,), dtype=np.int32)
    if data_name == 'cifar10':
        for i in range(5):
            with open(os.path.join(data_root, 'data_batch_%d' % (i + 1)), "rb") as f:
                data_batch = pickle.load(f, encoding='bytes')
            train_images[10000 * i:10000 * (i + 1)] = data_batch[b'data']
            train_labels[10000 * i:10000 * (i + 1)] = np.asarray(data_batch[b'labels'], dtype=np.int32)
        train_images = np.reshape(train_images, [50000, 3, 32, 32])
        train_images = np.transpose(train_images, axes=[0, 2, 3, 1])  # NCHW -> NHWC
        save_to_records('data_' + data_name + '/train.tf', train_images, train_labels)
    elif data_name == 'cifar100':
        with open(os.path.join(data_root, 'train'), "rb") as f:
            data_batch = pickle.load(f, encoding='bytes')
        train_images[:, :] = data_batch[b'data']
        train_labels[:] = np.asarray(data_batch[b'fine_labels'], dtype=np.int32)
        train_images = np.reshape(train_images, [50000, 3, 32, 32])
        train_images = np.transpose(train_images, axes=[0, 2, 3, 1])  # NCHW -> NHWC
        save_to_records('data_' + data_name + '/train.tf', train_images, train_labels)

    # Per-channel mean and std over the training set.
    image_mean = np.mean(train_images.astype(np.float32), axis=(0, 1, 2))
    image_std = np.std(train_images.astype(np.float32), axis=(0, 1, 2))
    joblib.dump({'mean': image_mean, 'std': image_std}, 'data_' + data_name + '/meanstd.pkl', compress=5)

    # Test set.
    if data_name == 'cifar10':
        with open(os.path.join(data_root, 'test_batch'), "rb") as f:
            data_batch = pickle.load(f, encoding='bytes')
        test_images = data_batch[b'data']
        test_images = np.reshape(test_images, [10000, 3, 32, 32])
        test_images = np.transpose(test_images, axes=[0, 2, 3, 1])
        test_labels = np.asarray(data_batch[b'labels'], dtype=np.int32)
        save_to_records('data_' + data_name + '/test.tf', test_images, test_labels)
    elif data_name == 'cifar100':
        with open(os.path.join(data_root, 'test'), "rb") as f:
            data_batch = pickle.load(f, encoding='bytes')
        test_images = data_batch[b'data']
        test_images = np.reshape(test_images, [10000, 3, 32, 32])
        test_images = np.transpose(test_images, axes=[0, 2, 3, 1])
        test_labels = np.asarray(data_batch[b'fine_labels'], dtype=np.int32)
        save_to_records('data_' + data_name + '/test.tf', test_images, test_labels)


def create_trainset_with_mask(mask, wheretosave, data_root):
    # Writes a train.tf containing only the CIFAR-10 examples selected by `mask`.
    def save_to_records(save_path, images, labels):
        writer = tf.python_io.TFRecordWriter(save_path)
        for i in range(images.shape[0]):
            image_raw = images[i].tostring()
            example = tf.train.Example(features=tf.train.Features(feature={
                'height': _int64_feature(32),
                'width': _int64_feature(32),
                'depth': _int64_feature(3),
                'label': _int64_feature(int(labels[i])),
                'image_raw': _bytes_feature(image_raw)
            }))
            writer.write(example.SerializeToString())

    # Train set.
    train_images = np.zeros((50000, 3072), dtype=np.uint8)
    train_labels = np.zeros((50000,), dtype=np.int32)
    for i in range(5):
        with open(os.path.join(data_root, 'data_batch_%d' % (i + 1)), "rb") as f:
            data_batch = pickle.load(f, encoding='bytes')
        train_images[10000 * i:10000 * (i + 1)] = data_batch[b'data']
        train_labels[10000 * i:10000 * (i + 1)] = np.asarray(data_batch[b'labels'], dtype=np.int32)
    train_images = np.reshape(train_images, [50000, 3, 32, 32])
    train_images = np.transpose(train_images, axes=[0, 2, 3, 1])  # NCHW -> NHWC
    train_images = train_images[mask, :, :, :]
    train_labels = train_labels[mask]
    print(train_images.shape)
    save_to_records(os.path.join(wheretosave, 'train.tf'), train_images, train_labels)


if __name__ == '__main__':
    create_dataset(FLAGS.data_dir, FLAGS.data_name)
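
The records written above can be read back with the queue-based input pipeline of the TensorFlow of that era (around 0.11); a minimal sketch, where read_record is a hypothetical helper, not part of this commit:

import tensorflow as tf

def read_record(tf_path):
    # Hypothetical reader matching the feature keys written by save_to_records.
    filename_queue = tf.train.string_input_producer([tf_path])
    reader = tf.TFRecordReader()
    _, serialized = reader.read(filename_queue)
    features = tf.parse_single_example(serialized, features={
        'height': tf.FixedLenFeature([], tf.int64),
        'width': tf.FixedLenFeature([], tf.int64),
        'depth': tf.FixedLenFeature([], tf.int64),
        'label': tf.FixedLenFeature([], tf.int64),
        'image_raw': tf.FixedLenFeature([], tf.string),
    })
    image = tf.decode_raw(features['image_raw'], tf.uint8)
    image = tf.reshape(image, [32, 32, 3])  # images were saved as NHWC
    label = tf.cast(features['label'], tf.int32)
    return image, label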
