<a href="https://colab.research.google.com/github/VisionLogic-AI/Brain_Projects/blob/master/OHBM_Modeling.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
# Fetch trunc_data.pkl into the working directory; -nc (no-clobber) skips the
# download when the file is already present.
# NOTE(review): no visible cell reads this file; the `tuning_curves` variable used
# in the attention loop is presumably loaded from it -- confirm.
!wget -nc https://storage.googleapis.com/ohbm-dl-lindsay-data/trunc_data.pkl

--2020-05-18 14:55:35--  https://storage.googleapis.com/ohbm-dl-lindsay-data/trunc_data.pkl
Resolving storage.googleapis.com (storage.googleapis.com)... 74.125.23.128, 2404:6800:4008:c02::80
Connecting to storage.googleapis.com (storage.googleapis.com)|74.125.23.128|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 157200207 (150M) [application/octet-stream]
Saving to: ‘trunc_data.pkl’


2020-05-18 14:55:38 (68.0 MB/s) - ‘trunc_data.pkl’ saved [157200207/157200207]



In [3]:
# Fetch mnistData.py, the helper module imported below as `mnist`;
# -nc (no-clobber) skips the download when the file is already present.
!wget -nc https://raw.githubusercontent.com/arokem/2019-OHBM-DL-educourse-Lindsay/master/mnistData.py

--2020-05-18 14:55:48--  https://raw.githubusercontent.com/arokem/2019-OHBM-DL-educourse-Lindsay/master/mnistData.py
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 151.101.0.133, 151.101.64.133, 151.101.128.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|151.101.0.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 1954 (1.9K) [text/plain]
Saving to: ‘mnistData.py’


2020-05-18 14:55:48 (30.3 MB/s) - ‘mnistData.py’ saved [1954/1954]



In [4]:
# Use TensorFlow 1.x rather than 2.x: the cells below are written against the
# graph/session API (tf.placeholder, tf.InteractiveSession, tf.train.AdamOptimizer).
# %tensorflow_version is a Colab magic, so it is issued before the import.
%tensorflow_version 1.x
import tensorflow as tf
print(tf.__version__)

TensorFlow 1.x selected.
1.15.2


In [0]:
import numpy as np
import matplotlib.pyplot as plt
import mnistData as mnist

# Seed NumPy *and* TensorFlow. The weights are initialised with
# tf.truncated_normal and the fully connected layer uses tf.nn.dropout, so
# seeding NumPy alone does not make a fresh run reproducible.
SEED = 234
np.random.seed(SEED)
tf.set_random_seed(SEED)   # graph-level seed; must be set before the ops are created

batch_size = 128    # number of training images for each batch
im_size = 28        # the images are 28x28 pixels
num_classes = 10    # there are 10 digit classes

# Data helper from mnistData.py; later cells read data.val_images,
# data.val_labels and call data.get_trainbatch().
data = mnist.DataObject(batch_size)

In [0]:
#these functions create the weight and bias variables for the layers
def weight_variable(shape, vname):
  """Return a tf.Variable called `vname` holding weights of the given shape.

  The initial values are drawn from a truncated normal distribution with a
  standard deviation of 0.05.
  """
  return tf.Variable(tf.truncated_normal(shape, stddev=0.05), name=vname)

def bias_variable(shape, vname):
  """Return a tf.Variable called `vname` of the given shape, initialised to 0.0."""
  return tf.Variable(tf.constant(0.0, shape=shape), name=vname)

def attn_input(vals, shape):
  """Tile per-feature attention values up to the full shape of a layer's output.

  Args:
    vals: 1-D tensor (or array) with one multiplicative value per entry of the
      last axis of `shape`.
    shape: list of ints, the static shape of the layer output. A 4-element
      shape (as passed by conv_layer) gets three leading axes added to `vals`;
      any other shape is treated as 2-D and gets one.

  Returns:
    `vals` repeated along every axis except the last, so the result has the
    shape given by `shape` and can be multiplied element-wise with the layer
    output.
  """
  # Prepend singleton axes so `vals` lines up with the last axis of `shape`.
  if len(shape) == 4:
    evals = tf.expand_dims(tf.expand_dims(tf.expand_dims(vals, 0), 0), 0)
  else:
    evals = tf.expand_dims(vals, 0)

  # Repeat over all leading axes but not over the feature axis. This must run
  # for both branches (previously it was nested under `else`, so the 4-D case
  # returned None). Work on a copy so the caller's list is left untouched.
  multiples = list(shape)
  multiples[-1] = 1
  return tf.tile(evals, multiples)

#The following functions create the different kinds of layers that will make up the network.
#In tensorflow, networks are represented as graphs, with nodes representing different computations.
def conv_layer(conv_input, filt_shape, TCs, conv_num):
  """Stride-1 'SAME' convolution followed by ReLU and attention scaling.

  Args:
    conv_input: input tensor handed to tf.nn.conv2d.
    filt_shape: filter shape; its last entry is the number of output features.
    TCs: per-feature attention values, expanded to the output shape by attn_input.
    conv_num: layer index, appended to the variable and op names.

  Returns:
    The ReLU activations multiplied element-wise by the attention values.
  """
  suffix = str(conv_num)
  kernel = weight_variable(filt_shape, 'conv_w' + suffix)
  bias = bias_variable([filt_shape[-1]], 'conv_b' + suffix)

  pre_activation = tf.nn.conv2d(conv_input, kernel, strides=[1, 1, 1, 1], padding='SAME') + bias
  activation = tf.nn.relu(pre_activation, 'conv' + suffix)

  gain = attn_input(TCs, activation.get_shape().as_list())
  return tf.multiply(activation, gain)

def pooling_layer(pool_input, ksize, pool_num):
  """Max-pool `pool_input` with window `ksize`, strides [1,2,2,1] and 'SAME' padding.

  The op is named 'pool' followed by `pool_num`.
  """
  return tf.nn.max_pool(
      pool_input,
      ksize=ksize,
      strides=[1, 2, 2, 1],
      padding='SAME',
      name='pool' + str(pool_num))

def fullyconnected_layer(fc_input, inp_units, out_size, TCs, fc_num):
  """Fully connected ReLU layer with attention scaling and dropout.

  Reads two module-level names at call time: `batch_size` (used to flatten the
  input) and the `keep_prob` placeholder (dropout keep probability).

  Args:
    fc_input: input tensor; flattened to [batch_size, -1] before the matmul.
    inp_units: number of input units per example (rows of the weight matrix).
    out_size: number of output units.
    TCs: per-unit attention values, expanded to the output shape by attn_input.
    fc_num: layer index, appended to the variable and op names.

  Returns:
    The attention-scaled activations after dropout.
  """
  # The unused tf.shape() ops that used to be created here were dead graph
  # nodes and have been removed.
  w_fc = weight_variable([inp_units, out_size], 'fc_w' + str(fc_num))
  b_fc = bias_variable([out_size], 'fc_b' + str(fc_num))

  inp_flat = tf.reshape(fc_input, [batch_size, -1])
  h_fc = tf.nn.relu(tf.matmul(inp_flat, w_fc) + b_fc, 'fc' + str(fc_num))

  attn = attn_input(TCs, h_fc.get_shape().as_list())
  h_fc_attn = tf.multiply(h_fc, attn)
  return tf.nn.dropout(h_fc_attn, keep_prob)

def readout_layer(ro_input, inp_units, out_size):
  """Final fully connected layer producing one output per class.

  Reads the module-level `batch_size` at call time to flatten the input.

  Args:
    ro_input: input tensor; flattened to [batch_size, -1] before the matmul.
    inp_units: number of input units per example (rows of the weight matrix).
    out_size: number of output units.

  Returns:
    The ReLU activations of the readout units.
  """
  # The unused tf.shape() ops that used to be created here were dead graph
  # nodes and have been removed.
  w_fc = weight_variable([inp_units, out_size], 'ro_w')
  b_fc = bias_variable([out_size], 'ro_b')

  inp_flat = tf.reshape(ro_input, [batch_size, -1])
  # NOTE(review): the result is later passed as `logits` to the softmax
  # cross-entropy loss, so the ReLU clips every logit at zero -- confirm this
  # is intended rather than a plain linear readout.
  return tf.nn.relu(tf.matmul(inp_flat, w_fc) + b_fc, 'ro')


#In tensorflow, inputs are given as placeholders. These take the place of the actual images that will be used when we run the network later
x = tf.placeholder(tf.float32, [batch_size, im_size * im_size])   # one flattened image per row
x_image = tf.reshape(x, [-1, im_size, im_size, 1])   # back to im_size x im_size with a single channel
keep_prob = tf.placeholder(tf.float32)   # dropout keep probability, fed at run time

y_int = tf.placeholder(tf.int32, [batch_size])   # integer digit label of each image
y_ = tf.one_hot(y_int, num_classes)   # the same labels, one-hot encoded

In [0]:
# Assemble the CNN: conv -> pool -> conv -> pool -> fully connected -> readout.
FM_1 = 32     # features in the first conv layer
FM_2 = 64     # features in the second conv layer
FM_3 = 1024   # units in the fully connected layer

# One placeholder of attention values per layer (one value per feature);
# they are fed with the digit-specific values when the network is run.
tvals_1 = tf.placeholder(tf.float32, [FM_1])
tvals_2 = tf.placeholder(tf.float32, [FM_2])
tvals_3 = tf.placeholder(tf.float32, [FM_3])

# First conv/pool stage: 3x3 filters on the single input channel, 3x3 pooling window.
conv_1 = conv_layer(x_image, [3, 3, 1, FM_1], tvals_1, 1)
pool_1 = pooling_layer(conv_1, [1, 3, 3, 1], 1)

# Second conv/pool stage: 3x3 filters mapping FM_1 features to FM_2.
conv_2 = conv_layer(pool_1, [3, 3, FM_1, FM_2], tvals_2, 2)
pool_2 = pooling_layer(conv_2, [1, 3, 3, 1], 2)

# Number of values per example once pool_2 is flattened (all axes but the first).
flatten_units = np.prod(pool_2.get_shape().as_list()[1:])

fc_1 = fullyconnected_layer(pool_2, flatten_units, FM_3, tvals_3, 1)
readout = readout_layer(fc_1, FM_3, num_classes)

In [0]:
# Compare the activity of the readout layer with the true digit label using
# cross-entropy loss (averaged over the batch).
cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=readout, labels=y_))

# The Adam optimizer (learning rate 1e-4) implements a modified backpropagation.
train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)

# Fraction of images classified correctly: the predicted class is the index of
# the largest readout unit, the true class the index of the 1 in the one-hot label.
preds = tf.argmax(readout, 1)
correct_prediction = tf.equal(preds, tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

# Preview the first ten validation images on a 2x5 grid.
plt.figure()
for i in range(10):
  plt.subplot(2, 5, i + 1)
  val_im = np.reshape(data.val_images[i, :], [im_size, im_size])
  plt.imshow(val_im, cmap='bone')

# Train Model
The TensorFlow graph is now built. The next step is to start a TensorFlow session and begin training the model.

In [0]:
# Start a TF session and initialize all variables.
sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())

val_acc = 0.      # most recent validation accuracy
val_curve = []    # validation accuracy at every evaluation point
cur_batch = -1    # index of the current training batch
val_every = 50    # evaluate on the validation batch every `val_every` training batches

# Validation set: the first batch_size validation images (the input
# placeholder has a fixed first dimension of batch_size).
ims_val = data.val_images[0:batch_size, :]
labs_val = data.val_labels[0:batch_size]

# The attention values are all set to 1 (no scaling) during training and validation.
no_attn = {tvals_1: np.ones(FM_1), tvals_2: np.ones(FM_2), tvals_3: np.ones(FM_3)}

# Training loop: keep going until the validation accuracy reaches 95% or the
# batch counter reaches 3000.
while (val_acc < .95 and cur_batch < 3000):
  cur_batch += 1
  ims, labs = data.get_trainbatch()
  # Dropout is active during training (keep probability 0.5).
  train_step.run(feed_dict={x: ims, y_int: labs, keep_prob: .5, **no_attn})

  if cur_batch % val_every == 0:
    print('Batch ' + str(cur_batch))

    # Validation accuracy, with dropout switched off (keep probability 1).
    val_acc = accuracy.eval(feed_dict={x: ims_val, y_int: labs_val, keep_prob: 1., **no_attn})
    val_curve.append(val_acc)


# Plot the validation accuracy as a function of training batch. Accuracy is
# recorded every `val_every` batches, so the x axis is scaled by that step.
plt.plot(np.arange(len(val_curve)) * val_every, val_curve)
plt.xlabel('Training batch')
plt.ylabel('Validation accuracy')
plt.show()

# Now Run the Network with Attention
For this we need to make noisy images.

In [0]:
def make_imageset(ims, labs, dig, noise_sc1=1.):
  """Build a noisy image batch whose first half shows `dig` and second half does not.

  Picks the first batch_size//2 rows of `ims` whose label equals `dig`, followed
  by the first batch_size//2 rows with any other label, and adds Gaussian noise
  scaled by `noise_sc1`. `batch_size` is read from module level. `ims` itself
  is not modified (the row selection makes a copy).
  """
  half = batch_size // 2
  target_rows = np.nonzero(labs == dig)[0][:half]
  other_rows = np.nonzero(labs != dig)[0][:half]

  # Indexing with an index array returns a copy, so the in-place add below
  # only touches the selected images.
  noisy = ims[np.concatenate([target_rows, other_rows]), :]
  noisy += np.random.randn(noisy.shape[0], noisy.shape[1]) * noise_sc1
  return noisy

def make_tvals(TCs, dig):
  """Turn tuning values for digit `dig` into multiplicative attention values.

  Takes row `dig` of `TCs`, replaces NaNs with 0, adds 1 and clips negative
  results to 0. The values are used to scale the network's activity.

  Args:
    TCs: 2-D array indexed as [digit, feature].
    dig: row (digit) to select.

  Returns:
    A new 1-D array of non-negative values; `TCs` is left unmodified.
  """
  # Copy the row: plain slicing would give a view and the in-place edits below
  # would write through to the caller's array.
  tvals = np.array(TCs[dig, :])
  tvals[np.isnan(tvals)] = 0
  tvals += 1
  tvals[tvals < 0] = 0
  # Return the computed values (previously the global placeholder `tvals_1`
  # was returned by mistake).
  return tvals

attn_dig = 4     # digit attention is applied to
attn_layer = 2   # layer attention is applied to

# Noisy test set, built from the validation images that come after the first
# batch_size ones (those were used to track validation accuracy during training).
ims_noise = make_imageset(data.val_images[batch_size:, :], data.val_labels[batch_size:], attn_dig, noise_sc1=1.5)

# Example noisy images (the first half of the set contains the attended digit,
# the second half does not). plt.show() is called once at the end so both
# panels are drawn in the same figure.
plt.figure()
plt.subplot(1, 2, 1)
plt.imshow(np.reshape(ims_noise[0, :], [im_size, im_size]), cmap='bone')
plt.title('Attended digit')
plt.subplot(1, 2, 2)
plt.imshow(np.reshape(ims_noise[batch_size // 2, :], [im_size, im_size]), cmap='bone')
plt.title('Other digit')
plt.show()

In [0]:
# In this loop, attention is applied with increasing strength, and the true and
# false positive rates for detecting attn_dig are calculated.
# NOTE(review): `tuning_curves` is not defined in any visible cell -- presumably
# it is loaded from trunc_data.pkl and indexed by layer; confirm before running.
strngs = np.arange(0, 1, .1)
TPs = []
FPs = []

# Placeholder that receives the attention values for each choice of attn_layer.
attn_placeholders = [tvals_1, tvals_2, tvals_3]
if attn_layer not in (1, 2, 3):
  raise ValueError('attn_layer must be 1, 2 or 3')

for strng in strngs:
  tvals = make_tvals(tuning_curves[attn_layer-1]*strng, attn_dig)

  # Every layer gets values of 1 (no scaling) except the attended one.
  feed = {x: ims_noise, keep_prob: 1.,
          tvals_1: np.ones(FM_1), tvals_2: np.ones(FM_2), tvals_3: np.ones(FM_3)}
  feed[attn_placeholders[attn_layer - 1]] = tvals
  predictions = preds.eval(feed_dict=feed)

  # The first half of ims_noise contains attn_dig, the second half does not.
  # Both rates are divided by the size of one half; the parentheses matter,
  # since `a / n // 2` would floor-divide the rate itself.
  half = len(predictions) // 2
  TPs.append(np.sum(predictions[0: batch_size//2] == attn_dig) / half)
  FPs.append(np.sum(predictions[batch_size//2:] == attn_dig) / half)

  print(strng, predictions)

# Results of applying attention on performance.
# Convert the rate lists to arrays so the baseline (strength 0) can be
# subtracted element-wise in the right-hand panel.
tp_rates = np.asarray(TPs)
fp_rates = np.asarray(FPs)

plt.figure(figsize=(12, 6))

# Left: raw true/false positive rates against attention strength.
plt.subplot(1, 2, 1)
plt.plot(strngs, tp_rates)
plt.plot(strngs, fp_rates)
plt.xlabel('Strength of Attention')
plt.ylabel('Rate')
plt.legend(['True +', 'False +'])

# Right: change in each rate relative to the first strength value.
plt.subplot(1, 2, 2)
plt.plot(strngs, tp_rates - tp_rates[0])
plt.plot(strngs, fp_rates - fp_rates[0])
plt.xlabel('Strength of Attention')
plt.ylabel('Rate Change')
plt.legend(['True +', 'False +'])

plt.show()