In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import itertools

%matplotlib inline

In [2]:
mnist = pd.read_csv("data/mnist1.5k.csv.gz", compression="gzip", header=None).values
X=mnist[:,1:785]/255.
y=mnist[:,0]
print "dimension de las imagenes y las clases", X.shape, y.shape

dimension de las imagenes y las clases (1500, 784) (1500,)


In [3]:
X_train, X_test, y_train, y_test = X[:300], X[300:], y[:300], y[300:]
y_train_oh = np.eye(10)[y_train]
y_test_oh  = np.eye(10)[y_test]
print X_train.shape, y_train_oh.shape,  X_test.shape, y_test_oh.shape

(300, 784) (300, 10) (1200, 784) (1200, 10)


## Network architectures lab


from the notes we have two architectures:
    
- **A**: Three layer network accepting a 784 element vector as input and outputing a 10-class vector
- **B**: Same as **A** but accepts an additional 2 element vector with _evenness_ information that is injected at the third layer.

This lab requires you to do two things:

**1**. Create a **C** architecture similar to **B** but where the 2 element vector is injected at the second layer. This will make the network have 41,650 parameters, distributed in the following way:
   
   

        INPUT 1 to LAYER 1:              784*50 + 50 (bias) = 39250
        LAYER 1 to LAYER 2:               50*30 + 30 (bias) = 1530
        LAYER 2 + INPUT 2 to LAYER 3: (30+2)*20 + 20 (bias) = 660
        LAYER 3 to OUTPUT:                20*10 + 10 (bias) = 210
    
                                                       TOTAL 41650
                                                       
**2**. Run an experimental setup where you train different network configurations and measure the accuracy on test data. Fix the number of neurons to 50, 30 and 20 for each layer and the following combination of parameters:

- For architecture **A** (3 configurations)

    `s3_activation` $\in$ `["linear", "relu", "tanh"]`

- For architectures **B** and **C** (15 configurations for each architecture)

    `s3_activation` $\in$ `["linear", "relu", "tanh"]`
    
    `k1,k2` $\in$ `[(0,1), (-.5,2),(-.5,30), (0,15),(0,30)]`

And create a heat map showing the accuracy in test obtained for each configuration, such as the following (your results should be approximate to this):

![alt text](./Images/mm_results_1.png)


And two bar plots with the average per architecture and k1,k2 configuration, such as the following (again, your results should be approximetely similar):

![alt text](./Images/mm_results_2.png)

### Model definitions

In [4]:
from tensorflow.keras import Sequential, Model
from tensorflow.keras.layers import Dense, Dropout, Flatten, concatenate, Input
from tensorflow.keras.backend import clear_session

In [5]:
def get_model_A(input_dim, s1, s2, s3, s3_activation="relu"):
    clear_session()
    model = Sequential()
    model = Sequential()
    model.add(Dense(s1, activation='relu', input_dim=input_dim))
    model.add(Dense(s2, activation='relu'))
    model.add(Dense(s3, activation=s3_activation))
    model.add(Dense(10, activation='softmax'))

    model.compile(optimizer='adam', loss='categorical_crossentropy')
    model.reset_states()
    return model

In [6]:
def get_model_B(input_dim, extra_info_dim,  s1, s2, s3, s3_activation="relu"):
    clear_session()
    inp1 = Input(shape=(input_dim,))
    l11 = Dense(s1, activation="relu")(inp1)
    l12 = Dense(s2, activation="relu")(l11)
    l13 = Dense(s3, activation=s3_activation)(l12)
    
    inp2 = Input(shape=(extra_info_dim,))
    cc1 = concatenate([l13, inp2],axis=1) # Merge row, same column
    output = Dense(10, activation='softmax')(cc1)
    model = Model(inputs=[inp1, inp2], outputs=output)
    model.compile(optimizer='adam', loss='categorical_crossentropy')
    model.reset_states()
    return model

In [7]:
def get_model_C(input_dim, extra_info_dim, s1, s2, s3, s3_activation="relu"):
    clear_session()
    inp2 = Input(shape=(extra_info_dim,))
    inp1 = Input(shape=(input_dim,))
    l11 = Dense(s1, activation="relu")(inp1)
    l12 = Dense(s2, activation="relu")(l11)
    cc1 = concatenate([l12, inp2],axis=1) # Merge row, same column
    l13 = Dense(s3, activation=s3_activation)(l12)
    

    output = Dense(10, activation='softmax')(cc1)
    model = Model(inputs=[inp1, inp2], outputs=output)
    model.compile(optimizer='adam', loss='categorical_crossentropy')
    model.reset_states()
    return model

In [8]:
def get_X_extra(y_train, y_test, k0, k1):
    X_train_extra = (np.eye(2)[y_train%2]+k0)*k1
    X_test_extra  = (np.eye(2)[y_test%2]+k0)*k1
    return X_train_extra, X_test_extra

### Use the following dataframe to record your data 

In [9]:
k_set     = [(0,1), (-.5,2),(-.5,30), (0,15),(0,30), None]
act_set   = ["linear", "relu", "tanh"]
arch_set  = ["A", "B", "C"]


r_test = pd.DataFrame(np.zeros((len(arch_set)*len(act_set), len(k_set)))*np.nan, 
                      index=[[a+"-"+b for a,b in itertools.product (arch_set, act_set)]],
                      columns=[str(i) for i in k_set])
r_test

Unnamed: 0,"(0, 1)","(-0.5, 2)","(-0.5, 30)","(0, 15)","(0, 30)",None
A-linear,,,,,,
A-relu,,,,,,
A-tanh,,,,,,
B-linear,,,,,,
B-relu,,,,,,
B-tanh,,,,,,
C-linear,,,,,,
C-relu,,,,,,
C-tanh,,,,,,


### Build a loop over the configurations

In [36]:
for k, act, arch in itertools.product(k_set, act_set, arch_set):
    
    print "%10s %8s %1s"%(str(k), act, arch)    
    
    if arch == 'A':
        model = get_model_A(input_dim=X.shape[1], s1=50, s2=30, s3=20,s3_activation=act)
        model.fit(X_train, y_train_oh, epochs=200, batch_size=32, validation_data=(X_test, y_test_oh)) 
        preds_test = model.predict(X_test).argmax(axis=1)    
      
    if arch == 'B':
        X_train_extra, X_test_extra = get_X_extra(y_train, y_test, k0=k[0], k1=k[1])
        model = get_model_B(input_dim=X.shape[1], extra_info_dim=X_train_extra.shape[1], s1=50, s2=30, s3=20, s3_activation=act)
        model.fit([X_train, X_train_extra], y_train_oh, epochs=200, batch_size=100, validation_data=([X_test, X_test_extra], y_test_oh))
        preds_test = model.predict([X_test, X_test_extra]).argmax(axis=1)
        
   
    if arch=='C' and k!=None:
        X_train_extra, X_test_extra = get_X_extra(y_train, y_test, k0=k[0], k1=k[1])
        model = get_model_C(input_dim=X.shape[1], extra_info_dim=X_train_extra.shape[1], s1=50, s2=30, s3=20, s3_activation=act)
        model.fit([X_train, X_train_extra], y_train_oh, epochs=200, batch_size=32, validation_data=([X_test, X_test_extra], y_test_oh))
        preds_test = model.predict([X_test, X_test_extra]).argmax(axis=1)

    
   
    acc = np.mean(preds_test==y_test)
    model_act = arch+"-"+act
    r_test.set_value(model_act, str(k), str(acc))   


    (0, 1)   linear A


ValueError: Error when checking model input: the list of Numpy arrays that you are passing to your model is not the size the model expected. Expected to see 1 array(s), but instead got the following list of 2 arrays: [array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0....

In [None]:
r_test

### Build the heatmap

In [None]:
plt.figure(figsize=(8,3))

... YOUR CODE HERE ...

### Build the bar plots

In [None]:
plt.figure(figsize=(15,4))
plt.subplot(121)

... YOUR CODE HERE ...

plt.subplot(122)

... YOUR CODE HERE ...