In [29]:
import tensorflow as tf
import numpy as np


In [30]:
# XOR truth table: every row of `x` is one input pair and the matching row
# of `y` holds the XOR of that pair.
x = np.array(
    [
        [0, 0],
        [0, 1],
        [1, 0],
        [1, 1],
    ]
)
y = np.array([[first ^ second] for first, second in x.tolist()])

In [31]:
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, Input, Concatenate
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.utils import plot_model
from tensorflow.keras.initializers import Constant
from tensorflow.keras.constraints import Constraint

# Exercise 2A

In [109]:
def get_model(act: str):
    """Build the uncompiled two-layer dense network used in Exercise 2A.

    The network is a hidden `Dense` layer with 2 units followed by a `Dense`
    output layer with 1 unit. No input shape is declared here.

    Args:
        act: Name of the Keras activation applied to BOTH the hidden layer
            and the output layer.

    Returns:
        The uncompiled `Sequential` model.
    """
    hidden_layer = Dense(2, activation=act)
    output_layer = Dense(1, activation=act)
    return Sequential([hidden_layer, output_layer])

In [110]:
def compare(y_true, model, x):
    """Print the expected label next to the thresholded model output.

    Args:
        y_true: Iterable of expected labels, one entry per row of `x`.
        model: Object exposing `predict(x)` that returns an array of scores.
        x: Inputs handed to `model.predict`.

    Returns:
        None. One line is printed per (expected, predicted) pair.
    """
    scores = model.predict(x)
    # Scores strictly above 0.5 count as class 1, everything else as class 0.
    labels = (scores > 0.5).astype("int16")

    for expected, predicted in zip(y_true, labels):
        print(f"Expected: {expected} ; Output: {predicted}")

## Without Momentum

### Sigmoid

In [213]:
# Exercise 2A, sigmoid, no momentum: SGD with learning rate 0.2 on the MSE loss, 5000 epochs.
model = get_model(act="sigmoid")
model.compile(optimizer=SGD(learning_rate=0.2), loss="mean_squared_error")
model.fit(x=x, y=y, epochs=5000, verbose=0)

<keras.callbacks.History at 0x24eac8400a0>

In [214]:
print(model.evaluate(x,y))

0.04833178222179413


In [215]:
compare(y, model, x)

Expected: [0] ; Output: [0]
Expected: [1] ; Output: [1]
Expected: [1] ; Output: [1]
Expected: [0] ; Output: [0]


### Tanh

In [295]:
# Exercise 2A, tanh, no momentum: SGD with learning rate 0.2 on the MSE loss, 500 epochs.
model = get_model(act="tanh")
model.compile(optimizer=SGD(learning_rate=0.2), loss="mean_squared_error")
model.fit(x=x, y=y, epochs=500, verbose=0)

<keras.callbacks.History at 0x24ebbdfb130>

In [296]:
print(model.evaluate(x,y))

0.0024268738925457


In [297]:
compare(y, model, x)

Expected: [0] ; Output: [0]
Expected: [1] ; Output: [1]
Expected: [1] ; Output: [1]
Expected: [0] ; Output: [0]


### ReLU

In [477]:
# Exercise 2A, ReLU, no momentum: SGD with learning rate 0.2 on the MSE loss, 1000 epochs.
model = get_model(act="relu")
model.compile(optimizer=SGD(learning_rate=0.2), loss="mean_squared_error")
model.fit(x=x, y=y, epochs=1000, verbose=0)

<keras.callbacks.History at 0x24edfc5f880>

In [478]:
print(model.evaluate(x,y))

0.1666666567325592


In [479]:
compare(y, model, x)

Expected: [0] ; Output: [0]
Expected: [1] ; Output: [1]
Expected: [1] ; Output: [0]
Expected: [0] ; Output: [0]


## With Momentum

### Sigmoid

In [343]:
# Exercise 2A, sigmoid, with momentum: SGD with learning rate 0.2 and momentum 0.25, 4000 epochs.
model = get_model(act="sigmoid")
model.compile(
    optimizer=SGD(learning_rate=0.2, momentum=0.25),
    loss="mean_squared_error",
)
model.fit(x=x, y=y, epochs=4000, verbose=0)

<keras.callbacks.History at 0x24ea5cbbaf0>

In [344]:
print(model.evaluate(x,y))

0.09335245192050934


In [345]:
compare(y, model, x)

Expected: [0] ; Output: [0]
Expected: [1] ; Output: [1]
Expected: [1] ; Output: [1]
Expected: [0] ; Output: [0]


### Tanh

In [328]:
# Exercise 2A, tanh, with momentum: SGD with learning rate 0.2 and momentum 0.5, 100 epochs.
model = get_model(act="tanh")
model.compile(
    optimizer=SGD(learning_rate=0.2, momentum=0.5),
    loss="mean_squared_error",
)
model.fit(x=x, y=y, epochs=100, verbose=0)

<keras.callbacks.History at 0x24ec06e4b20>

In [329]:
print(model.evaluate(x,y))

0.02062336727976799


In [330]:
compare(y, model, x)

Expected: [0] ; Output: [0]
Expected: [1] ; Output: [1]
Expected: [1] ; Output: [1]
Expected: [0] ; Output: [0]


### ReLU

In [462]:
# Exercise 2A, ReLU, with momentum: SGD with learning rate 0.2 and momentum 0.5, 1000 epochs.
model = get_model(act="relu")
model.compile(
    optimizer=SGD(learning_rate=0.2, momentum=0.5),
    loss="mean_squared_error",
)
model.fit(x=x, y=y, epochs=1000, verbose=0)

<keras.callbacks.History at 0x24edc83f400>

In [463]:
print(model.evaluate(x,y))

0.1666666716337204


In [464]:
compare(y, model, x)

Expected: [0] ; Output: [0]
Expected: [1] ; Output: [0]
Expected: [1] ; Output: [1]
Expected: [0] ; Output: [0]


# Exercise 2B

In [452]:
def get_model_2(act: str):
    """Build the uncompiled network with a skip connection used in Exercise 2B.

    A single hidden unit reads the two inputs; the single output unit then
    reads the two raw inputs concatenated with the hidden unit's output.

    Args:
        act: Name of the Keras activation applied to BOTH the hidden unit
            and the output unit.

    Returns:
        The uncompiled functional `Model` mapping the 2 inputs to 1 output.
    """
    # `shape` is documented as a tuple that excludes the batch dimension; a
    # bare integer is not accepted by every Keras version.
    inputs = Input(shape=(2,))
    hidden = Dense(1, activation=act)(inputs)

    # Skip connection: feed the raw inputs alongside the hidden activation
    # (axis 1 is the feature axis of the (batch, features) tensors).
    second_input = Concatenate(axis=1)([inputs, hidden])

    output2 = Dense(1, activation=act)(second_input)
    return Model(inputs=inputs, outputs=output2)

## Without Momentum

### Sigmoid

In [538]:
# Exercise 2B, sigmoid, no momentum: SGD with learning rate 0.2 on the MSE loss, 10000 epochs.
model = get_model_2(act="sigmoid")
model.compile(optimizer=SGD(learning_rate=0.2), loss="mean_squared_error")
model.fit(x=x, y=y, epochs=10000, verbose=0)

<keras.callbacks.History at 0x24eedaf0250>

In [539]:
print(model.evaluate(x,y))

0.005516805686056614


In [540]:
compare(y, model, x)

Expected: [0] ; Output: [0]
Expected: [1] ; Output: [1]
Expected: [1] ; Output: [1]
Expected: [0] ; Output: [0]


### Tanh

In [646]:
# Exercise 2B, tanh, no momentum: SGD with learning rate 0.2 on the MSE loss, 500 epochs.
model = get_model_2(act="tanh")
model.compile(optimizer=SGD(learning_rate=0.2), loss="mean_squared_error")
model.fit(x=x, y=y, epochs=500, verbose=0)

<keras.callbacks.History at 0x24f03bf9220>

In [647]:
print(model.evaluate(x,y))

0.012203074991703033


In [648]:
compare(y, model, x)

Expected: [0] ; Output: [0]
Expected: [1] ; Output: [1]
Expected: [1] ; Output: [1]
Expected: [0] ; Output: [0]


### ReLU

In [643]:
# Exercise 2B, ReLU, no momentum: SGD with learning rate 0.2 on the MSE loss, 20000 epochs.
model = get_model_2(act="relu")
model.compile(optimizer=SGD(learning_rate=0.2), loss="mean_squared_error")
model.fit(x=x, y=y, epochs=20000, verbose=0)

<keras.callbacks.History at 0x24f029ce4f0>

In [644]:
print(model.evaluate(x,y))

0.5


In [645]:
compare(y, model, x)

Expected: [0] ; Output: [0]
Expected: [1] ; Output: [0]
Expected: [1] ; Output: [0]
Expected: [0] ; Output: [0]


## With Momentum

### Sigmoid

In [553]:
# Exercise 2B, sigmoid, with momentum: SGD with learning rate 0.2 and momentum 0.25, 5000 epochs.
model = get_model_2(act="sigmoid")
model.compile(
    optimizer=SGD(learning_rate=0.2, momentum=0.25),
    loss="mean_squared_error",
)
model.fit(x=x, y=y, epochs=5000, verbose=0)

<keras.callbacks.History at 0x24edfd59d30>

In [554]:
print(model.evaluate(x,y))

0.02028316631913185


In [555]:
compare(y, model, x)

Expected: [0] ; Output: [0]
Expected: [1] ; Output: [1]
Expected: [1] ; Output: [1]
Expected: [0] ; Output: [0]


### Tanh

In [622]:
# Exercise 2B, tanh, with momentum: SGD with learning rate 0.2 and momentum 0.25, 500 epochs.
model = get_model_2(act="tanh")
model.compile(
    optimizer=SGD(learning_rate=0.2, momentum=0.25),
    loss="mean_squared_error",
)
model.fit(x=x, y=y, epochs=500, verbose=0)

<keras.callbacks.History at 0x24efd172dc0>

In [623]:
print(model.evaluate(x,y))

0.020200807601213455


In [624]:
compare(y, model, x)

Expected: [0] ; Output: [0]
Expected: [1] ; Output: [1]
Expected: [1] ; Output: [1]
Expected: [0] ; Output: [0]


### ReLU

In [640]:
# Exercise 2B, ReLU, with momentum: SGD with learning rate 0.2 and momentum 0.25, 20000 epochs.
model = get_model_2(act="relu")
model.compile(
    optimizer=SGD(learning_rate=0.2, momentum=0.25),
    loss="mean_squared_error",
)
model.fit(x=x, y=y, epochs=20000, verbose=0)

<keras.callbacks.History at 0x24f017ef6a0>

In [641]:
print(model.evaluate(x,y))

0.2499999850988388


In [642]:
compare(y, model, x)

Expected: [0] ; Output: [0]
Expected: [1] ; Output: [0]
Expected: [1] ; Output: [0]
Expected: [0] ; Output: [0]


# Comments

In general, the network of *Problem 2* obtained more consistent results, using considerably fewer *epochs*, than the network of *Problem 1*.
In both scenarios we noticed that the ReLU activation function was not well suited to the problem at hand: even with a very large number of *epochs* it did not reach the solution. On the other hand, the hyperbolic tangent seems to be the best activation function, as it reached the solution with very few epochs.

In both cases, using momentum, the desired result was, in general, reached faster.