In [1]:
import torch
import torch.nn as nn
import numpy as np

### inputs

In [2]:
x_color = [ [1, 0,  1],   # 1 color red, no word, color naming task   
            [0, 1,  1] ]  # 2 color green, no word, color naming task   
x_color = torch.tensor(x_color)  
x_color = x_color.float()
X = x_color

y_color = [ [1, 0],       # 1 correct response red
            [0, 1] ]      # 2 correct response green
y_color = torch.tensor(y_color) 
y_color = y_color.float()
Y = y_color

### test input

In [3]:
X_test = [ [1, 0,  1],  
           [0, 1,  1] ]
X_test = torch.tensor(X_test)  
X_test = X_test.float()

### model

In [4]:
class Stroop(nn.Module):
    def __init__(self):
        super().__init__()
        
        self.input_size = 3   # currently color red, green, and color naming task
        self.hidden_size = 2  # two hidden units for color naming task
        self.output_size = 2  # output (correct response) is either red or green

        self.w1 = torch.randn(self.input_size, self.hidden_size)  # weight for hidden layer
        self.w2 = torch.randn(self.hidden_size, self.output_size) # weight for output layer

        self.b1 = torch.randn(1, self.hidden_size) # bias for hidden layer
        self.b2 = torch.randn(1, self.output_size) # bias for output layer       
        
    def sigmoid_activation(self, act):
        return 1 / (1 + torch.exp(-act))
        
    def forward(self, X):
        self.input_hidden = torch.mm(X, self.w1) + self.b1
        self.act_input_hidden = self.sigmoid_activation(self.input_hidden) # activation of hidden layer 
        self.hidden_output = torch.mm(self.act_input_hidden, self.w2) + self.b2
        act_hidden_output = self.sigmoid_activation(self.hidden_output)    # output of activation of final layer 
        return act_hidden_output
    
    def derivative_sigmoid(self, act_deriv):
        return act_deriv * (1 - act_deriv)
    
    def backpropagation(self, X, Y, act_hidden_output):
        self.output_error = Y - act_hidden_output
        self.output_deriv_error = self.output_error * self.derivative_sigmoid(self.hidden_output) # ask Taku/Mike about whether to use 'hidden_output' or 'act_hidden_output'
        
        self.hidden_error = torch.mm(self.output_deriv_error, self.w2.t()) # how much the hidden layer weights contributed to output error
        self.hidden_deriv_error = self.hidden_error * self.derivative_sigmoid(self.input_hidden)
        
        self.learning_rate = 0.1
        
        self.w2 += torch.mm(self.act_input_hidden.t(), self.output_deriv_error) * self.learning_rate
        self.w1 += torch.mm(X.t(), self.hidden_deriv_error) * self.learning_rate

        self.b2 += self.output_deriv_error.sum() * self.learning_rate
        self.b1 += self.hidden_deriv_error.sum() * self.learning_rate
    
    def train(self, X, Y):
        act_hidden_output = self.forward(X)
        self.backpropagation(X, Y, act_hidden_output)
        
    def save_w(self):
        np.savetxt("w1.txt", self.w1, fmt = "%s")
        np.savetxt("w2.txt", self.w2, fmt = "%s")
        
    def test(self):
        print("Test new input based on trained weights: ")
        print("Input: \n" + str(X_test))
        print("Output: \n" + str(self.forward(X_test)))

### training!

In [5]:
model = Stroop()
for iteration in range(100):
    print("# " + str(iteration) + "\n")
    print("Input: \n" + str(X))
    print("Correct output: \n" + str(Y))
    print("NN output: \n" + str(model.forward(X)))
    mse = torch.mean(Y - model.forward(X))**2 # mean sum squared loss
    print("Loss: \n" + str(mse)) 
    print("\n")
    model.train(X, Y)

model.save_w()
model.test()

# 0

Input: 
tensor([[1., 0., 1.],
        [0., 1., 1.]])
Correct output: 
tensor([[1., 0.],
        [0., 1.]])
NN output: 
tensor([[0.5424, 0.7690],
        [0.6426, 0.8632]])
Loss: 
tensor(0.0417)


# 1

Input: 
tensor([[1., 0., 1.],
        [0., 1., 1.]])
Correct output: 
tensor([[1., 0.],
        [0., 1.]])
NN output: 
tensor([[0.5246, 0.7644],
        [0.6123, 0.8495]])
Loss: 
tensor(0.0352)


# 2

Input: 
tensor([[1., 0., 1.],
        [0., 1., 1.]])
Correct output: 
tensor([[1., 0.],
        [0., 1.]])
NN output: 
tensor([[0.5105, 0.7625],
        [0.5815, 0.8344]])
Loss: 
tensor(0.0297)


# 3

Input: 
tensor([[1., 0., 1.],
        [0., 1., 1.]])
Correct output: 
tensor([[1., 0.],
        [0., 1.]])
NN output: 
tensor([[0.5004, 0.7630],
        [0.5537, 0.8197]])
Loss: 
tensor(0.0254)


# 4

Input: 
tensor([[1., 0., 1.],
        [0., 1., 1.]])
Correct output: 
tensor([[1., 0.],
        [0., 1.]])
NN output: 
tensor([[0.4939, 0.7656],
        [0.5293, 0.8059]])
Loss: 
tensor(0.022