In [11]:
import numpy as np
import pandas as pd

from loguru import logger

In [3]:

class NN:
    def __init__(self, layer_sizes):
        self.layer_sizes = layer_sizes
        self.weights = [np.random.randn(layer_sizes[i], layer_sizes[i+1]) for i in range(len(layer_sizes)-1)]
        self.biases = [np.zeros(layer_sizes[i+1]) for i in range(len(layer_sizes)-1)]
    
    def layers(self):
        return [(self.weights[i], self.biases[i]) for i in range(len(self.layer_sizes)-1)]
    
    def forward(self, X):
        activations = [X]
        for i in range(len(self.layer_sizes)-1):
            z = np.dot(activations[-1], self.weights[i]) + self.biases[i]
            activation = np.maximum(0, z) # ReLU activation
            activations.append(activation)
        return activations
    
    def backward(self, X, y):
        activations = self.forward(X)
        d_activations = [2*(activations[-1] - y)]
        d_weights = []
        d_biases = []
        for i in range(len(self.layer_sizes)-2, -1, -1):
            d_activation = d_activations[0]
            z = np.dot(activations[i], self.weights[i]) + self.biases[i]
            d_z = d_activation * (z > 0).astype(float) # ReLU derivative
            d_weight = np.dot(activations[i].T, d_z)
            d_bias = np.sum(d_z, axis=0)
            d_activations.insert(0, np.dot(d_z, self.weights[i].T))
            d_weights.insert(0, d_weight)
            d_biases.insert(0, d_bias)
        return d_weights, d_biases
    
    def train(self, X, y, learning_rate=0.1, num_epochs=1000):
        for epoch in range(num_epochs):
            d_weights, d_biases = self.backward(X, y)
            for i in range(len(self.layer_sizes)-1):
                self.weights[i] -= learning_rate * d_weights[i]
                self.biases[i] -= learning_rate * d_biases[i]
    
    def predict(self, X):
        activations = self.forward(X)
        return activations[-1]


In [15]:
def read_csv(path: str) -> pd.DataFrame:
    file = pd.read_csv(path)
    return file

TRAIN_SET = pd.DataFrame(read_csv("./train.csv"))
TEST_SET = pd.DataFrame(read_csv("./test.csv"))
TRAIN_SET.head()

Unnamed: 0,id,text,emotions
0,27383,i feel awful about it too because it s my job ...,sadness
1,110083,im alone i feel awful,sadness
2,140764,ive probably mentioned this before but i reall...,joy
3,100071,i was feeling a little low few days back,sadness
4,2837,i beleive that i am much more sensitive to oth...,love


In [22]:
dataset_train = TRAIN_SET
dataset_test = TEST_SET

In [24]:
texts_train = np.array([dataset_train["text"]])
emotions_train = np.array([dataset_train["emotions"]])

texts_test = np.array([dataset_test["text"]])

In [18]:
emotions_train

array([['sadness', 'sadness', 'joy', ..., 'sadness', 'surprise',
        'sadness']], dtype=object)

In [25]:
labels = np.unique(emotions_train)
num_labels = len(labels)

one_hot = np.zeros((num_labels, num_labels), np.int8)
np.fill_diagonal(one_hot, 1)

label_dict = dict(zip(labels, one_hot))
# logger.debug(label_dict)

gt_one_hot = np.array([label_dict[label] for label in emotions_train[0]])

In [26]:
layer_sizes = [len(texts_train[0]), 128, 64, num_labels]
nn = NN(layer_sizes)
nn.train(texts_train, emotions_train)

emotions_pred = nn.predict(texts_test)

emotions_pred_lables = np.array([labels[_] for _ in np.argmax(emotions_pred, axis=1)])



TypeError: can't multiply sequence by non-int of type 'float'