In [None]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
# Activation function (sigmoid). Its derivative, h * (1 - h), is applied inline in rnn_backward.

def sigmoid(x):
    """Return the logistic sigmoid ``1 / (1 + exp(-x))`` of *x*, element-wise.

    The value is evaluated as ``exp(-log(1 + exp(-x)))`` through
    ``np.logaddexp`` so that large negative inputs do not overflow
    ``np.exp`` (the naive form emits a RuntimeWarning there). The result
    matches the naive formula up to floating-point rounding.

    Args:
        x: A real scalar or NumPy array.

    Returns:
        The sigmoid of *x*, in [0, 1], with the same shape as *x*.
    """
    # logaddexp(0, -x) == log(exp(0) + exp(-x)) == log(1 + exp(-x)),
    # computed without ever materialising a huge exp(-x).
    return np.exp(-np.logaddexp(0, -x))

def initialize_parameters(input_size, hidden_size, output_size):
    """Create the initial weights and biases for the network.

    The three weight matrices are filled with standard-normal samples,
    drawn in the order Wxh, Whh, Why (the order matters when the global
    NumPy RNG is seeded). Both bias rows start at zero.

    Args:
        input_size: Number of input features.
        hidden_size: Number of hidden units.
        output_size: Number of output units.

    Returns:
        Tuple ``(Wxh, Whh, Why, bh, by)`` with shapes
        ``(input_size, hidden_size)``, ``(hidden_size, hidden_size)``,
        ``(hidden_size, output_size)``, ``(1, hidden_size)`` and
        ``(1, output_size)``.
    """
    weight_shapes = (
        (input_size, hidden_size),
        (hidden_size, hidden_size),
        (hidden_size, output_size),
    )
    Wxh, Whh, Why = (np.random.randn(rows, cols) for rows, cols in weight_shapes)
    bh, by = (np.zeros((1, width)) for width in (hidden_size, output_size))
    return Wxh, Whh, Why, bh, by

def rnn_forward(inputs, h_prev, Wxh, Whh, Why, bh, by):
    """Run a single forward step of the recurrent cell.

    The new hidden state is the sigmoid of the input projection plus the
    recurrent projection plus the hidden bias. The output is the sigmoid
    of the hidden state projected through ``Why`` plus the output bias.

    Args:
        inputs: Features for this step; multiplied on the right by ``Wxh``.
        h_prev: Hidden state from the previous step.
        Wxh, Whh, Why: Input-to-hidden, hidden-to-hidden and
            hidden-to-output weight matrices.
        bh, by: Hidden and output biases.

    Returns:
        Tuple ``(h_t, y_pred)``: the new hidden state and the output.
    """
    input_term = np.dot(inputs, Wxh)
    recurrent_term = np.dot(h_prev, Whh)
    h_t = sigmoid(input_term + recurrent_term + bh)

    output_pre_activation = np.dot(h_t, Why) + by
    return h_t, sigmoid(output_pre_activation)

def rnn_backward(inputs, h_prev, h_t, y_pred, target, Wxh, Whh, Why, bh, by, learning_rate):
    """Apply one gradient-descent step computed from a single forward step.

    The output error ``y_pred - target`` is used directly as the gradient
    at the output unit. It is then propagated back to the hidden layer
    through ``Why`` and the hidden sigmoid's derivative ``h_t * (1 - h_t)``.
    No gradient is propagated into ``h_prev`` (earlier steps).

    Every parameter array is updated in place and also returned.

    Args:
        inputs: Features used in the forward step.
        h_prev: Hidden state that was fed into the forward step.
        h_t: Hidden state produced by the forward step.
        y_pred: Output produced by the forward step.
        target: Desired output for this step.
        Wxh, Whh, Why, bh, by: Parameters to update (mutated in place).
        learning_rate: Step size multiplying each gradient.

    Returns:
        Tuple ``(Wxh, Whh, Why, bh, by)`` of the updated parameters.
    """
    output_delta = y_pred - target
    # Uses the pre-update Why; all gradients are computed before any update.
    hidden_delta = np.dot(output_delta, Why.T) * h_t * (1 - h_t)

    param_grad_pairs = (
        (Wxh, np.dot(inputs.T, hidden_delta)),
        (Whh, np.dot(h_prev.T, hidden_delta)),
        (Why, np.dot(h_t.T, output_delta)),
        (bh, np.sum(hidden_delta, axis=0, keepdims=True)),
        (by, np.sum(output_delta, axis=0, keepdims=True)),
    )
    for param, grad in param_grad_pairs:
        # Augmented assignment on an ndarray modifies the caller's array.
        param -= learning_rate * grad

    return Wxh, Whh, Why, bh, by

def train_rnn(X, y, input_size, hidden_size, output_size, learning_rate, num_epochs):
    """Train the RNN with one gradient step per sample and return its parameters.

    Each epoch starts from a zero hidden state and walks through the
    samples in order, carrying the hidden state from one sample to the
    next. After every sample the parameters are updated with
    ``rnn_backward``, and the thresholded prediction is scored against the
    label. The epoch's average score is printed.

    The output unit is a sigmoid, so it only produces values in (0, 1),
    and it is decoded as ``< 0.35 -> -1``, ``< 0.65 -> 0``, else ``1``.
    Labels are therefore rescaled from [-1, 1] to [0, 1]
    (``-1 -> 0``, ``0 -> 0.5``, ``1 -> 1``) before the gradient step, so
    each class is trained toward the region it is decoded from. Passing
    the raw label would drive class 0 toward an output of 0, which decodes
    as -1. This assumes labels are in {-1, 0, 1}, the only values the
    decoding can produce.

    Args:
        X: Iterable of feature rows, each with ``input_size`` values.
        y: Iterable of labels aligned with *X*.
        input_size: Number of features per sample.
        hidden_size: Number of hidden units.
        output_size: Number of output units (the thresholding below
            requires a single output value).
        learning_rate: Step size for the parameter updates.
        num_epochs: Number of passes over the data.

    Returns:
        Tuple ``(Wxh, Whh, Why, bh, by)`` of trained parameters.
    """
    Wxh, Whh, Why, bh, by = initialize_parameters(input_size, hidden_size, output_size)

    for epoch in range(num_epochs):
        total_loss = 0.0
        num_samples = 0
        h_prev = np.zeros((1, hidden_size))

        for inputs, target in zip(X, y):
            inputs = np.array([inputs])  # Convert inputs to a 2D array
            h_t, y_pred = rnn_forward(inputs, h_prev, Wxh, Whh, Why, bh, by)

            # Map the label into the sigmoid's range so that it lines up
            # with the decoding thresholds used below.
            scaled_target = (target + 1) / 2
            Wxh, Whh, Why, bh, by = rnn_backward(
                inputs, h_prev, h_t, y_pred, scaled_target,
                Wxh, Whh, Why, bh, by, learning_rate,
            )

            # Decode the sigmoid output into a rating class.
            if y_pred < 0.35:
                predicted_label = -1
            elif y_pred < 0.65:
                predicted_label = 0
            else:
                predicted_label = 1

            # Half squared error between the decoded class and the label.
            total_loss += 0.5 * (predicted_label - target) ** 2
            num_samples += 1

            h_prev = h_t

        # Average over the samples actually processed: zip() stops at the
        # shorter of X and y, and an empty dataset must not divide by zero.
        average_loss = total_loss / num_samples if num_samples else float("nan")
        print(f'Epoch {epoch + 1}/{num_epochs}, Loss: {average_loss:.4f}')

    return Wxh, Whh, Why, bh, by


def test_rnn(X_test, Wxh, Whh, Why, bh, by):
    """Predict a rating class for every sample in *X_test*.

    Starts from a zero hidden state and carries the hidden state from one
    sample to the next. Each sigmoid output is decoded as
    ``< 0.35 -> -1``, ``< 0.65 -> 0``, otherwise ``1``.

    Args:
        X_test: Iterable of feature rows; each row is wrapped into a
            one-row array before the forward step.
        Wxh, Whh, Why, bh, by: Trained parameters, in the order returned
            by ``train_rnn``.

    Returns:
        List with one of ``-1``, ``0`` or ``1`` per sample, in input order.
    """
    # The hidden size is the number of columns of the input-to-hidden matrix.
    h_prev = np.zeros((1, Wxh.shape[1]))
    predictions = []

    for inputs in X_test:
        inputs = np.array([inputs])  # Convert inputs to a 2D array
        h_t, y_pred = rnn_forward(inputs, h_prev, Wxh, Whh, Why, bh, by)

        # Decode the sigmoid output into a rating class.
        if y_pred < 0.35:
            predicted_label = -1
        elif y_pred < 0.65:
            predicted_label = 0
        else:
            predicted_label = 1

        predictions.append(predicted_label)
        h_prev = h_t

    return predictions


from sklearn.model_selection import train_test_split
# Driver: train on the full training frame, predict on the test set and
# write the predictions to CSV.
# NOTE(review): `pd`, `df`, `dftrain`, `dftestcount` and `dftest` are not
# defined in this section; presumably they come from earlier notebook cells.
#
# Disabled hold-out split, so no validation set is carved out here:
# X_train, X_test, y_train, y_test = train_test_split(np.array(df), np.array(dftrain["rating"]), test_size=0.2, random_state=42)
X_train = np.array(df)  # every column of `df` is used as a feature
y_train = np.array(dftrain["rating"])  # labels; assumes rows align with `df` — TODO confirm
input_size = len(df.columns)
hidden_size = 20
output_size = 1
learning_rate = 0.01
num_epochs =50

# train_rnn returns the tuple (Wxh, Whh, Why, bh, by).
trained_weights = train_rnn(X_train, y_train, input_size, hidden_size, output_size, learning_rate, num_epochs)
# NOTE(review): training features are all columns of `df`, but the test
# features are the single 'review_description' column of `dftestcount`.
# Verify that each element iterated here carries `input_size` values;
# otherwise the matrix product in rnn_forward will not line up.
X_test = dftestcount['review_description']

print(trained_weights)

# The tuple is unpacked positionally into test_rnn's weight parameters.
predictions = test_rnn(X_test, *trained_weights)
# assumes `dftest` has one row per prediction, in the same order — TODO confirm
final_df_test = pd.DataFrame({
    'ID': dftest['ID'],
    'review_description': dftest['review_description'],
    'rating_test': predictions
})

print(final_df_test)
# `index` is not passed, so pandas' default applies and the row index is
# written as the first CSV column.
final_df_test.to_csv('output_stemming.csv')