In [None]:
from sentence_transformers import SentenceTransformer
import pandas as pd
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
import numpy as np
import matplotlib.pyplot as plt
from sklearn.neural_network import MLPRegressor
from bs4 import BeautifulSoup

In [None]:
# Read the three Jester rating spreadsheets. For each sheet the first column
# is dropped and the value 99 is replaced with NaN (presumably the dataset's
# "not rated" marker -- TODO confirm against the dataset description).
ratings = [
    pd.read_excel(f'data/jester-data-{part}.xls', header=None)
    .iloc[:, 1:]
    .replace(99, float('NaN'))
    for part in range(1, 4)
]

# Stack the per-file frames row-wise into a single frame.
df = pd.concat(ratings)


In [None]:
# Summary statistics of the stacked ratings. describe must be *called*;
# the bare attribute only displays the bound-method repr.
df.describe()

In [None]:
# Reduce the ratings to one mean value per column (NaN entries are skipped
# by DataFrame.mean by default).
# The result is recomputed from `ratings` rather than from `df` itself so that
# re-running this cell always yields the same Series instead of collapsing
# an already-averaged Series into a single scalar.
df = pd.concat(ratings).mean()

In [None]:
# Summary statistics of the per-column mean ratings. describe must be
# *called*; the bare attribute only displays the bound-method repr.
df.describe()

In [None]:
# Extract the joke text from each of the pages init1.html ... init100.html.
jokes_data = []

for i in range(1, 101):
    file_name = f'data/jokes/init{i}.html'
    with open(file_name, 'r') as file:
        joke_html = file.read()

    # Parsing only needs the string, so it happens after the file is closed.
    soup = BeautifulSoup(joke_html, 'html.parser')
    joke_tag = soup.find('font', size='+1')
    if joke_tag is None:
        # find() returns None when nothing matches; report which file is
        # malformed instead of failing with an opaque AttributeError.
        raise ValueError(f"no <font size='+1'> element found in {file_name}")

    joke_text = joke_tag.text.strip()
    jokes_data.append(joke_text)

In [None]:
# Load the 'bert-base-cased' checkpoint through SentenceTransformer and
# encode every joke text into an embedding (one entry per joke).
sentence_transformer = SentenceTransformer(model_name_or_path='bert-base-cased')
jokes = sentence_transformer.encode(sentences=jokes_data)

In [None]:
# Quick visual sanity check of the encoded jokes produced by encode().
print(jokes)

In [None]:
# Hold out 30% of the (embedding, mean rating) pairs for validation;
# the fixed random_state keeps the split reproducible.
train_X, val_X, train_y, val_y = train_test_split(
    jokes, df, test_size=0.3, random_state=100
)

# Report the shape of every split, in the order train X/y, validation X/y.
split_shapes = {
    "Train X shape:": train_X.shape,
    "Train y shape:": train_y.shape,
    "Validation X shape:": val_X.shape,
    "Validation y shape:": val_y.shape,
}
for caption, shape in split_shapes.items():
    print(caption, shape)

In [None]:
def run_mlp_regressor(learning_rate=0.0001, hidden_layers=(10,), epochs = 250):
    """Train an MLPRegressor incrementally and record its loss curves.

    The model is fitted on the notebook-level ``train_X`` / ``train_y`` and
    evaluated on ``val_X`` / ``val_y``; those globals must exist before the
    function is called.

    Args:
        learning_rate: Constant SGD learning rate (``learning_rate_init``).
        hidden_layers: Tuple passed as ``hidden_layer_sizes``.
        epochs: Number of ``partial_fit`` calls to perform.

    Returns:
        A ``(train_loss, validation_loss)`` tuple of lists; each list holds
        the mean squared error measured after every ``partial_fit`` call.
    """
    regressor = MLPRegressor(
        hidden_layer_sizes=hidden_layers,
        solver='sgd',
        learning_rate='constant',
        learning_rate_init=learning_rate,
        alpha=0.0,
        random_state=0,
    )

    # First entry tracks the training split, second the validation split.
    loss_history = ([], [])
    evaluation_sets = ((train_X, train_y), (val_X, val_y))

    for _ in range(epochs):
        # One incremental update on the training data, then score both splits.
        regressor.partial_fit(train_X, train_y)
        for losses, (features, targets) in zip(loss_history, evaluation_sets):
            predictions = regressor.predict(features)
            losses.append(mean_squared_error(targets, predictions))

    return loss_history

In [None]:
# Architecture under test: hidden layer sizes (10,).
hidden_layers = (10, )
train_loss, validation_loss = run_mlp_regressor(0.0001, hidden_layers, 1000)

# One point per training iteration; title and axis labels make the figure
# distinguishable from the other architecture runs.
fig, ax = plt.subplots()
ax.plot(train_loss, label='Train Loss')
ax.plot(validation_loss, label='Validation Loss')
ax.set(title=f'MLP hidden layers {hidden_layers}',
       xlabel='Epoch',
       ylabel='Mean squared error')
ax.legend()
plt.show()

In [None]:
# Architecture under test: hidden layer sizes (10, 10).
hidden_layers = (10, 10)
train_loss, validation_loss = run_mlp_regressor(0.0001, hidden_layers, 1000)

# One point per training iteration; title and axis labels make the figure
# distinguishable from the other architecture runs.
fig, ax = plt.subplots()
ax.plot(train_loss, label='Train Loss')
ax.plot(validation_loss, label='Validation Loss')
ax.set(title=f'MLP hidden layers {hidden_layers}',
       xlabel='Epoch',
       ylabel='Mean squared error')
ax.legend()
plt.show()

In [None]:
# Architecture under test: hidden layer sizes (10, 10, 10).
hidden_layers = (10, 10, 10)
train_loss, validation_loss = run_mlp_regressor(0.0001, hidden_layers, 1000)

# One point per training iteration; title and axis labels make the figure
# distinguishable from the other architecture runs.
fig, ax = plt.subplots()
ax.plot(train_loss, label='Train Loss')
ax.plot(validation_loss, label='Validation Loss')
ax.set(title=f'MLP hidden layers {hidden_layers}',
       xlabel='Epoch',
       ylabel='Mean squared error')
ax.legend()
plt.show()

In [None]:
# Architecture under test: hidden layer sizes (50, 10).
hidden_layers = (50, 10)
train_loss, validation_loss = run_mlp_regressor(0.0001, hidden_layers, 1000)

# One point per training iteration; title and axis labels make the figure
# distinguishable from the other architecture runs.
fig, ax = plt.subplots()
ax.plot(train_loss, label='Train Loss')
ax.plot(validation_loss, label='Validation Loss')
ax.set(title=f'MLP hidden layers {hidden_layers}',
       xlabel='Epoch',
       ylabel='Mean squared error')
ax.legend()
plt.show()

In [None]:
# Architecture under test: hidden layer sizes (100, 50, 5).
hidden_layers = (100, 50, 5)
train_loss, validation_loss = run_mlp_regressor(0.0001, hidden_layers, 1000)

# One point per training iteration; title and axis labels make the figure
# distinguishable from the other architecture runs.
fig, ax = plt.subplots()
ax.plot(train_loss, label='Train Loss')
ax.plot(validation_loss, label='Validation Loss')
ax.set(title=f'MLP hidden layers {hidden_layers}',
       xlabel='Epoch',
       ylabel='Mean squared error')
ax.legend()
plt.show()