In [1]:
import pandas as pd
import torch
import json
import numpy as np
from torch.utils.data import random_split, DataLoader
from neural_test import train_model, test_model, CustomDataset, make_and_eval
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import KFold
from matplotlib import pyplot as plt

In [2]:
class DynamicNN(torch.nn.Module):
    def __init__(self, input_size, hidden_size, output_size, num_hidden_layers):
        super(DynamicNN, self).__init__()
        self.input_size = input_size
        self.output_size = output_size
        self.num_hidden_layers = num_hidden_layers

        # Create the first layer
        self.layers = [torch.nn.Linear(input_size, hidden_size), torch.nn.ReLU()]

        # Create the hidden layers
        for _ in range(num_hidden_layers):
            self.layers.append(torch.nn.Linear(hidden_size, hidden_size))
            self.layers.append(torch.nn.ReLU())

        # Create the output layer
        self.layers.append(torch.nn.Linear(hidden_size, output_size))

        # Combine all layers
        self.model = torch.nn.Sequential(*self.layers)

    def forward(self, x):
        out = self.model(x)
        if not self.training:
            out = torch.clamp(out, min=1, max=5)
        return out

In [3]:
device = "cpu"
# Load JSON data
with open('../processed_data/topic-sentiment-total.json') as file:
    json_data = json.load(file)

# Convert to DataFrame
df = pd.DataFrame.from_dict(json_data, orient='index')

# Shuffle the DataFrame
df = df.sample(frac=1, random_state=42).reset_index(drop=True)

# Convert DataFrame to numpy array
data = df.to_numpy()
df.head()

Unnamed: 0,pos_food,pos_service,pos_location,pos_clean,pos_price,neg_food,neg_service,neg_location,neg_clean,neg_price,rating
0,20,7,2,1,0,7,1,0,0,1,4.3
1,119,37,38,3,11,31,2,0,0,2,4.6
2,228,85,59,4,53,100,42,19,1,23,4.2
3,22,10,2,2,2,4,4,0,1,2,4.2
4,60,32,37,0,7,32,12,8,0,14,4.1


In [6]:
input_dim = 10
output_dim = 1 
hidden_dim = 40
num_hidden_layers = 4
learningRate = .01
epochs = 150

lambda1 = 0.0000 # l1 regularization parameter (sum of weights)
lambda2 = 0.0000 # l2 regularization parameter (sum of square of weights)

model = DynamicNN(input_dim, hidden_dim, output_dim, num_hidden_layers).to(device)
# Split the dataset
train_size = int(0.8 * len(data))
test_size = len(data) - train_size
train_data, test_data = random_split(data, [train_size, test_size], generator=torch.Generator().manual_seed(42))

trained_model, _ = make_and_eval(model, np.array(train_data), np.array(test_data), learningRate, epochs, lambda1, lambda2)

Mean Squared Error: 0.05304168537259102
Root Mean Squared Error: 0.23030780255794525
R-squared: 0.6994195271224257


In [None]:
# torch.save(model.state_dict(), "../nn4_40.pt")

In [13]:
input_dim = 10
output_dim = 1 
hidden_dim = 40
num_hidden_layers = 4
learningRate = .005
epochs = 150

k_folds = 5
# Create KFold object
kf = KFold(n_splits=k_folds, shuffle=True, random_state=42)

models: list[torch.nn.Module] = []
all_mse = []
all_rmse = []
all_r2 = []

for fold, (train_ids, test_ids) in enumerate(kf.split(data)):
    print(f"Fold {fold + 1}/{k_folds}")

    model = DynamicNN(input_dim, hidden_dim, output_dim, num_hidden_layers).to(device)
    trained_model, evals = make_and_eval(model, data[train_ids], data[test_ids], learningRate, epochs, lambda1, lambda2)

    models.append(trained_model)
    all_mse.append(evals[1])
    all_rmse.append(evals[2])
    all_r2.append(evals[3])

print(f'Average Mean Squared Error: {np.mean(all_mse)}')
print(f'Average Root Mean Squared Error: {np.mean(all_rmse)}')
print(f'Average R-squared: {np.mean(all_r2)}')


Fold 1/5
Mean Squared Error: 0.048359695822000504
Root Mean Squared Error: 0.21990838646888733
R-squared: 0.7231472531847747
Fold 2/5
Mean Squared Error: 0.049392420798540115
Root Mean Squared Error: 0.2222440540790558
R-squared: 0.6874712020082114
Fold 3/5
Mean Squared Error: 0.06081642210483551
Root Mean Squared Error: 0.2466098517179489
R-squared: 0.6543938905932444
Fold 4/5
Mean Squared Error: 0.06988408416509628
Root Mean Squared Error: 0.26435598731040955
R-squared: 0.6462589132755403
Fold 5/5
Mean Squared Error: 0.06934943050146103
Root Mean Squared Error: 0.26334279775619507
R-squared: 0.6093479127534105
Average Mean Squared Error: 0.05956041067838669
Average Root Mean Squared Error: 0.24329224228858948
Average R-squared: 0.6641238343630362
