In [82]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression

In [None]:
data = pd.read_excel('Ratings_exercise.xlsx')

features = data[['rel_size', 'excess_rets', 'idio_stdev', 'ni_ta', 'tl_ta']]
labels = data['ratings9'].astype(int)  

unique_ratings = sorted(labels.unique())
rating_to_idx = {r: i for i, r in enumerate(unique_ratings)}
labels = labels.map(rating_to_idx)
num_classes = len(unique_ratings)

scaler = StandardScaler()
features_scaled = scaler.fit_transform(features)

X = features_scaled
y = labels.values


In [84]:
X.shape 

(2717, 5)

In [85]:
y.shape

(2717,)

In [None]:

class FeedforwardNN(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super(FeedforwardNN, self).__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_dim, output_dim)
    
    def forward(self, x):
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        return x

input_dim = X.shape[1]      # number of predictors (5)
hidden_dim = 10000             # hidden layer size (adjustable)
output_dim = num_classes    # number of classes
num_epochs = 100            # training epochs
learning_rate = 0.001       # learning rate

In [87]:
print(input_dim)

5


In [None]:

kf = KFold(n_splits=5, shuffle=True, random_state=42)
fold_results = []

for fold, (train_index, test_index) in enumerate(kf.split(X), 1):
    print(f"Fold {fold}")
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

    
    X_train_tensor = torch.FloatTensor(X_train)
    y_train_tensor = torch.LongTensor(y_train)
    X_test_tensor = torch.FloatTensor(X_test)
    y_test_tensor = torch.LongTensor(y_test)
    
    model = FeedforwardNN(input_dim, hidden_dim, output_dim)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    
    for epoch in range(num_epochs):
        model.train()
        optimizer.zero_grad()
        outputs = model(X_train_tensor)
        loss = criterion(outputs, y_train_tensor)
        loss.backward()
        optimizer.step()

Fold 1
Fold 2
Fold 3
Fold 4
Fold 5


In [None]:

model.eval()
with torch.no_grad():
    test_outputs = model(X_test_tensor)
    _, predicted = torch.max(test_outputs, 1)

nn_acc = accuracy_score(y_test, predicted.numpy())
print(f"Neural Network Accuracy: {nn_acc:.4f}")

unique_labels_fold = np.unique(np.concatenate((y_test, predicted.numpy())))
print("Neural Network Classification Report:")
print(classification_report(y_test, predicted.numpy(), labels=unique_labels_fold,
                            target_names=[str(l) for l in unique_labels_fold]))
print("Neural Network Confusion Matrix:")
print(confusion_matrix(y_test, predicted.numpy()))

Neural Network Accuracy: 0.6317
Neural Network Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         1
           1       0.67      0.29      0.40         7
           2       0.50      0.39      0.44        41
           3       0.66      0.69      0.67       167
           4       0.56      0.64      0.60       152
           5       0.72      0.72      0.72       147
           6       1.00      0.20      0.33        25
           7       0.00      0.00      0.00         1
           8       0.00      0.00      0.00         2

    accuracy                           0.63       543
   macro avg       0.57      0.44      0.46       543
weighted avg       0.65      0.63      0.62       543

Neural Network Confusion Matrix:
[[  1   0   0   0   0   0   0   0   0]
 [  0   2   5   0   0   0   0   0   0]
 [  0   0  16  23   2   0   0   0   0]
 [  0   1  10 116  37   3   0   0   0]
 [  0   0   0  35  97  20   0   0   

In [None]:
logit_model = LogisticRegression(multi_class='multinomial', solver='lbfgs', max_iter=1000)
logit_model.fit(X_train, y_train)
logit_pred = logit_model.predict(X_test)
logit_acc = accuracy_score(y_test, logit_pred)
print(f"Logistic Regression Accuracy: {logit_acc:.4f}")

unique_labels_fold_logit = np.unique(np.concatenate((y_test, logit_pred)))
print("Logistic Regression Classification Report:")
print(classification_report(y_test, logit_pred, labels=unique_labels_fold_logit,
                            target_names=[str(l) for l in unique_labels_fold_logit]))
print("Logistic Regression Confusion Matrix:")
print(confusion_matrix(y_test, logit_pred))
print("-" * 50)

fold_results.append({'nn_acc': nn_acc, 'logit_acc': logit_acc})


Logistic Regression Accuracy: 0.6206
Logistic Regression Classification Report:
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         1
           1       0.33      0.14      0.20         7
           2       0.55      0.29      0.38        41
           3       0.64      0.74      0.69       167
           4       0.55      0.61      0.57       152
           5       0.70      0.70      0.70       147
           6       0.71      0.20      0.31        25
           7       0.00      0.00      0.00         1
           8       0.00      0.00      0.00         2

    accuracy                           0.62       543
   macro avg       0.39      0.30      0.32       543
weighted avg       0.62      0.62      0.61       543

Logistic Regression Confusion Matrix:
[[  0   1   0   0   0   0   0   0   0]
 [  0   1   4   2   0   0   0   0   0]
 [  0   0  12  26   3   0   0   0   0]
 [  0   1   6 124  35   1   0   0   0]
 [  0   0   0  37  92

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [None]:
nn_avg_acc = np.mean([res['nn_acc'] for res in fold_results])
logit_avg_acc = np.mean([res['logit_acc'] for res in fold_results])
print(f"Average Neural Network Accuracy: {nn_avg_acc:.4f}")
print(f"Average Logistic Regression Accuracy: {logit_avg_acc:.4f}")

Average Neural Network Accuracy: 0.6317
Average Logistic Regression Accuracy: 0.6206


# No Folds

In [None]:
data = pd.read_excel('Ratings_exercise.xlsx')

features = data[['rel_size', 'excess_rets', 'idio_stdev', 'ni_ta', 'tl_ta']]
labels = data['ratings9'].astype(int)

unique_ratings = sorted(labels.unique())
rating_to_idx = {r: i for i, r in enumerate(unique_ratings)}
labels = labels.map(rating_to_idx)
num_classes = len(unique_ratings)

scaler = StandardScaler()
features_scaled = scaler.fit_transform(features)

X = features_scaled
y = labels.values

In [None]:
class FeedforwardNN(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super(FeedforwardNN, self).__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_dim, output_dim)
    
    def forward(self, x):
        out = self.fc1(x)
        out = self.relu(out)
        out = self.fc2(out)
        return out

input_dim = X.shape[1]      
hidden_dim = 10000 
output_dim = num_classes    
num_epochs = 100            
learning_rate = 0.001       

X_tensor = torch.FloatTensor(X)
y_tensor = torch.LongTensor(y)

model = FeedforwardNN(input_dim, hidden_dim, output_dim)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [None]:
for epoch in range(num_epochs):
    model.train()
    optimizer.zero_grad()
    outputs = model(X_tensor)
    loss = criterion(outputs, y_tensor)
    loss.backward()
    optimizer.step()


In [None]:
model.eval()
with torch.no_grad():
    outputs = model(X_tensor)
    _, predicted = torch.max(outputs, 1)

nn_acc = accuracy_score(y, predicted.numpy())
print("Neural Network Accuracy: {:.4f}".format(nn_acc))
print("Neural Network Classification Report:")
print(classification_report(y, predicted.numpy(), target_names=[str(r) for r in unique_ratings]))
print("Neural Network Confusion Matrix:")
print(confusion_matrix(y, predicted.numpy()))

Neural Network Accuracy: 0.6643
Neural Network Classification Report:
              precision    recall  f1-score   support

           0       1.00      0.33      0.50         6
           1       0.73      0.49      0.58        39
           2       0.63      0.38      0.47       254
           3       0.65      0.72      0.68       797
           4       0.63      0.68      0.65       853
           5       0.73      0.74      0.73       670
           6       0.77      0.48      0.59        86
           7       0.00      0.00      0.00         5
           8       1.00      0.29      0.44         7

    accuracy                           0.66      2717
   macro avg       0.68      0.46      0.52      2717
weighted avg       0.67      0.66      0.66      2717

Neural Network Confusion Matrix:
[[  2   4   0   0   0   0   0   0   0]
 [  0  19  14   6   0   0   0   0   0]
 [  0   1  97 137  16   3   0   0   0]
 [  0   1  41 572 169  14   0   0   0]
 [  0   1   3 150 579 120   0   0   

In [None]:
logit_model = LogisticRegression(multi_class='multinomial', solver='lbfgs', max_iter=1000)
logit_model.fit(X, y)
logit_pred = logit_model.predict(X)
logit_acc = accuracy_score(y, logit_pred)
print("\nLogistic Regression Accuracy: {:.4f}".format(logit_acc))
print("Logistic Regression Classification Report:")
print(classification_report(y, logit_pred, target_names=[str(r) for r in unique_ratings]))
print("Logistic Regression Confusion Matrix:")
print(confusion_matrix(y, logit_pred))


Logistic Regression Accuracy: 0.6150
Logistic Regression Classification Report:
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         6
           1       0.72      0.33      0.46        39
           2       0.58      0.26      0.36       254
           3       0.60      0.71      0.65       797
           4       0.58      0.63      0.61       853
           5       0.69      0.69      0.69       670
           6       0.58      0.29      0.39        86
           7       0.00      0.00      0.00         5
           8       0.00      0.00      0.00         7

    accuracy                           0.62      2717
   macro avg       0.42      0.32      0.35      2717
weighted avg       0.61      0.62      0.60      2717

Logistic Regression Confusion Matrix:
[[  0   3   3   0   0   0   0   0   0]
 [  0  13  18   8   0   0   0   0   0]
 [  0   0  67 167  17   3   0   0   0]
 [  0   1  26 566 192  12   0   0   0]
 [  0   1   1 178 54

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
