In [None]:
#imports

import numpy as np
import pandas as pd
import sklearn
# from xgboost import XGBRegressor
import matplotlib.pyplot as plt
import seaborn as sns
import torch
import torch.nn as nn
from torchdiffeq import odeint
import matplotlib.pyplot as plt
import warnings

from sklearn.svm import SVR
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LinearRegression
from sklearn.impute import KNNImputer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import f1_score
from sklearn.ensemble import VotingRegressor, RandomForestRegressor
from sklearn.metrics import mean_absolute_error, r2_score, mean_squared_error
from sklearn.model_selection import cross_val_score

warnings.filterwarnings('ignore')





In [2]:
# Data

# Load the Boston housing table from the CMU StatLib archive.
# NOTE(review): the recorded output of this cell is an SSL
# CERTIFICATE_VERIFY_FAILED URLError, i.e. the download failed in the
# environment that last ran it. Every later cell that uses `data`, `target`
# or the train/test split needs this cell to succeed first — fix the local CA
# certificates (or read a local copy of the file) rather than disabling
# certificate verification.
data_url = "http://lib.stat.cmu.edu/datasets/boston"

# Raw string: "\s" inside a plain string literal is an invalid escape sequence
# (DeprecationWarning, and SyntaxWarning from Python 3.12 on).
raw_df = pd.read_csv(data_url, sep=r"\s+", skiprows=22, header=None)

# Rebuild one row per record: all columns of the even rows followed by the
# first two columns of the odd rows form the features; the third column of the
# odd rows is the target.
data = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]])
target = raw_df.values[1::2, 2]

# Hold out 30% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(data, target, test_size=0.3, random_state=42)


URLError: <urlopen error [SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: unable to get local issuer certificate (_ssl.c:1000)>

In [None]:
# create individual regression models

# Neural ODE (from https://www.geeksforgeeks.org/building-a-basic-neural-ode-model/): takes a 2-element state vector and returns a 2-element output.
# It defines the time derivative of the hidden state, i.e. how the hidden state evolves over time.
# Step 1: Define the Neural ODE Model
class ODEFunc(nn.Module):
    """Learnable right-hand side ``dy/dt = f(y)`` of a neural ODE.

    The derivative is produced by a small MLP: ``Linear -> Tanh -> Linear``.

    Args:
        dim: Size of the state vector; the network maps ``dim`` inputs to
            ``dim`` outputs so the derivative has the same shape as the state.
            Defaults to 2.
        hidden_dim: Width of the single hidden layer. Defaults to 50.
    """

    def __init__(self, dim=2, hidden_dim=50):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(dim, hidden_dim),   # state -> hidden layer
            nn.Tanh(),                    # activation function (Tanh)
            nn.Linear(hidden_dim, dim),   # hidden layer -> derivative of the state
        )

    def forward(self, t, y):
        """Return the time derivative of state ``y``.

        ``t`` is accepted because ODE solvers call ``func(t, y)``, but it is
        not used: the network only looks at ``y``.
        """
        return self.net(y)



In [None]:
def solve_ode():
    """Integrate a freshly initialised ``ODEFunc`` and print the trajectory.

    The network is untrained (new random weights on every call), the initial
    state is ``[[0.0, 1.0]]`` and the solution is evaluated at 100 evenly
    spaced time points on ``[0, 25]``. One line is printed per time point.

    Returns:
        tuple: ``(t, y)`` — the time grid and the tensor returned by ``odeint``.
    """
    dynamics = ODEFunc()
    initial_state = torch.tensor([[0.0, 1.0]])
    t = torch.linspace(0, 25, 100)
    y = odeint(dynamics, initial_state, t)

    # Report every time point together with the state reached at that time
    for i in range(t.shape[0]):
        print(f"Time: {t[i].item()}, Solution: {y[i].detach().numpy()}")

    return t, y


In [None]:
# Visualizing the Solution
def plot_trajectory():
    """Plot the second state component against the first for ``solve_ode()``'s solution."""
    _, states = solve_ode()                       # the time grid is not needed for this plot
    path = states[:, 0, :].detach().numpy()       # select index 0 of the middle axis
    plt.plot(path[:, 0], path[:, 1])
    plt.title('Trajectory of Neural ODE')
    plt.xlabel('x')
    plt.ylabel('y')
    plt.show()


# Solve the ODE once and display its trajectory
plot_trajectory()

In [None]:
# training the ODE

# Define the Training Loop
def train_ode(model, optimizer, criterion, data, n_epochs=100, log_every=10):
    """Fit an ODE function so that its solution matches a reference trajectory.

    Each epoch integrates ``model`` from ``data['y0']`` over ``data['t']`` with
    ``odeint``, compares the squeezed solution with ``data['y_true']`` and
    takes one optimizer step.

    Args:
        model: Callable ``f(t, y)`` passed to ``odeint`` (e.g. an ``ODEFunc``).
        optimizer: Optimizer whose ``zero_grad``/``step`` are called each epoch.
        criterion: Loss callable invoked as ``criterion(prediction, target)``.
        data: Mapping with keys ``'y0'``, ``'t'`` and ``'y_true'``.
        n_epochs: Number of optimisation steps. Defaults to 100.
        log_every: Print the loss every ``log_every`` epochs; ``0`` or ``None``
            disables printing. Defaults to 10.

    Returns:
        None. ``model`` is updated in place through ``optimizer``.
    """
    for epoch in range(1, n_epochs + 1):
        optimizer.zero_grad()                          # clear gradients from the previous step
        pred_y = odeint(model, data['y0'], data['t'])  # solve the ODE with the current parameters
        # squeeze() drops every size-1 axis so the prediction lines up with y_true
        loss = criterion(pred_y.squeeze(), data['y_true'])
        loss.backward()                                # backpropagate through the solver
        optimizer.step()                               # update the model parameters
        if log_every and epoch % log_every == 0:
            print(f'Epoch {epoch}, Loss: {loss.item()}')



In [None]:
# Visualizing the Results
def plot_results(model, data):
    """Overlay the reference trajectory and the model's solution over time.

    Args:
        model: Callable ``f(t, y)`` integrated with ``odeint``.
        data: Mapping with keys ``'y0'``, ``'t'`` and ``'y_true'``.
    """
    # Gradients are not needed for plotting, so solve without tracking them
    with torch.no_grad():
        fitted = odeint(model, data['y0'], data['t']).squeeze().numpy()

    times = data['t'].numpy()
    plt.plot(times, data['y_true'].numpy(), label='True')
    plt.plot(times, fitted, '--', label='Predicted')
    plt.legend()
    plt.title('True vs Predicted Neural ODE')
    plt.show()

In [None]:
# Random Forest

# data preparation

# Load the data into a pandas DataFrame.
# Extract the feature columns from the DataFrame and store them in a feature matrix (X below).
# Store the target values in y.

#df = pd.read_csv('data.csv')
#df.info()
# X = df.iloc[:,1:2].values
# y = df.iloc[:,2].values


In [None]:
# Random Forest Regressor Model

# NOTE(review): `df` and `y` are not assigned anywhere in the visible notebook
# (the loading code in the data-preparation cell is commented out), so this
# cell raises NameError on a fresh kernel until that step is restored.
# NOTE(review): `x` is built from every column of `df`; if `df` still contains
# the target column, the model is trained on its own label — confirm.

# Label-encode every object (string) column. The single encoder is refitted for
# each column, so afterwards `label_encoder` only remembers the last column's classes.
label_encoder = LabelEncoder()
x_categorical = df.select_dtypes(include=['object']).apply(label_encoder.fit_transform)
x_numerical = df.select_dtypes(exclude=['object']).values

# Numeric columns first, then the encoded ones. The numeric frame is given
# df's own index so that concat pairs rows by position; with a fresh 0..n-1
# index, any non-default df index would produce misaligned, NaN-filled rows.
x = pd.concat([pd.DataFrame(x_numerical, index=df.index), x_categorical], axis=1).values

regressor = RandomForestRegressor(n_estimators=10, random_state=0, oob_score=True)

regressor.fit(x, y)

In [None]:
# predictions and evaluation of the Random Forest

# Predict on the same matrix the model was fitted on, so the MSE and R-squared
# below are in-sample figures; the out-of-bag score is the only estimate here
# computed from samples each tree did not see.
predictions = regressor.predict(x)

# Gather the three metrics first ...
oob_score = regressor.oob_score_
mse = mean_squared_error(y, predictions)
r2 = r2_score(y, predictions)

# ... then report them
print(f'Out-of-Bag Score: {oob_score}')
print(f'Mean Squared Error: {mse}')
print(f'R-squared: {r2}')


In [None]:
# random forest results visualization

# Sweeps the first feature over its observed range on a fine grid, holds every
# other feature at zero, and draws the forest's prediction along that sweep:
#   - real data points: blue scatter
#   - predictions on the grid: green line
#
# The sweep is built from `x`, the matrix the regressor was fitted on; `X` is
# never defined in this notebook. The number of padding columns is taken from
# `x` so the grid always has as many features as the model expects.
# NOTE(review): padding the remaining features with zeros evaluates the model
# at values that may lie outside the training data — confirm this is intended.
first_feature = x[:, 0]

X_grid = np.arange(first_feature.min(), first_feature.max(), 0.01).reshape(-1, 1)
n_other_features = x.shape[1] - 1
X_grid = np.hstack((X_grid, np.zeros((X_grid.shape[0], n_other_features))))  # pad with zeros

# Plot results
plt.scatter(first_feature, y, color='blue', label="Actual Data")  # first feature vs target
plt.plot(X_grid[:, 0], regressor.predict(X_grid), color='green', label="Random Forest Prediction")
plt.title("Random Forest Regression Results")
plt.xlabel('Position Level')
plt.ylabel('Salary')
plt.legend()
plt.show()

In [None]:
# random forest single decision tree visualization
# Draws the first decision tree of the trained random forest to show how one
# member of the ensemble splits the data.

# plot_tree is not among the notebook's top-level imports, so bring it in here.
from sklearn.tree import plot_tree

tree_to_plot = regressor.estimators_[0]

# Name the features in the order the training matrix was assembled:
# non-object columns first, then the label-encoded object columns.
# (df.columns is in the DataFrame's original order and would mislabel the
# splits whenever an object column precedes a numeric one.)
feature_names = (
    df.select_dtypes(exclude=['object']).columns.tolist()
    + df.select_dtypes(include=['object']).columns.tolist()
)

# Plot the decision tree
plt.figure(figsize=(20, 10))
plot_tree(tree_to_plot, feature_names=feature_names, filled=True, rounded=True, fontsize=10)
plt.title("Decision Tree from Random Forest")
plt.show()



In [None]:
# Voting Regressor

# VotingRegressor clones its members and calls fit/predict on them, and a later
# cell calls ODE_nn.fit / ODE_nn.predict directly. A bare nn.Module offers none
# of that, so the neural ODE is wrapped in a scikit-learn compatible estimator.
from sklearn.base import BaseEstimator, RegressorMixin


class _ODEDynamics(nn.Module):
    """Time derivative of the hidden state: Linear -> Tanh -> Linear."""

    def __init__(self, dim, hidden_dim):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(dim, hidden_dim),
            nn.Tanh(),
            nn.Linear(hidden_dim, dim),
        )

    def forward(self, t, y):
        # t is part of the solver's call signature but is not used
        return self.net(y)


class NeuralODERegressor(RegressorMixin, BaseEstimator):
    """scikit-learn style regressor built around a neural ODE.

    Each (standardised) feature vector is used as the initial state of an ODE
    whose derivative is a small MLP. The state is integrated from ``0`` to
    ``t_end`` and a linear layer maps the final state to the (standardised)
    target. Features and target are standardised inside ``fit`` and the
    prediction is mapped back to the original target scale in ``predict``.

    Args:
        hidden_dim: Width of the MLP's hidden layer.
        n_epochs: Number of full-batch Adam steps taken by ``fit``.
        lr: Adam learning rate.
        t_end: End of the integration interval.
        random_state: If not ``None``, ``torch.manual_seed`` is called with it
            before the weights are created (this seeds torch's global RNG).
    """

    def __init__(self, hidden_dim=50, n_epochs=100, lr=0.01, t_end=1.0, random_state=None):
        self.hidden_dim = hidden_dim
        self.n_epochs = n_epochs
        self.lr = lr
        self.t_end = t_end
        self.random_state = random_state

    def _to_inputs(self, X):
        """Standardise ``X`` with the statistics learned in ``fit``; return a float32 tensor."""
        scaled = (np.asarray(X, dtype=np.float64) - self.x_mean_) / self.x_scale_
        return torch.from_numpy(scaled.astype(np.float32))

    def _final_state(self, inputs):
        """Integrate the dynamics from 0 to ``t_end`` and return the state at ``t_end``."""
        t_span = torch.tensor([0.0, float(self.t_end)])
        return odeint(self.dynamics_, inputs, t_span)[-1]

    def fit(self, X, y):
        """Train on ``X`` of shape (n_samples, n_features) and ``y`` of shape (n_samples,).

        Returns:
            self
        """
        X = np.asarray(X, dtype=np.float64)
        y = np.asarray(y, dtype=np.float64).reshape(-1, 1)

        # Standardisation statistics; constant columns keep a scale of 1
        self.x_mean_ = X.mean(axis=0)
        x_std = X.std(axis=0)
        self.x_scale_ = np.where(x_std > 0, x_std, 1.0)
        self.y_mean_ = float(y.mean())
        self.y_scale_ = float(y.std()) or 1.0

        if self.random_state is not None:
            torch.manual_seed(self.random_state)
        self.n_features_in_ = X.shape[1]
        self.dynamics_ = _ODEDynamics(self.n_features_in_, self.hidden_dim)
        self.head_ = nn.Linear(self.n_features_in_, 1)

        inputs = self._to_inputs(X)
        targets = torch.from_numpy(((y - self.y_mean_) / self.y_scale_).astype(np.float32))
        parameters = list(self.dynamics_.parameters()) + list(self.head_.parameters())
        optimizer = torch.optim.Adam(parameters, lr=self.lr)
        criterion = nn.MSELoss()

        for _ in range(self.n_epochs):
            optimizer.zero_grad()
            loss = criterion(self.head_(self._final_state(inputs)), targets)
            loss.backward()
            optimizer.step()
        return self

    def predict(self, X):
        """Return predictions for ``X`` as a 1-D float64 array on the original target scale."""
        with torch.no_grad():
            scaled = self.head_(self._final_state(self._to_inputs(X))).squeeze(1).numpy()
        return scaled.astype(np.float64) * self.y_scale_ + self.y_mean_


ODE_nn = NeuralODERegressor(random_state=42)
svr_reg = SVR(kernel='sigmoid')  # You can choose different kernels if needed
# xgb_reg = XGBRegressor(random_state=42)
rf_reg = RandomForestRegressor(random_state=42)


In [None]:
# Back to Voting Regressor

# Combine the three base models into one ensemble. Every member must be a
# scikit-learn regressor; members are fitted by up to 5 parallel jobs.
voting_reg = VotingRegressor(
    estimators=[
        ('ODE_nn', ODE_nn),
        ('svr', svr_reg),
        ('rf', rf_reg),
    ],
    n_jobs=5,
)

# Train the ensemble on the training split
voting_reg.fit(X_train, y_train)


In [None]:
# Voting Evaluation

# Predict on the held-out split with the fitted ensemble
pred = voting_reg.predict(X_test)

# Mean absolute error and R-squared of those predictions against the true targets
mae, r2 = mean_absolute_error(y_test, pred), r2_score(y_test, pred)

# Report both metrics rounded to two decimals
print(f'Mean Absolute Error (MAE): {mae:.2f}')
print(f'R-squared (R2) Score: {r2:.2f}')


In [None]:
# Compare every base model with the ensemble on the first rows of the dataset.
# These rows come from the full `data` array, so they are a mix of rows that
# ended up in the training split and rows that ended up in the test split.
N_SHOWN = 20
xt = data[:N_SHOWN]

# VotingRegressor fits clones of its members, so the stand-alone models need
# their own fit before they can predict.
ODE_nn.fit(X_train, y_train)
svr_reg.fit(X_train, y_train)
rf_reg.fit(X_train, y_train)
voting_reg.fit(X_train, y_train)

pred_ODE_nn = ODE_nn.predict(xt)
pred_svr_reg = svr_reg.predict(xt)
pred_rf_reg = rf_reg.predict(xt)
pred_voting_reg = voting_reg.predict(xt)

plt.figure()
# One distinct colour per model so the series can be told apart
plt.plot(pred_rf_reg, "b^", label="RandomForestRegressor")
plt.plot(pred_svr_reg, "g^", label="Support Vector Regressor")
plt.plot(pred_ODE_nn, "mo", label="ODE_nn")

plt.plot(pred_voting_reg, "r*", ms=10, label="VotingRegressor")
# True target values of the same rows, for reference
plt.plot(target[:N_SHOWN], "y*--", label="Actual target")

plt.tick_params(axis="x", which="both", bottom=False, top=False, labelbottom=False)
plt.ylabel("target value")
plt.xlabel(f"first {N_SHOWN} dataset rows")
plt.legend(loc="best")
plt.title("Regressor predictions and their average")

plt.show()
