# EE399 HW5
## Ziwen


https://github.com/ZiwenLi0325/EE399.git

In [39]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import rcParams
from scipy import integrate
from mpl_toolkits.mplot3d import Axes3D
import torch
from torch import nn, optim
from torch.optim import Adam
from sklearn.metrics import mean_squared_error

In [40]:
dt = 0.01
T = 8
t = np.arange(0,T+dt,dt)
beta = 8/3
sigma = 10
rho = 28

# Define activation functions
def logsig(x):
    return 1 / (1 + torch.exp(-x))

def radbas(x):
    return torch.exp(-torch.pow(x, 2))

def purelin(x):
    return x

# Define the Lorenz system
def lorenz_deriv(x_y_z, t0, sigma=sigma, beta=beta, rho=rho):
    x, y, z = x_y_z
    return [sigma * (y - x), x * (rho - z) - y, x * y - beta * z]

# Define the neural network architecture
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(3, 50)
        self.fc2 = nn.Linear(50, 50)
        self.fc3 = nn.Linear(50, 3)

    def forward(self, x):
        x = purelin(self.fc1(x))
        x = purelin(self.fc2(x))
        x = purelin(self.fc3(x))
        return x

class FeedForwardNN(nn.Module):
    def __init__(self):
        super(FeedForwardNN, self).__init__()
        self.fc1 = nn.Linear(3, 50)
        self.fc2 = nn.Linear(50, 50)
        self.fc3 = nn.Linear(50, 3)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        return self.fc3(x)

class SimpleRNN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(SimpleRNN, self).__init__()
        self.rnn = nn.RNN(input_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)
        self.bn = nn.BatchNorm1d(hidden_size)  # add batch normalization
        self.dropout = nn.Dropout(0.5)  # dropout layer

    def forward(self, x):
        x, _ = self.rnn(x.unsqueeze(1))  # add an extra dimension for timesteps
        x = self.dropout(x)  # add dropout
        x = self.fc(self.bn(x.squeeze(1)))  # apply batch normalization before fc
        return x
    
    
class LSTM(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, num_layers=2, dropout_rate=0.5):
        super(LSTM, self).__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers=num_layers, batch_first=True, dropout=dropout_rate)
        self.fc = nn.Linear(hidden_size, output_size)
        self.dropout = nn.Dropout(dropout_rate)

    def forward(self, x):
        x, _ = self.lstm(x.unsqueeze(1))  # add an extra dimension for timesteps
        x = self.dropout(x)
        x = self.fc(x.squeeze(1))  # remove the timesteps dimension
        return x



class ESN(nn.Module):
    def __init__(self, input_size, reservoir_size, output_size, alpha=0.5):
        super(ESN, self).__init__()
        self.input_weights = nn.Parameter(torch.randn(reservoir_size, input_size) / np.sqrt(input_size), requires_grad=False)
        self.reservoir_weights = nn.Parameter(torch.randn(reservoir_size, reservoir_size) / np.sqrt(reservoir_size), requires_grad=False)
        self.output_weights = nn.Linear(reservoir_size, output_size)

        spectral_radius = np.max(np.abs(np.linalg.eigvals(self.reservoir_weights.detach().numpy())))
        self.reservoir_weights.data = self.reservoir_weights.data / spectral_radius * alpha
        self.reservoir_size = reservoir_size

    def forward(self, input):
        reservoir_state = torch.zeros(input.size(0), input.size(1), self.reservoir_size, dtype=torch.float32, device=input.device)
        reservoir_state[:, 0, :] = torch.tanh(torch.mm(input[:,0,:], self.input_weights.t()))  # initialize reservoir state at t=0
        for t in range(1, input.size(1)):  # start loop from t=1
            reservoir_state[:, t, :] = torch.tanh(torch.mm(input[:,t,:], self.input_weights.t()) + torch.mm(reservoir_state[:, t-1, :], self.reservoir_weights.t()))
        output = torch.sigmoid(self.output_weights(reservoir_state))  # changed activation function
        return output


# train for rho = 10, 28, 40

## nn

In [41]:
# Generate training data for rho=10, 28, 40
rhos = [10, 28, 40]
training_input = []
training_output = []
for rho in rhos:
    np.random.seed(123)
    x0 = -15 + 30 * np.random.random((100, 3))

    x_t = np.asarray([integrate.odeint(lorenz_deriv, x0_j, t, args=(sigma, beta, rho)) for x0_j in x0])
    
    for j in range(100):
        training_input.append(x_t[j,:-1,:])
        training_output.append(x_t[j,1:,:])

training_input = np.vstack(training_input)
training_output = np.vstack(training_output)

# Convert numpy arrays to PyTorch tensors
training_input_torch = torch.tensor(training_input, dtype=torch.float32)
training_output_torch = torch.tensor(training_output, dtype=torch.float32)

# Initialize the model and optimizer
model_nn_10 = Net()
optimizer = Adam(model_nn_10.parameters())

# Define the loss function
criterion = nn.MSELoss()

# Train the network
for epoch in range(50):  # 50 epochs
    optimizer.zero_grad()   # zero the gradient buffers
    output = model_nn_10(training_input_torch)
    loss = criterion(output, training_output_torch)
    loss.backward()
    optimizer.step()    # Does the update
    if epoch % 2 == 0:
        print('For NN : Epoch: {}, Loss: {:.5f}'.format(epoch, loss.item()))


For NN : Epoch: 0, Loss: 277.21008
For NN : Epoch: 2, Loss: 217.84122
For NN : Epoch: 4, Loss: 166.72914
For NN : Epoch: 6, Loss: 123.80717
For NN : Epoch: 8, Loss: 89.08292
For NN : Epoch: 10, Loss: 62.46132
For NN : Epoch: 12, Loss: 43.60970
For NN : Epoch: 14, Loss: 31.80769
For NN : Epoch: 16, Loss: 25.79365
For NN : Epoch: 18, Loss: 23.71923
For NN : Epoch: 20, Loss: 23.38392
For NN : Epoch: 22, Loss: 22.82800
For NN : Epoch: 24, Loss: 20.96345
For NN : Epoch: 26, Loss: 17.73151
For NN : Epoch: 28, Loss: 13.76037
For NN : Epoch: 30, Loss: 9.87717
For NN : Epoch: 32, Loss: 6.74112
For NN : Epoch: 34, Loss: 4.67531
For NN : Epoch: 36, Loss: 3.66657
For NN : Epoch: 38, Loss: 3.46505
For NN : Epoch: 40, Loss: 3.71740
For NN : Epoch: 42, Loss: 4.08684
For NN : Epoch: 44, Loss: 4.33377
For NN : Epoch: 46, Loss: 4.34769
For NN : Epoch: 48, Loss: 4.13516


## FeedForwardNN: rho = 10

In [42]:

# Initialize the model and optimizer
model_FFNN_10 = FeedForwardNN()
optimizer = Adam(model_FFNN_10.parameters())

# Define the loss function
criterion = nn.MSELoss()

# Train the network
for epoch in range(50):  # 100 epochs
    optimizer.zero_grad()   # zero the gradient buffers
    output = model_FFNN_10(training_input_torch)
    loss = criterion(output, training_output_torch)
    loss.backward()
    optimizer.step()    # Does the update
    if epoch % 2 == 0:
        print('For FeedForwardNN: Epoch: {}, Loss: {:.5f}'.format(epoch, loss.item()))

For FeedForwardNN: Epoch: 0, Loss: 316.87064
For FeedForwardNN: Epoch: 2, Loss: 294.00208
For FeedForwardNN: Epoch: 4, Loss: 272.27576
For FeedForwardNN: Epoch: 6, Loss: 251.90163
For FeedForwardNN: Epoch: 8, Loss: 232.58051
For FeedForwardNN: Epoch: 10, Loss: 214.03636
For FeedForwardNN: Epoch: 12, Loss: 196.16214
For FeedForwardNN: Epoch: 14, Loss: 178.97263
For FeedForwardNN: Epoch: 16, Loss: 162.35989
For FeedForwardNN: Epoch: 18, Loss: 146.13577
For FeedForwardNN: Epoch: 20, Loss: 130.19473
For FeedForwardNN: Epoch: 22, Loss: 114.53085
For FeedForwardNN: Epoch: 24, Loss: 99.22081
For FeedForwardNN: Epoch: 26, Loss: 84.40943
For FeedForwardNN: Epoch: 28, Loss: 70.35100
For FeedForwardNN: Epoch: 30, Loss: 57.35516
For FeedForwardNN: Epoch: 32, Loss: 45.51680
For FeedForwardNN: Epoch: 34, Loss: 34.97275
For FeedForwardNN: Epoch: 36, Loss: 25.89125
For FeedForwardNN: Epoch: 38, Loss: 18.42566
For FeedForwardNN: Epoch: 40, Loss: 12.65402
For FeedForwardNN: Epoch: 42, Loss: 8.54422
For 

## SimpleRNN: rho = 10

In [43]:
# Generate training data for rho=10, 28, 40
training_input = []
training_output = []

scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=30, gamma=0.1)
model_rnn = SimpleRNN(3, 50, 3)  # input size = 3, hidden size = 50, output size = 3
optimizer = Adam(model_rnn.parameters(), lr=0.01)  # adjust the learning rate if necessary

model_rnn = SimpleRNN(3, 50, 3)  # input size = 3, hidden size = 50, output size = 3
optimizer = torch.optim.RMSprop(model_rnn.parameters(), lr=0.01)  # try RMSprop optimizer

# Define the loss function
criterion = nn.MSELoss()

# Train the network
best_loss = np.inf
epochs_no_improve = 0

for epoch in range(50):  # 30 epochs
    optimizer.zero_grad()   # zero the gradient buffers
    output = model_rnn(training_input_torch)
    loss = criterion(output, training_output_torch)

    # Add L2 regularization
    l2_reg = None
    for W in model_rnn.parameters():
        if l2_reg is None:
            l2_reg = W.norm(2)
        else:
            l2_reg = l2_reg + W.norm(2)
    loss += 0.01 * l2_reg

    loss.backward()

    # Gradient clipping
    torch.nn.utils.clip_grad_norm_(model_rnn.parameters(), max_norm=1)

    optimizer.step()    # Does the update

    # Update learning rate
    scheduler.step()

    # Early stopping
    if loss.item() < best_loss:
        epochs_no_improve = 0
        best_loss = loss.item()
    else:
        epochs_no_improve += 1
        # Check early stopping condition
        if epochs_no_improve == 5:
            print('Early stopping!')
            break

    if epoch % 2 == 0:
        print('For SimpleRNN: Epoch: {}, Loss: {:.5f}'.format(epoch, loss.item()))



For SimpleRNN: Epoch: 0, Loss: 293.63480
For SimpleRNN: Epoch: 2, Loss: 232.62386
For SimpleRNN: Epoch: 4, Loss: 182.26834
For SimpleRNN: Epoch: 6, Loss: 141.85216
For SimpleRNN: Epoch: 8, Loss: 116.19038
For SimpleRNN: Epoch: 10, Loss: 94.62010
For SimpleRNN: Epoch: 12, Loss: 75.66857
For SimpleRNN: Epoch: 14, Loss: 56.39511
For SimpleRNN: Epoch: 16, Loss: 40.04153
For SimpleRNN: Epoch: 18, Loss: 33.05820
For SimpleRNN: Epoch: 20, Loss: 29.68453
For SimpleRNN: Epoch: 22, Loss: 28.07904
For SimpleRNN: Epoch: 24, Loss: 23.15183
For SimpleRNN: Epoch: 26, Loss: 23.79162
For SimpleRNN: Epoch: 28, Loss: 24.65107
For SimpleRNN: Epoch: 30, Loss: 21.97364
For SimpleRNN: Epoch: 32, Loss: 22.37099
For SimpleRNN: Epoch: 34, Loss: 19.13055
For SimpleRNN: Epoch: 36, Loss: 18.09512
For SimpleRNN: Epoch: 38, Loss: 17.62892
For SimpleRNN: Epoch: 40, Loss: 16.92105
For SimpleRNN: Epoch: 42, Loss: 16.22765
For SimpleRNN: Epoch: 44, Loss: 15.63083
For SimpleRNN: Epoch: 46, Loss: 14.85840
For SimpleRNN: E

## LSTM:

In [44]:
# Initialize the model and optimizer
model_LSTM = LSTM(3, 100, 3)  # input size = 3, hidden size = 100, output size = 3
optimizer = torch.optim.RMSprop(model_LSTM.parameters(), lr=0.01)

# Define the loss function
criterion = nn.MSELoss()

# Learning rate scheduler
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)

# Train the network
for epoch in range(30):  # 30 epochs
    optimizer.zero_grad()   # zero the gradient buffers
    output = model_LSTM(training_input_torch)
    loss = criterion(output, training_output_torch)
    loss.backward()
    optimizer.step()    # Does the update

    # Step the scheduler
    scheduler.step()

    print('For LSTM : Epoch: {}, Loss: {:.5f}'.format(epoch, loss.item()))

For LSTM : Epoch: 0, Loss: 291.80551
For LSTM : Epoch: 1, Loss: 238.69255
For LSTM : Epoch: 2, Loss: 176.36925
For LSTM : Epoch: 3, Loss: 144.74005
For LSTM : Epoch: 4, Loss: 123.51939
For LSTM : Epoch: 5, Loss: 104.93462
For LSTM : Epoch: 6, Loss: 92.71381
For LSTM : Epoch: 7, Loss: 80.65710
For LSTM : Epoch: 8, Loss: 86.83304
For LSTM : Epoch: 9, Loss: 79.17844
For LSTM : Epoch: 10, Loss: 70.11256
For LSTM : Epoch: 11, Loss: 67.81390
For LSTM : Epoch: 12, Loss: 66.13931
For LSTM : Epoch: 13, Loss: 64.66452
For LSTM : Epoch: 14, Loss: 63.53388
For LSTM : Epoch: 15, Loss: 62.44979
For LSTM : Epoch: 16, Loss: 61.57392
For LSTM : Epoch: 17, Loss: 60.65466
For LSTM : Epoch: 18, Loss: 59.68523
For LSTM : Epoch: 19, Loss: 58.92725
For LSTM : Epoch: 20, Loss: 58.09414
For LSTM : Epoch: 21, Loss: 57.93455
For LSTM : Epoch: 22, Loss: 58.04333
For LSTM : Epoch: 23, Loss: 57.89446
For LSTM : Epoch: 24, Loss: 57.78593
For LSTM : Epoch: 25, Loss: 57.77925
For LSTM : Epoch: 26, Loss: 57.54692
For L

## ESN:

In [45]:
# Add an extra dimension for time step to the input tensor
training_input_torch_time = training_input_torch.unsqueeze(1)

# Define the models
model_esn_10 = ESN(3, 100, 3)  # increased reservoir size

# Initialize the optimaizer for ESN
optimizer_esn = torch.optim.Adam(model_esn_10.output_weights.parameters(), lr=0.01)

# Initialize the optimizer for ESN
optimizer_esn = torch.optim.RMSprop(model_esn_10.output_weights.parameters(), lr=0.01)  # changed optimizer

# Define the loss function
criterion = nn.MSELoss()

# Train the ESN network
best_loss_esn = np.inf
epochs_no_improve_esn = 0

for epoch in range(30):  # match the number of epochs with NN
    optimizer_esn.zero_grad()   # zero the gradient buffers
    output_esn = model_esn_10(training_input_torch_time)
    # Remove the time step dimension from the output for loss calculation
    output_esn = output_esn.squeeze(1)
    loss_esn = criterion(output_esn, training_output_torch)

    # Add L2 regularization
    l2_reg = None
    for W in model_esn_10.output_weights.parameters():
        if l2_reg is None:
            l2_reg = W.norm(2)
        else:
            l2_reg = l2_reg + W.norm(2)
    loss_esn += 0.01 * l2_reg  # L2 regularization

    loss_esn.backward()
    optimizer_esn.step()  # Does the update

    # Early stopping
    if loss_esn.item() < best_loss_esn:
        epochs_no_improve_esn = 0
        best_loss_esn = loss_esn.item()
    else:
        epochs_no_improve_esn += 1
        # Check early stopping condition
        if epochs_no_improve_esn == 5:
            print('Early stopping!')
            break

    print('For ESN : Epoch: {}, Loss: {:.5f}'.format(epoch, loss_esn.item()))


For ESN : Epoch: 0, Loss: 285.54865
For ESN : Epoch: 1, Loss: 275.62036
For ESN : Epoch: 2, Loss: 274.38834
For ESN : Epoch: 3, Loss: 273.57028
For ESN : Epoch: 4, Loss: 273.41779
For ESN : Epoch: 5, Loss: 273.36407
For ESN : Epoch: 6, Loss: 273.34329
For ESN : Epoch: 7, Loss: 273.32999
For ESN : Epoch: 8, Loss: 273.31857
For ESN : Epoch: 9, Loss: 273.30835
For ESN : Epoch: 10, Loss: 273.29919
For ESN : Epoch: 11, Loss: 273.29083
For ESN : Epoch: 12, Loss: 273.28326
For ESN : Epoch: 13, Loss: 273.27634
For ESN : Epoch: 14, Loss: 273.26996
For ESN : Epoch: 15, Loss: 273.26401
For ESN : Epoch: 16, Loss: 273.25854
For ESN : Epoch: 17, Loss: 273.25345
For ESN : Epoch: 18, Loss: 273.24869
For ESN : Epoch: 19, Loss: 273.24423
For ESN : Epoch: 20, Loss: 273.24005
For ESN : Epoch: 21, Loss: 273.23608
For ESN : Epoch: 22, Loss: 273.23239
For ESN : Epoch: 23, Loss: 273.22885
For ESN : Epoch: 24, Loss: 273.22556
For ESN : Epoch: 25, Loss: 273.22238
For ESN : Epoch: 26, Loss: 273.21939
For ESN : E

In [46]:
rhos = [17, 35]

def generate_lorenz_data(rho, initial_state=[1, 1, 1], dt=0.01, N=10000, sigma=10, beta=8/3):
    t = np.arange(0, N*dt, dt)
    traj = integrate.odeint(lorenz_deriv, initial_state, t, args=(sigma, beta, rho))
    return traj[:-1, :], traj[1:, :]

for rho in rhos:
    print(f"\nForecasting dynamics for rho = {rho}:")

    # Generate Lorenz system data for rho
    nn_input, nn_output = generate_lorenz_data(rho)

    # Train-test split
    train_frac = 0.8
    split_idx = int(train_frac * len(nn_input))
    train_input, train_output = nn_input[:split_idx], nn_output[:split_idx]
    test_input, test_output = nn_input[split_idx:], nn_output[split_idx:]

    # Convert to torch tensors
    train_input_torch = torch.from_numpy(train_input.astype(np.float32))
    train_output_torch = torch.from_numpy(train_output.astype(np.float32))
    test_input_torch = torch.from_numpy(test_input.astype(np.float32))
    test_output_torch = torch.from_numpy(test_output.astype(np.float32))

    # Define the models
    model1 = model_nn_10
    model2 = model_FFNN_10
    model3 = model_LSTM
    model4 = model_rnn
    model5 = model_esn_10
  

    # Define the loss function
    criterion = nn.MSELoss()

    # Train each model and make predictions
    for model in [model1, model2, model3, model4]:
        # Define the optimizer
        optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

        # Train the model
        for epoch in range(100):
            optimizer.zero_grad()
            output = model(train_input_torch)
            loss = criterion(output, train_output_torch)
            loss.backward()
            optimizer.step()

        # Make predictions on the test data
        model.eval()
        with torch.no_grad():
            predictions = model(test_input_torch)

        # Compute the mean squared error of the predictions
        mse = mean_squared_error(test_output_torch.detach().numpy(), predictions.detach().numpy())
        print(f"Mean Squared Error for model {model.__class__.__name__}: {mse}")
    


Forecasting dynamics for rho = 17:
Mean Squared Error for model Net: 0.0003400488931220025
Mean Squared Error for model FeedForwardNN: 0.00012982710904907435
Mean Squared Error for model LSTM: 0.005836538504809141
Mean Squared Error for model SimpleRNN: 0.06037695333361626

Forecasting dynamics for rho = 35:
Mean Squared Error for model Net: 0.39486798644065857
Mean Squared Error for model FeedForwardNN: 0.10435580462217331
Mean Squared Error for model LSTM: 0.4556368291378021
Mean Squared Error for model SimpleRNN: 0.239154651761055


In [47]:
for rho in rhos:
    print(f"\nForecasting dynamics for rho = {rho}:")

    # Generate Lorenz system data for rho
    nn_input, nn_output = generate_lorenz_data(rho)

    # Train-test split
    train_frac = 0.8
    split_idx = int(train_frac * len(nn_input))
    train_input, train_output = nn_input[:split_idx], nn_output[:split_idx]
    test_input, test_output = nn_input[split_idx:], nn_output[split_idx:]

    # Convert to torch tensors and add time dimension
    train_input_torch = torch.from_numpy(train_input.astype(np.float32)).unsqueeze(1)
    train_output_torch = torch.from_numpy(train_output.astype(np.float32))
    test_input_torch = torch.from_numpy(test_input.astype(np.float32)).unsqueeze(1)
    test_output_torch = torch.from_numpy(test_output.astype(np.float32))

    # Define the models
    model5 = model_esn_10

    # Define the loss function
    criterion = nn.MSELoss()

    # Train each model and make predictions
    for model in [model5]:
        # Define the optimizer
        optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

        # Train the model
        for epoch in range(100):
            optimizer.zero_grad()
            output = model(train_input_torch)
            output = output.squeeze(1)  # remove the time dimension for ESN output
            loss = criterion(output, train_output_torch)
            loss.backward()
            optimizer.step()

        # Make predictions on the test data
        model.eval()
        with torch.no_grad():
            predictions = model(test_input_torch)
            predictions = predictions.squeeze(1)  # remove the time dimension for ESN predictions

        # Compute the mean squared error of the predictions
        mse = mean_squared_error(test_output_torch.detach().numpy(), predictions.detach().numpy())
        print(f"Mean Squared Error for model {model.__class__.__name__}: {mse}")



Forecasting dynamics for rho = 17:
Mean Squared Error for model ESN: 103.44430541992188

Forecasting dynamics for rho = 35:
Mean Squared Error for model ESN: 384.436767578125
