# In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

class RidgeRegression:
    """Linear regression with an L2 (ridge) penalty, solved in closed form.

    The model is ``y ~ theta[0] + X @ theta[1:]``.  The penalty ``alpha`` is
    applied to the feature weights only; the intercept ``theta[0]`` is left
    unpenalised so that predictions do not depend on the origin of ``y``.

    Attributes are documented inline in ``__init__``.
    """

    def __init__(self, alpha=1.0):
        # Regularisation strength (0 gives ordinary least squares).
        self.alpha = alpha
        # Fitted coefficients, intercept first; None until fit() is called.
        self.theta = None

    @staticmethod
    def _add_intercept(X):
        """Return ``X`` with a leading column of ones (the bias term)."""
        X = np.asarray(X)
        return np.c_[np.ones((X.shape[0], 1)), X]

    def fit(self, X, y):
        """Estimate ``theta`` from training data.

        Parameters
        ----------
        X : array-like, one row per sample (a 1-D input is treated as a
            single feature column).
        y : array-like of targets, one entry per sample.

        Raises
        ------
        numpy.linalg.LinAlgError
            If the regularised normal equations are singular (possible
            only when ``alpha`` is 0).
        """
        X_b = self._add_intercept(X)
        y = np.asarray(y)

        # Penalty matrix: alpha on the feature weights, 0 on the intercept.
        penalty = self.alpha * np.eye(X_b.shape[1])
        penalty[0, 0] = 0.0

        # Solve the normal equations directly instead of forming an explicit
        # inverse; this is cheaper and numerically better behaved.
        self.theta = np.linalg.solve(X_b.T @ X_b + penalty, X_b.T @ y)

    def predict(self, X):
        """Return predictions for ``X`` using the fitted coefficients.

        Raises
        ------
        RuntimeError
            If called before ``fit``.
        """
        if self.theta is None:
            raise RuntimeError("RidgeRegression.predict() called before fit()")
        return self._add_intercept(X) @ self.theta

    def cross_val_score(self, X, y, cv=5):
        """Run shuffled k-fold cross-validation and return the mean fold MSE.

        Every sample is used for validation exactly once; when the number of
        samples is not divisible by ``cv`` the leading folds get one extra
        sample.  Each fold is fitted on a fresh model with the same ``alpha``,
        so the coefficients already stored on this instance are not modified.
        The shuffle uses NumPy's global random state (seed it with
        ``np.random.seed`` for reproducible scores).

        Parameters
        ----------
        X, y : array-like training data, as for ``fit``.
        cv : int, number of folds; must satisfy ``2 <= cv <= n_samples``.

        Returns
        -------
        The mean, over folds, of the validation mean squared error.

        Raises
        ------
        ValueError
            If ``cv`` is outside ``[2, n_samples]``.
        """
        X = np.asarray(X)
        y = np.asarray(y)
        n = X.shape[0]
        if not 2 <= cv <= n:
            raise ValueError(
                f"cv must be between 2 and the number of samples ({n}), got {cv}"
            )

        indices = np.arange(n)
        np.random.shuffle(indices)
        # array_split spreads any remainder over the first folds instead of
        # discarding it.
        folds = np.array_split(indices, cv)

        scores = []
        for k, val_indices in enumerate(folds):
            train_indices = np.concatenate(folds[:k] + folds[k + 1:])

            # Separate model per fold so self.theta is left untouched.
            fold_model = RidgeRegression(alpha=self.alpha)
            fold_model.fit(X[train_indices], y[train_indices])

            residuals = y[val_indices] - fold_model.predict(X[val_indices])
            scores.append(np.mean(residuals ** 2))

        return np.mean(scores)

# Read the pendulum measurements from the CSV file
pend = pd.read_csv('hw5_ME644.csv')

# Pull the first three columns out as NumPy arrays, in file order:
# angular displacement, angular velocity, angular acceleration
Theta, Theta_dot, Theta_Ddot = (
    pend.iloc[:, column].to_numpy() for column in range(3)
)

# (a) Pairwise correlation between all columns of the data set
correlation_matrix = pend.corr()
print("Correlation Matrix:", correlation_matrix, sep="\n")

# One figure, two side-by-side panels
fig, (ax_left, ax_right) = plt.subplots(1, 2, figsize=(12, 5))

# Left panel: Theta on x, Theta_dot on y
ax_left.scatter(Theta, Theta_dot, color='green', alpha=0.7)
ax_left.set_title('Theta vs Theta Dot')
ax_left.set_xlabel('Theta (rad)')
ax_left.set_ylabel('Theta Dot')

# Right panel: Theta_dot on x, Theta_Ddot on y
ax_right.scatter(Theta_dot, Theta_Ddot, color='purple', alpha=0.7)
ax_right.set_title('Theta Double Dot vs Theta Dot')
ax_right.set_xlabel('Theta Dot')
ax_right.set_ylabel('Theta Double Dot')

fig.tight_layout()
plt.show()

# sin(Theta) is reused below as a regression feature
sin_Theta = np.sin(Theta)

# Figure: sin(Theta) on x, Theta_Ddot on y
fig_sin, ax_sin = plt.subplots(figsize=(8, 5))
ax_sin.scatter(sin_Theta, Theta_Ddot, color='blue', alpha=0.7)
ax_sin.set_title('sin(Theta) vs Theta Double Dot')
ax_sin.set_xlabel('sin(Theta)')
ax_sin.set_ylabel('Theta Double Dot')
ax_sin.grid()
plt.show()

# Figure: Theta on x, Theta_Ddot on y
fig_theta, ax_theta = plt.subplots(figsize=(8, 5))
ax_theta.scatter(Theta, Theta_Ddot, color='blue', alpha=0.7)
ax_theta.set_title('Theta vs Theta Double Dot')
ax_theta.set_xlabel('Theta')
ax_theta.set_ylabel('Theta Double Dot')
ax_theta.grid()
plt.show()

# (b) Set up the regression problem:
# target is the angular acceleration, features are Theta and sin(Theta)
y = Theta_Ddot
X = np.stack((Theta, sin_Theta), axis=1)

# Build the ridge model (alpha is the regularisation strength; tune as needed)
# and fit it on the full data set
ridge_reg = RidgeRegression(alpha=1.0)
ridge_reg.fit(X, y)

# Report the fitted coefficients (intercept first, then one per feature)
print("Coefficients (theta):", ridge_reg.theta)

# In-sample predictions from the fitted model
predictions = ridge_reg.predict(X)

# 5-fold cross-validated mean squared error
mean_mse = ridge_reg.cross_val_score(X, y, cv=5)
print("Mean Cross-Validation MSE:", mean_mse)



# Recorded cell output: ModuleNotFoundError: No module named 'numpy'