# Project 1 FYS-STK4155

## Part a): Ordinary Least Squares (OLS) on the Franke function

### Franke Function

In [None]:
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt
from matplotlib import cm
from matplotlib.ticker import LinearLocator, FormatStrFormatter
import numpy as np
from random import random, seed
import sys
sys.path.append("../")
import functions as f
plt.style.use('seaborn-v0_8-whitegrid')
#plt.style.available
import seaborn as sns
import load_data as ld

In [None]:
# Fix the global NumPy seed so any later random draws are reproducible.
np.random.seed(42)

# Evaluation grid: both axes cover [0, 1) in steps of 0.05.
x, y = np.meshgrid(np.arange(0, 1, 0.05), np.arange(0, 1, 0.05))

# Franke function evaluated on the grid.
z = f.FrankeFunction(x, y)

In [None]:
# 3-D surface plot of z over the (x, y) grid.
fig, ax = plt.subplots(subplot_kw={"projection": "3d"})

surf = ax.plot_surface(
    x, y, z,
    cmap=cm.coolwarm,
    linewidth=0,
    antialiased=False,
)

# Fixed z-range with ten evenly spaced tick marks, labelled with two decimals.
ax.set_zlim(-0.10, 1.40)
ax.zaxis.set_major_locator(LinearLocator(10))
ax.zaxis.set_major_formatter(FormatStrFormatter("%.02f"))

# Colour bar mapping surface colours to z-values.
fig.colorbar(surf, shrink=0.5, aspect=5)
plt.show()

Most of the code in the Franke function task is taken from the description in Project 1.

### Own Code

In [None]:
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

In [None]:
# Sample the Franke function on a dense regular grid over [0, 1) x [0, 1).
x = np.arange(0, 1, 0.001)
y = np.arange(0, 1, 0.001)
xv, yv = np.meshgrid(x, y)
z = f.FrankeFunction(xv, yv)
# NOTE: if noise is added, draw it with the full grid shape, e.g.
# z + sigma * np.random.randn(*z.shape). A 1-D draw of length z.shape[0]
# is broadcast, so every row of the grid would receive the same noise vector.

# Polynomial degree
degree = 3

# Every grid point is one observation, so the regression must see the
# flattened meshgrid. Building the design matrix from the 1-D axes x and y
# only describes the diagonal points (x_i, y_i) with x_i == y_i, which does
# not match the 2-D grid of targets in z and makes X.T @ X singular.
x_flat = np.ravel(xv)
y_flat = np.ravel(yv)
z_flat = np.ravel(z)

# Creating design matrix
# Assumes create_design_matrix returns one row per (x, y) sample when given
# equal-length 1-D arrays -- TODO confirm against functions.py.
X = f.create_design_matrix(x_flat, y_flat, degree)

print("X.shape", X.shape)
print("z.shape", z.shape)

# OLS via the normal equations. pinv keeps the solve well defined even if
# X.T @ X is singular; X.T @ z_flat is evaluated first so no large
# intermediate matrix is created.
OLSbeta = np.linalg.pinv(X.T @ X) @ (X.T @ z_flat)

ztilde_flat = X @ OLSbeta

mse = np.mean((z_flat - ztilde_flat)**2)
R2 = 1 - np.sum((z_flat - ztilde_flat)**2) / np.sum((z_flat - np.mean(z_flat))**2)

print(f"Mse: {mse}")
print(f"R2: {R2}")

# Back to grid shape so the prediction can be drawn as a surface.
ztilde = ztilde_flat.reshape(xv.shape)

fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')

# Plot the fitted surface.
surf = ax.plot_surface(xv, yv, ztilde, cmap=cm.coolwarm,
                       linewidth=0, antialiased=False)

# Customize the z axis.
ax.set_zlim(-0.10, 1.40)
ax.zaxis.set_major_locator(LinearLocator(10))
ax.zaxis.set_major_formatter(FormatStrFormatter("%.02f"))

# Add a color bar which maps values to colors.
fig.colorbar(surf, shrink=0.5, aspect=5)
plt.show()


# Test setup with identity matrix

In [None]:
x, y, z = ld.load_normal_data()

# Polynomial degrees to iterate over
degrees = np.arange(0, 5)

# Per-degree results: fitted coefficients and training scores
beta_values, mse_scores, r2_scores = [], [], []

for degree in degrees:
    # The real design matrix is built only to report its shape and row count;
    # it is then swapped for an identity matrix with the same number of rows.
    X = f.create_design_matrix(x, y, degree)
    print("design matrix shape", X.shape)
    X = np.eye(X.shape[0])

    # 80/20 train/test split with a fixed random state
    X_train, X_test, z_train, z_test = train_test_split(
        X, z, random_state=42, test_size=0.2
    )

    # The scaler is constructed but not applied: this check runs without scaling.
    scaler = StandardScaler(with_mean=False, with_std=True)

    # Fit OLS on the training rows
    OLSbeta = f.beta_OLS(X_train, z_train)
    print("olsbeta.shape", OLSbeta.shape)

    # Predict on the same training rows that were used for the fit
    ztilde = f.z_predict(X_train, OLSbeta)

    # Training MSE and R2
    mse = f.mse(z_train, ztilde)
    print("mse", mse)
    r2 = f.r2(z_train, ztilde)
    print("r2", r2)

    # Store this degree's coefficients and scores
    beta_values.append(OLSbeta)
    mse_scores.append(mse)
    r2_scores.append(r2)


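Result with the identity matrix and without scaling: MSE = 0 on the training data, since with an identity design matrix the OLS fit reproduces the training targets exactly.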

# Compute with random data from the normal distribution

In [None]:
x, y, z = ld.load_normal_data(N_samples=50, seed=2024)

# Polynomial degrees to iterate over
degrees = np.arange(0, 6)

# Per-degree results: fitted coefficients and test scores
beta_values, mse_scores, r2_scores = [], [], []

for degree in degrees:
    # Design matrix for the current polynomial degree
    X = f.create_design_matrix(x, y, degree)

    # 80/20 train/test split with a fixed random state
    X_train, X_test, z_train, z_test = train_test_split(
        X, z, random_state=42, test_size=0.2
    )

    # Scale the features using statistics from the training rows only.
    # with_mean=False means the columns are scaled but not centred.
    scaler = StandardScaler(with_mean=False, with_std=True).fit(X_train)
    X_train = scaler.transform(X_train)
    X_test = scaler.transform(X_test)

    # Fit OLS on the training rows
    OLSbeta = f.beta_OLS(X_train, z_train)

    # Predict on the held-out test rows
    ztilde = f.z_predict(X_test, OLSbeta)

    # Test MSE and R2
    mse = f.mse(z_test, ztilde)
    r2 = f.r2(z_test, ztilde)

    # Store this degree's coefficients and scores
    beta_values.append(OLSbeta)
    mse_scores.append(mse)
    r2_scores.append(r2)

In [None]:
# MSE and R2 scores side by side, each plotted against polynomial degree.
fig, (ax_mse, ax_r2) = plt.subplots(1, 2, figsize=(10, 5))

ax_mse.plot(degrees, mse_scores, marker='o')
ax_mse.set_xlabel('Polynomial Degree')
ax_mse.set_ylabel('MSE')
ax_mse.set_title('MSE as a function of Polynomial Degree')

ax_r2.plot(degrees, r2_scores, marker='o')
ax_r2.set_xlabel('Polynomial Degree')
ax_r2.set_ylabel('R2')
ax_r2.set_title('R2 as a function of Polynomial Degree')

fig.tight_layout()
# Saving and explicit showing are left disabled:
#plt.savefig(fname="../../results/ols_error_degree.png")
#plt.show()

In [None]:
# Scatter every fitted coefficient against the polynomial degree it belongs to.
# Assumes beta_values is ordered by degree starting at 0, so the enumerate
# index equals the polynomial degree.
for degree, values in enumerate(beta_values):
    print("degree", degree, "betavalues", len(values))
    print(values)
    print(" ")
    # Use a dedicated name for the x-coordinates. Rebinding `degrees` here
    # would overwrite the notebook-level array of polynomial degrees and break
    # any cell that plots against it when re-run afterwards.
    degree_positions = np.repeat(degree, len(values))
    plt.scatter(degree_positions, values)

plt.xlabel('Polynomial Degree')
plt.ylabel('Beta Values')
plt.title('Beta values as a function of a Polynomial Degree')

