In [1]:
##TASK1 
import io
import requests
import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error 
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from math import sin
import pickle

url = 'https://drive.switch.ch/index.php/s/TeDwnbYsBKRuJjv/download'
# Abort early on a stalled connection or an HTTP error status; otherwise an
# error page would be handed to np.load and fail with a misleading message.
response = requests.get(url, timeout=60)
response.raise_for_status()
data = np.load(io.BytesIO(response.content))

# x is a Numpy array of shape (n_samples, n_features) with the inputs
x = data.f.x

# y is a Numpy array of shape (n_samples, ) with the targets
y = data.f.y

# Split the data into training and test sets
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)


def _task1_features(inputs):
    """
    Build the Task 1 design matrix with columns [x1, x2, sin(x2), x1*x2].

    The intercept is not included here: LinearRegression fits it on its own.
    :param inputs: Numpy array whose first two columns are x1 and x2.
    :return: Numpy array with one row per sample and four feature columns.
    """
    x1, x2 = inputs[:, 0], inputs[:, 1]
    return np.column_stack((x1, x2, np.sin(x2), x1 * x2))


# Build the engineered features once per split with the same helper, so the
# training and test matrices can never drift apart.
x_train_compact = _task1_features(x_train)
x_test_compact = _task1_features(x_test)

# Fit an ordinary least-squares model on the engineered training features
model = LinearRegression()
model.fit(x_train_compact, y_train)

# Predict on both splits
test_predictions = model.predict(x_test_compact)
train_predictions = model.predict(x_train_compact)

# Compute the mean squared error for the train and test sets
train_mse = mean_squared_error(y_train, train_predictions)
test_mse = mean_squared_error(y_test, test_predictions)

# Print results
print("Training MSE:",train_mse )
print("Testing MSE:",test_mse )

# Print the formula of the model: theta[0] is the intercept, theta[1:] are the
# coefficients in the same order as the columns built by _task1_features.
theta = np.append(model.intercept_, model.coef_)
print(f"Model formula: f(x) = {theta[0]:.2f} + {theta[1]:.2f}*x1 + {theta[2]:.2f}*x2 + {theta[3]:.2f}*sin(x2) + {theta[4]:.2f}*x1*x2")

# Save the model as a pickle file
with open('model_task1', 'wb') as f:
    pickle.dump(model, f)

###########################################################################

##TASK2
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import Dense
import io
import requests
from sklearn.metrics import mean_squared_error 
from math import sin
import pickle

# Local imports: needed only for seeding, mirroring what Task 3 does.
import random
import tensorflow as tf

url = 'https://drive.switch.ch/index.php/s/TeDwnbYsBKRuJjv/download'  # Data location
# Abort early on a stalled connection or an HTTP error status; otherwise an
# error page would be handed to np.load and fail with a misleading message.
response = requests.get(url, timeout=60)
response.raise_for_status()
data = np.load(io.BytesIO(response.content))

# Seed every random number generator involved (same scheme as Task 3) so that
# weight initialisation and batch shuffling are repeatable between runs.
random.seed(234)
np.random.seed(234)
tf.random.set_seed(234)

# x is a Numpy array of shape (n_samples, n_features) with the inputs
x = data.f.x

# y is a Numpy array of shape (n_samples, ) with the targets
y = data.f.y

# Split the data into training and testing sets
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=0)


def _task2_features(inputs):
    """
    Build the Task 2 network input with columns [1, x1, x2, sin(x2), x1*x2].

    :param inputs: Numpy array whose first two columns are x1 and x2.
    :return: Numpy array with one row per sample and five feature columns.
    """
    x1 = inputs[:, 0].reshape(-1, 1)  # values of the first column
    x2 = inputs[:, 1].reshape(-1, 1)  # values of the second column
    ones = np.ones((inputs.shape[0], 1))
    return np.hstack((ones, x1, x2, np.sin(x2), x1 * x2))


# Build the compact inputs for both splits with the same helper
x_compact_train = _task2_features(x_train)
x_compact_test = _task2_features(x_test)

# Build the neural network model
model = Sequential()
model.add(Dense(20, input_dim=5, activation='relu'))  # first hidden layer, fed by the 5 features
model.add(Dense(10, activation='relu'))  # second hidden layer
model.add(Dense(1))  # linear output layer for regression

# Compile the model
model.compile(loss='mean_squared_error', optimizer='sgd')  # stochastic gradient descent optimizer

# Train the model
model.fit(x_compact_train, y_train, epochs=100, batch_size=32, verbose=0)

# Predict on the training and test sets
train_predictions = model.predict(x_compact_train)
test_predictions = model.predict(x_compact_test)

# Evaluate the model on the training and test set
mse_test = mean_squared_error(y_test, test_predictions)
mse_train = mean_squared_error(y_train, train_predictions)

# Print the results
print(f"Mean Squared Error on training set: {mse_train:.2f}")
print(f"Mean Squared Error on test set: {mse_test:.2f}")

# Save the model as a pickle file
with open('model_task2', 'wb') as f:
    pickle.dump(model, f)
    
########################################################################### 

##MODEL TASK3
import io
import requests
import numpy as np
from math import sin
from keras.models import Sequential
from keras.layers import Dense
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import pickle
import random
import tensorflow as tf

url = 'https://drive.switch.ch/index.php/s/TeDwnbYsBKRuJjv/download'  # Data location
# Abort early on a stalled connection or an HTTP error status; otherwise an
# error page would be handed to np.load and fail with a misleading message.
response = requests.get(url, timeout=60)
response.raise_for_status()
data = np.load(io.BytesIO(response.content))

# Seed every random number generator involved so that weight initialisation
# and batch shuffling are repeatable between runs.
random.seed(234)
np.random.seed(234)
tf.random.set_seed(234)

# x is a Numpy array of shape (n_samples, n_features) with the inputs
x = data.f.x

# y is a Numpy array of shape (n_samples, ) with the targets
y = data.f.y

# Split the data into training and test sets
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=42)

# Build the neural network model: three ReLU hidden layers on the two raw
# input features, followed by a single linear output unit.
model = Sequential()
model.add(Dense(64, input_dim=2, activation='relu'))  # first hidden layer
model.add(Dense(32, activation='relu'))  # second hidden layer
model.add(Dense(16, activation='relu'))  # third hidden layer
model.add(Dense(1))  # output layer

# Compile the model
model.compile(loss='mse', optimizer='adam')

# Train the model.
# NOTE(review): validation_split=0.1 keeps 10% of the training samples out of
# the gradient updates, but no callback or history is consumed here, so the
# held-out part is never used for anything. Confirm this is intended.
model.fit(x_train, y_train, epochs=300, batch_size=32, validation_split=0.1, verbose = 0)

# Use the model to make predictions on the training and test sets
y_train_pred = model.predict(x_train)
y_test_pred = model.predict(x_test)

# Evaluate the model on the training and test sets (the training MSE is
# computed over all of x_train, including the validation hold-out).
train_mse = mean_squared_error(y_train, y_train_pred)
test_mse = mean_squared_error(y_test,y_test_pred)

# Print the results
print("Training MSE:", train_mse)
print("Test MSE:", test_mse)

# Save the model as a pickle file
with open('model_task3', 'wb') as f:
    pickle.dump(model, f)


Training MSE: 0.7231636462303825
Testing MSE: 0.670288340179917
Model formula: f(x) = 1.31 + -0.05*x1 + -0.58*x2 + 0.47*sin(x2) + 0.04*x1*x2
Mean Squared Error on training set: 0.10
Mean Squared Error on test set: 0.12
Training MSE: 0.014624534459571237
Test MSE: 0.01762934118616797


In [2]:
# Import libraries
import joblib
import io
import requests
import numpy as np

def evaluate_predictions(y_true, y_pred):
    """
    Evaluates the mean squared error between the values in y_true and the values
    in y_pred.
    ### YOU CAN NOT EDIT THIS FUNCTION ###
    :param y_true: Numpy array, the true target values from the test set;
    :param y_pred: Numpy array, the values predicted by your model.
    :return: float, the mean squared error between the two arrays.
    :raises AssertionError: if y_true and y_pred do not have identical shapes.
    """
    # Shapes must match exactly: a (n,) vs (n, 1) pair would otherwise broadcast
    # to an (n, n) matrix and silently yield a wrong error value.
    assert y_true.shape == y_pred.shape
    return ((y_true - y_pred) ** 2).mean()


def load_model(filename):
    """
    Deserialize a trained model from disk with joblib.load.

    This is just an example loader; you can write your own function to load
    the model. Some examples can be found in src/utils.py.
    Note that joblib.load unpickles the file, so only point it at files that
    come from a trusted source.
    :param filename: string, path to the file storing the model.
    :return: the model object stored in the file.
    """
    return joblib.load(filename)

# Load the data
# This will be replaced with our private test data when grading the assignment

# Load data from url
url = 'https://drive.switch.ch/index.php/s/TeDwnbYsBKRuJjv/download'
# Abort early on a stalled connection or an HTTP error status; otherwise an
# error page would be handed to np.load and fail with a misleading message.
response = requests.get(url, timeout=60)
response.raise_for_status()
data = np.load(io.BytesIO(response.content))

# Alternatively you can load the data from file
# data_path = '../data/data.npz'
# data = np.load(data_path)

##################################################################### 

## Preprocessing data for task1
#x = data.f.x
#x_compact = np.column_stack((x[:, 0],x[:, 1], np.sin(x[:, 1]), x[:, 0]*x[:, 1]))
                       
## y is a Numpy array of shape (n_samples, ) with the targets
#y = data.f.y

## Load the trained model
#baseline_model_path = './model_task1'
#baseline_model = load_model(baseline_model_path)

## Predict on the given samples
#y_pred = baseline_model.predict(x_compact)

#####################################################################

## Preprocessing data for task2 
#x = data.f.x
#x1= x[:,0].reshape(-1,1) # values of the first column
#x2 = x[:,1].reshape(-1,1)# values of the second column
#x3= np.sin(x2)
#x4= x1*x2

#x_compact = np.hstack((np.ones((x.shape[0],1)), x1, x2, x3, x4)) # The compact input

## y is a Numpy array of shape (n_samples, ) with the targets
#y = data.f.y
#y = y.reshape(-1,1)

## Load the trained model
#baseline_model_path = './model_task2'
#baseline_model = load_model(baseline_model_path)

## Predict on the given samples
#y_pred = baseline_model.predict(x_compact)

#####################################################################

##CODE FOR TASK3

# x is a Numpy array of shape (n_samples, n_features) with the inputs
x = data.f.x
# y is a Numpy array of shape (n_samples, ) with the targets
y = data.f.y

# The network output has one column per sample, so give the targets the same
# (n_samples, 1) layout; the evaluation below requires identical shapes.
y = y.reshape(-1,1)

# Load the trained model
baseline_model_path = './model_task3' 
baseline_model = load_model(baseline_model_path)

# Predict on the given samples
y_pred = baseline_model.predict(x)

############################################################################
# STOP EDITABLE SECTION: do not modify anything below this point.
############################################################################

# Evaluate the prediction using MSE
# Note: the call passes (y_pred, y) while the signature is (y_true, y_pred);
# the squared difference is symmetric, so the reported MSE is the same.
mse = evaluate_predictions(y_pred, y)
print(f'MSE on whole dataset: {mse}')

# NOTE: NOW THIS CELL IS NOT WORKING SINCE YOU NEED TO CHANGE THE INPUT.
# DO IT AND EVERYTHING RUNS SMOOTH

MSE on whole dataset: 0.015525976477550255
