<a href="https://colab.research.google.com/github/carissa406/UIS/blob/main/Carissa_Hicks_assignment1_numpy.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

1. Modifying the model in lab 3.2 to do Regression

In [2]:
#import libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [3]:
def initialize_parameters(nx,nh,ny):
    #set the random seed so the same random values are generated every time you run this function
    np.random.seed(1)

    #initialize weights to small random numbers and biases to zeros for each layer
    W1=np.random.uniform(size=(nh,nx), low=-0.01, high=0.01)
    b1=np.zeros((nh,1))
    W2=np.random.uniform(size=(ny,nh), low=-0.01, high=0.01)
    b2=np.zeros((ny,1))
   
    #create a dictionary of network parameters
    parameters = {"W1": W1,
                  "b1": b1,
                  "W2": W2,
                  "b2": b2}
    
    return parameters

In [4]:
#Forward Pass
#relu activation
def relu(z):
    return np.maximum(0,z)

In [5]:
def forward_pass(parameters,X):
    Z1= np.dot(parameters["W1"],X)+parameters["b1"] # b1 is broadcasted n times before it is added to np.dpt(W1,X1)
    A1=relu(Z1)
    Z2=np.dot(parameters["W2"],A1)+parameters["b2"] #b2 is broadcasted n times before it is added to np.dpt(W2,A1)
    Yhat=Z2 #sigmoid removed because regression doesnt use activation function, so Yhat is just Z2
       
    cache = {"A1": A1,
             "Z1":Z1,
             "Z2": Z2}
    return Yhat,cache

In [6]:
#using mean squared error for the loss function
def compute_loss(Y,Yhat):
    n=Y.shape[1]
    loss = (1/n) + np.sum((Yhat - Y)**2)
    return loss

In [7]:
#Backward Pass
#Gradient of Mean Squared Error loss function
def dMSE (Y, Yhat):
    return (Yhat - Y)

#Derivative of Relu
def drelu(Z):
    drelu=np.where(Z>0, 1.0, 0.0) 
    return drelu 

In [8]:
def backward_pass(parameters, cache, X, Y, Yhat):
    n=X.shape[1]

    dZ2=dMSE(𝑌,Yhat )(cache["Z2"])
    dW2=(1/n)*np.dot(dZ2,cache["A1"].T)
    db2=(1/n)*np.sum(dZ2, axis=1, keepdims=True)
    dA1=np.dot(parameters["W2"].T,dZ2)
    dZ1=dA1*drelu(cache["Z1"])
    dW1=(1/n)*np.dot(dZ1,X.T)
    db1=(1/n)*np.sum(dZ1, axis=1, keepdims=True)
    gradients={"dW1": dW1,
             "db1": db1,
             "dW2":dW2,
              "db2":db2
              }
    return gradients

In [9]:
def update_parameters(parameters, gradients, learning_rate):
    parameters["W1"]=parameters["W1"]-learning_rate*gradients["dW1"]
    parameters["W2"]=parameters["W2"]-learning_rate*gradients["dW2"]
    parameters["b1"]=parameters["b1"]-learning_rate*gradients["db1"]
    parameters["b2"]=parameters["b2"]-learning_rate*gradients["db2"]
    return parameters

In [10]:
def create_nn_model(train_X,train_Y,nh, val_X, val_Y, num_iterations, learning_rate):
  
    assert(train_X.shape[0]==val_X.shape[0]), "train_X and val_X must have the same number of features"
    assert(train_X.shape[1]==train_Y.size), "train_X and train_Y must have the same number of examples"
    assert(val_X.shape[1]==val_Y.size), "val_X and val_Y must have the same number of examples" 
    
    
    #getting the number of features
    nx=train_X.shape[0]
    
    #one neuron in output layer with no activation function
    ny=1
    
    # initializing the parameteres
    parameters=initialize_parameters(nx,nh,ny)
    
    
    #initialize lists to store the training and valideation losses for each iteration. 
    val_loss=[]
    train_loss=[]
    
    #run num_iterations of gradient descent
    for i in range (0, num_iterations):
        #run the forward pass on train_X
        Yhat_train, train_cache= forward_pass(parameters,train_X)
        
        #run the forward pass on val_X
        Yhat_val,val_cache= forward_pass(parameters,val_X)
        
        #compute the loss on the train and val datasets
        train_loss.append(compute_loss(train_Y,Yhat_train))
        val_loss.append(compute_loss(val_Y,Yhat_val))

        """
        run the backward pass. Note that the backward pass is only run on the training data not the validation data
        Because the learning must be only done on the training data and hence, validation data is not used to update
        the model parameters.  
        """
        gradients=backward_pass(parameters, train_cache, train_X, train_Y,Yhat_train)
        
        # update the parameters
        parameters=update_parameters(parameters, gradients, learning_rate)
        
        #print the trianing loss and validation loss for each iteration.
        print("iteration {} :train_loss:{} val_loss{}".format(i,train_loss[i],val_loss[i]))
        
    #create a dictionary history and put train_loss and validaiton_loss in it
    history={"val_loss": val_loss,
             "train_loss": train_loss}
        
        #return the parameters and the history
    return parameters, history

In [11]:
#get predictions
def predict(parameters,X):
    predicted_label=forward_pass(parameters, X)
    return predicted_label

In [12]:
#evaluate accuracy
def accuracy(observedY,predictedY):
    #return the ratio of the examples for which predictedY=observedY over the total number of examples
    return float(np.sum(predictedY==observedY))/observedY.size

2. Preparing California Housing Data

In [61]:
#load the data
df = pd.read_csv("sample_data/california_housing_train.csv")

In [62]:
df.shape

(17000, 9)

In [63]:
df.head()

Unnamed: 0,longitude,latitude,housing_median_age,total_rooms,total_bedrooms,population,households,median_income,median_house_value
0,-114.31,34.19,15.0,5612.0,1283.0,1015.0,472.0,1.4936,66900.0
1,-114.47,34.4,19.0,7650.0,1901.0,1129.0,463.0,1.82,80100.0
2,-114.56,33.69,17.0,720.0,174.0,333.0,117.0,1.6509,85700.0
3,-114.57,33.64,14.0,1501.0,337.0,515.0,226.0,3.1917,73400.0
4,-114.57,33.57,20.0,1454.0,326.0,624.0,262.0,1.925,65500.0


In [64]:
#split training data into 80% training and 20% validation
train = df.sample(frac=0.8, random_state=123)
val = df.drop(train.index)

In [65]:
print(train.shape)
print(val.shape)

(13600, 9)
(3400, 9)


In [66]:
# reading the input datasets train.csv and validation.csv and store them into numpy arrays
train = train.to_numpy()
test = pd.read_csv('sample_data/california_housing_test.csv').to_numpy()
val = val.to_numpy()

In [67]:
test.shape

(3000, 9)

In [68]:
#separate the features from the target variable (median_house_value) in train, val and test
train_X = train[:,:-1]
train_Y = train[...,-1] #labels

test_X = test[:,:-1]
test_Y = test[...,-1] #labels

val_X = val[:,:-1]
val_Y = val[...,-1] #labels

In [69]:
print(train_X.shape)
print(test_X.shape)
print(val_X.shape)
print(train_Y.shape)
print(test_Y.shape)
print(val_Y.shape)
print(test_Y)

(13600, 8)
(3000, 8)
(3400, 8)
(13600,)
(3000,)
(3400,)
[344700. 176500. 270500. ...  62000. 162500. 500001.]


In [70]:
#normalize the data: subtract mean of each feature and divide by the std, so that the feature is centered around 0 and has a unit std
train_norm = (train_X - np.mean(train_X, axis=0))/np.std(train_X, axis=0)
test_norm = (test_X - np.mean(train_X, axis=0))/np.std(train_X, axis=0)
val_norm = (val_X - np.mean(train_X, axis=0))/np.std(train_X, axis=0)

print(train_norm.shape)
print(test_norm.shape)
print(val_norm.shape)

(13600, 8)
(3000, 8)
(3400, 8)


In [71]:
#divide the median_house_values by 100k to scale them down
train_norm = train_norm/100000
test_norm = test_norm/100000
val_norm = val_norm/100000

In [72]:
#transpose feature matricies for train,test,val and reshape target vectors to 2D arrays
train_norm = train_norm.transpose()
test_norm = test_norm.transpose()
val_norm = val_norm.transpose()

In [73]:
train_Y=np.reshape(train_Y, (1, train_Y.size))
test_Y=np.reshape(test_Y, (1, test_Y.size))
val_Y=np.reshape(val_Y, (1, val_Y.size))

In [74]:
print(train_norm.shape)
print(train_Y.shape)
print(val_norm.shape)
print(val_Y.shape)
print(test_norm.shape)
print(test_Y.shape)

(8, 13600)
(1, 13600)
(8, 3400)
(1, 3400)
(8, 3000)
(1, 3000)


3. Training and hyper-parameter tuning

In [75]:
iterations=2000
parameters, history=create_nn_model(train_norm,train_Y,50, val_norm, val_Y, iterations, 0.01)

TypeError: ignored

In [None]:
plt.plot(range(0,iterations),history["train_loss"],'b')
plt.plot(range(0,iterations),history["val_loss"],'r')
plt.ylabel('loss')
plt.xlabel('iterations')
plt.show()

In [None]:

predicted_train=predict(parameters, train_X)
predicted_val=predict(parameters, val_X)

print("accurracy of the model on the training data is:", accuracy(train_Y,predicted_train))
print("accurracy of the model on the validation data is:", accuracy(val_Y,predicted_val))