In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
data = pd.read_csv('/content/drive/MyDrive/Concepts and Technology of AI/week6/HousePrice/Untitled folder/Houseprice.csv')
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 4 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   HouseAge    100 non-null    int64  
 1   HouseFloor  100 non-null    int64  
 2   HouseArea   100 non-null    float64
 3   HousePrice  100 non-null    float64
dtypes: float64(2), int64(2)
memory usage: 3.2 KB


In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
X = data.drop(columns=['HousePrice']).values
Y = data['HousePrice'].values

In [4]:
def train_test_split (X,Y, test_size=0.3, random_seed=42):
  np.random.seed(random_seed)
  indices = np.arange(X.shape[0])
  np.random.shuffle(indices)

  test_split_size = int(len(X) * test_size)
  test_indices = indices[:test_split_size]
  train_indices = indices[test_split_size:]

  X_train, X_test = X[train_indices], X[test_indices]
  Y_train, Y_test = Y[train_indices], Y[test_indices]

  return X_train, X_test, Y_train, Y_test



In [5]:
def cost_function(X, Y, W):
  """ Parameters:
  This function finds the Mean Square Error.
  Input parameters:
  X: Feature Matrix
  Y: Target Matrix
  W: Weight Matrix
  Output Parameters:
  cost: accumulated mean square error.
  """
  m = len(Y)
  Y_pred = np.dot(X,W)
  error = Y_pred - Y
  cost = (1/(2 * m)) * np.sum(error**2)
  return cost

In [6]:
# Test case
X_test = np.array([[1, 2], [3, 4], [5, 6]])
Y_test = np.array([3, 7, 11])
W_test = np.array([1, 1])
cost = cost_function(X_test, Y_test, W_test)
if cost == 0:
  print("Proceed Further")
else:
  print("something went wrong: Reimplement a cost function")
  print("Cost function output:", cost_function(X_test, Y_test, W_test))

Proceed Further


In [7]:
def gradient_descent(X, Y, W, alpha, iterations):
  """
  Perform gradient descent to optimize the parameters of a linear regression model.
  Parameters:
  X (numpy.ndarray): Feature matrix (m x n).
  Y (numpy.ndarray): Target vector (m x 1).
  W (numpy.ndarray): Initial guess for parameters (n x 1).
  alpha (float): Learning rate.
  iterations (int): Number of iterations for gradient descent.
  Returns:
  tuple: A tuple containing the final optimized parameters (W_update) and the history of cost values
  .
  W_update (numpy.ndarray): Updated parameters (n x 1).
  cost_history (list): History of cost values over iterations.
  """
  # Initialize cost history
  cost_history = [0] * iterations
  # Number of samples
  W_update = W
  m = len(Y)
  for iteration in range(iterations):
    # Step 1: Hypothesis Values
    Y_pred = np.dot(X,W) # Your Code Here
    # Step 2: Difference between Hypothesis and Actual Y
    loss = Y_pred - Y # Your Code Here
    # Step 3: Gradient Calculation
    dw = (1/m) * np.dot(X.T,loss) # Your Code Here
    # Step 4: Updating Values of W using Gradient
    W_update = W_update - alpha * dw # Your Code Here
    # Step 5: New Cost Value
    cost = cost_function(X, Y, W_update)
    cost_history[iteration] = cost
  return W_update, cost_history

In [8]:
# Generate random test data
np.random.seed(0) # For reproducibility
X = np.random.rand(100, 3) # 100 samples, 3 features
Y = np.random.rand(100)
W = np.random.rand(3) # Initial guess for parameters
# Set hyperparameters
alpha = 0.01
iterations = 1000
# Test the gradient_descent function
final_params, cost_history = gradient_descent(X, Y, W, alpha, iterations)
# Print the final parameters and cost history
print("Final Parameters:", final_params)
print("Cost History:", cost_history)

Final Parameters: [-1.20863682 -0.90890243 -1.25008285]
Cost History: [0.10711197094660153, 0.10634244137675625, 0.10557938588628883, 0.1048228044751993, 0.10407269714348764, 0.10332906389115389, 0.10259190471819798, 0.10186121962461994, 0.10113700861041981, 0.10041927167559753, 0.09970800882015317, 0.09900322004408665, 0.09830490534739802, 0.09761306473008727, 0.0969276981921544, 0.09624880573359938, 0.0955763873544223, 0.09491044305462307, 0.0942509728342017, 0.0935979766931582, 0.09295145463149261, 0.09231140664920488, 0.09167783274629505, 0.0910507329227631, 0.09043010717860898, 0.08981595551383276, 0.08920827792843443, 0.08860707442241397, 0.08801234499577137, 0.08742408964850672, 0.0868423083806199, 0.08626700119211093, 0.08569816808297988, 0.08513580905322671, 0.08457992410285141, 0.08403051323185397, 0.08348757644023441, 0.08295111372799274, 0.08242112509512897, 0.08189761054164305, 0.08138057006753503, 0.08087000367280486, 0.08036591135745258, 0.07986829312147817, 0.0793771489

In [9]:
# Model Evaluation - RMSE
def rmse(Y, Y_pred):
  """
  This Function calculates the Root Mean Squres.
  Input Arguments:
  Y: Array of actual(Target) Dependent Varaibles.
  Y_pred: Array of predeicted Dependent Varaibles.
  Output Arguments:
  rmse: Root Mean Square.
  """
  rmse = np.sqrt(np.mean((Y-Y_pred)**2)) # Your Code Here
  return rmse

In [10]:
# Model Evaluation - R2
def r2(Y, Y_pred):
  """
  This Function calculates the R Squared Error.
  Input Arguments:
  Y: Array of actual(Target) Dependent Varaibles.
  Y_pred: Array of predeicted Dependent Varaibles.
  Output Arguments:
  rsquared: R Squared Error.
  """
  mean_y = np.mean(Y)
  ss_tot = np.sum((Y - mean_y) ** 2) # Your Code Here
  ss_res = np.sum((Y - Y_pred) ** 2)  # Your Code Here
  r2 = 1 - (ss_res / ss_tot) # Your Code Here
  return r2

In [11]:
# Main Function
def main():
  # Step 3: Split the data into training and test sets (80% train, 20% test)
  X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3, random_seed=42)
  # Step 4: Initialize weights (W) to zeros, learning rate and number of iterations
  W = np.zeros(X_train.shape[1]) # Initialize weights
  alpha = 0.001 # Learning rate
  iterations = 1000 # Number of iterations for gradient descent
  # Step 5: Perform Gradient Descent
  W_optimal, cost_history = gradient_descent(X_train, Y_train, W, alpha, iterations)
  # Step 6: Make predictions on the test set
  Y_pred = np.dot(X_test, W_optimal)
  # Step 7: Evaluate the model using RMSE and R-Squared
  model_rmse = rmse(Y_test, Y_pred)
  model_r2 = r2(Y_test, Y_pred)
  # Step 8: Output the results
  print("Final Weights:", W_optimal)
  print("Cost History (First 10 iterations):", cost_history[:10])
  print("RMSE on Test Set:", model_rmse)
  print("R-Squared on Test Set:", model_r2)

# Execute the main function
if __name__ == "__main__":
  main()

Final Weights: [0.21678901 0.23524372 0.2324428 ]
Cost History (First 10 iterations): [0.13962580337017835, 0.13946963430304368, 0.13931359701664886, 0.139157691510994, 0.139001917786079, 0.13884627584190395, 0.1386907656784688, 0.13853538729577355, 0.13838014069381827, 0.13822502587260282]
RMSE on Test Set: 0.3858735538756433
R-Squared on Test Set: -0.8010975550728037
