In [1]:
import pandas as pd
import numpy as np



In [2]:
df = pd.read_csv("/content/drive/MyDrive/concept and Technologies of AI/Houseprice (1).csv")
df.head()

Unnamed: 0,HouseAge,HouseFloor,HouseArea,HousePrice
0,52,2,112.945574,543917.179841
1,93,1,174.312126,817740.124828
2,15,4,125.219577,387992.503019
3,72,4,121.210124,240840.742388
4,61,4,59.221737,277273.386525


In [3]:
# Convert 'HouseAge' column to NumPy array
math = df['HouseAge'].to_numpy()

# Convert 'HouseFloor' column to NumPy array
read = df['HouseFloor'].to_numpy()

# Convert 'HouseArea' column to NumPy array
write = df['HouseArea'].to_numpy()

# Convert 'HousePrice' column to NumPy array, overwriting previous 'write'
write = df['HousePrice'].to_numpy()  # now 'write' stores house prices



In [4]:
# Creating a column of ones for the bias term
x0 = np.ones(len(math))

# Creating the feature matrix by combining bias, HouseAge, and HouseFloor
X2 = np.array([x0, math, read]).T

# Creating the weight vector and initializing it with zeros
W = np.array([0, 0, 0])

# Creating the target array from HousePrice
Y2 = np.array(write)


In [5]:
def cost_function(X, Y, W):
    """
    Creating a function to calculate Mean Squared Error (MSE)

    Parameters:
      X : Feature matrix
      Y : Target values
      W : Weight vector

    Returns:
      J : Mean squared error value
    """
    # Getting the number of examples
    m = len(Y)

    # Calculating the mean squared error
    J = np.sum((X.dot(W) - Y) ** 2) / (2 * m)

    # Returning the cost
    return J


In [6]:
# Creating a small test case to check if the cost function works
X_test = np.array([[1, 2], [3, 4], [5, 6]])  # Feature matrix
Y_test = np.array([3, 7, 11])               # Target values
W_test = np.array([1, 1])                   # Weight vector

# Calculating the cost for the test data
cost = cost_function(X_test, Y_test, W_test)

# Checking if the cost is zero (ideal weights)
if cost == 0:
    print("Proceed Further")
else:
    print("Something went wrong: Reimplement the cost function")
    print("Cost function output:", cost)


Proceed Further


In [7]:
# Calculating the initial cost using the feature matrix, target, and initial weights
inital_cost = cost_function(X2, Y2, W)

# Printing the initial cost value
print(inital_cost)


201528080199.132


In [8]:
def gradient_descent(X, Y, B, alpha, iterations):
    """
    Creating a function to perform Gradient Descent to minimize the cost function

    Parameters:
      X : Feature matrix
      Y : Target values
      B : Initial weights
      alpha : Learning rate
      iterations : Number of iterations

    Returns:
      W_update : Updated weights after gradient descent
      cost_history : List of cost values for each iteration
    """
    # Creating a list to store the cost at each iteration
    cost_history = [0] * iterations
    m = len(Y)  # Number of training examples

    # Looping through the specified number of iterations
    for iteration in range(iterations):

        # Calculating predictions with current weights
        Y_pred = X.dot(B)

        # Calculating the error (loss)
        loss = Y_pred - Y

        # Calculating gradient
        dw = (X.T.dot(loss)) / m

        # Updating weights
        W_update = B - alpha * dw

        # Calculating and storing the cost for this iteration
        cost = cost_function(X, Y, W_update)
        cost_history[iteration] = cost

    # Returning the final updated weights and cost history
    return W_update, cost_history


In [9]:

# Setting the learning rate
alpha = 0.0001

# Running gradient descent on the feature matrix, target, initial weights
# for 100,000 iterations to get optimized weights
new_weights, cost_history = gradient_descent(X2, Y2, W, alpha, 100000)

# Printing the final updated weights after gradient descent
print(new_weights)

# Printing the final cost after the last iteration to check if it decreased
print(cost_history[-1])


[  57.90964467 2882.43657469  163.66858437]
132815222383.60129


In [10]:
def rmse(Y, Y_pred):
    """
    Creating a function to calculate Root Mean Square Error (RMSE)

    Input Arguments:
      Y : Array of actual target values
      Y_pred : Array of predicted values

    Output:
      rmse : Root Mean Square Error
    """
    # Calculating the squared differences between actual and predicted values
    squared_diff = (Y - Y_pred) ** 2

    # Calculating mean of squared differences
    mean_squared_diff = sum(squared_diff) / len(Y)

    # Taking square root to get RMSE
    rmse = np.sqrt(mean_squared_diff)

    # Returning the RMSE value
    return rmse


In [11]:

def r2(Y, Y_pred):
    """
    Creating a function to calculate R Squared (Coefficient of Determination)

    Input Arguments:
      Y : Array of actual target values
      Y_pred : Array of predicted values

    Output:
      r2 : R Squared value
    """
    # Calculating mean of actual target values
    mean_y = np.mean(Y)

    # Total sum of squares (variance of actual values)
    ss_tot = sum((Y - mean_y) ** 2)

    # Residual sum of squares (variance not explained by model)
    ss_res = sum((Y - Y_pred) ** 2)

    # Calculating R squared
    r2 = 1 - (ss_res / ss_tot)

    # Returning R squared value
    return r2


In [12]:
# Making predictions using the optimized weights from gradient descent
Y_pred = X2.dot(new_weights)

# Calculating and printing the Root Mean Squared Error for predictions
print(rmse(Y2, Y_pred))

# Calculating and printing the R Squared value for predictions
print(r2(Y2, Y_pred))


515393.4853752059
-2.923439389740798
