In [14]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [15]:
def preprocess_data(X):
    """Min-max scale the feature matrix ``X`` into the [0, 1] range.

    Every value is mapped to ``(x - min) / (max - min)``. The minimum and
    maximum are taken from ``X.min()`` / ``X.max()`` without an explicit
    axis, so a pandas DataFrame is scaled column by column while a NumPy
    array is scaled by its global minimum and maximum.

    A feature whose values are all identical has a zero range. Such a
    feature is mapped to 0 rather than dividing by zero, which would
    otherwise fill it with NaN and propagate into every later computation.

    Parameters
    ----------
    X : pandas.DataFrame, pandas.Series or numpy.ndarray
        Numeric feature matrix.

    Returns
    -------
    Same container type as ``X``
        The scaled features.
    """
    lowest = X.min()
    span = X.max() - lowest
    # (span == 0) is 1 exactly where the range is zero and 0 elsewhere, so
    # adding it replaces a zero divisor with 1 and leaves every other range
    # untouched. The numerator is 0 for those entries, so they scale to 0.
    # Doing this arithmetically keeps pandas labels intact for alignment.
    safe_span = span + (span == 0)
    return (X - lowest) / safe_span

In [16]:
def add_bias(X):
    """Return ``X`` with a column of ones prepended.

    The ones column sits at index 0 of the result, so the first weight
    multiplied against it acts as the intercept (bias) term.

    Parameters
    ----------
    X : array-like
        Feature matrix; ``X.shape[0]`` is used as the number of rows.

    Returns
    -------
    numpy.ndarray
        Array whose first column is all ones, followed by the columns of ``X``.
    """
    row_count = X.shape[0]
    intercept_column = np.ones(row_count)
    return np.column_stack((intercept_column, X))


In [17]:
def calculate_cost(X, y, weights):
    """Return half of the mean squared error of the linear model.

    Parameters
    ----------
    X : array-like
        Feature matrix; predictions are computed as ``X.dot(weights)``.
    y : array-like
        Target values, subtracted element-wise from the predictions.
    weights : array-like
        Model coefficients.

    Returns
    -------
    float
        ``mean((X.dot(weights) - y) ** 2) / 2``.
    """
    residuals = X.dot(weights) - y
    squared_residuals = np.square(residuals)
    return np.mean(squared_residuals) / 2

In [18]:
def gradient_descent(X, y, learning_rate, num_iterations):
    """Fit linear-model weights with batch gradient descent.

    Weights start at zero. On every iteration the residuals
    ``X.dot(weights) - y`` are computed over all rows, the weights are moved
    against ``X.T.dot(residuals) / len(X)`` scaled by ``learning_rate``, and
    the cost at the updated weights is recorded via ``calculate_cost``.

    Parameters
    ----------
    X : array-like
        Feature matrix; ``X.shape[1]`` fixes the number of weights.
    y : array-like
        Target values.
    learning_rate : float
        Step size applied to the gradient.
    num_iterations : int
        Number of update steps to perform.

    Returns
    -------
    tuple
        ``(weights, costs)`` where ``costs`` is a list holding one cost value
        per iteration, measured after that iteration's update.
    """
    weights = np.zeros(X.shape[1])
    sample_count = len(X)
    cost_history = []

    for _step in range(num_iterations):
        residuals = X.dot(weights) - y
        step_direction = X.T.dot(residuals) / sample_count
        # Updated in place so ``weights`` remains the array allocated above.
        weights -= learning_rate * step_direction
        cost_history.append(calculate_cost(X, y, weights))

    return weights, cost_history

In [19]:
def plot_cost_vs_iterations(costs):
    """Show a line chart of the cost value at each iteration.

    Parameters
    ----------
    costs : sequence
        Cost values; the x-axis is their position index starting at 0.
    """
    iteration_index = range(len(costs))
    plt.plot(iteration_index, costs)
    plt.title('Cost vs. Iterations')
    plt.xlabel('Iterations')
    plt.ylabel('Cost')
    plt.show()

In [20]:
def plot_regression_line(X, y, weights):
    """Show the actual targets and the model's predictions against one feature.

    Column 1 of ``X`` is used for the x-axis; this is the first real feature
    when column 0 of ``X`` holds the bias ones. Predictions are
    ``X.dot(weights)`` and therefore depend on every column of ``X``, not
    only the plotted one.

    The prediction line is drawn in ascending order of the plotted feature.
    ``plt.plot`` connects points in the order it receives them, so plotting
    in raw row order would make the line double back on itself whenever the
    rows are not already sorted by that feature.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_columns) with n_columns >= 2
        Feature matrix; converted with ``np.asarray`` so positional column
        indexing works for DataFrames as well as arrays.
    y : array-like
        Actual target values, one per row of ``X``.
    weights : array-like
        Model coefficients, one per column of ``X``.
    """
    features = np.asarray(X)
    plotted_feature = features[:, 1]
    predicted = np.asarray(features.dot(weights))
    # Stable sort keeps the original relative order of equal feature values.
    left_to_right = np.argsort(plotted_feature, kind='stable')

    plt.scatter(plotted_feature, y, color='blue', label='Actual')
    plt.plot(plotted_feature[left_to_right], predicted[left_to_right],
             color='red', label='Predicted')
    plt.xlabel('Feature')
    plt.ylabel('Output')
    plt.title('Linear Regression')
    plt.legend()
    plt.show()

In [None]:
def linear_regression(x_train, y_train, learning_rate=0.01, num_iterations=1000):
    """Train a linear regression model and show its diagnostic plots.

    The features are passed through ``preprocess_data`` and ``add_bias``,
    the weights are fitted with ``gradient_descent``, and then the cost
    curve and the regression plot are displayed.

    Parameters
    ----------
    x_train : array-like
        Raw training features.
    y_train : array-like
        Training targets, used unchanged.
    learning_rate : float, optional
        Gradient-descent step size (default 0.01).
    num_iterations : int, optional
        Number of gradient-descent steps (default 1000).

    Returns
    -------
    The weights returned by ``gradient_descent``. They apply to the
    preprocessed features with the bias column first, not to the raw
    ``x_train`` values.
    """
    design_matrix = add_bias(preprocess_data(x_train))
    targets = y_train

    weights, cost_history = gradient_descent(
        design_matrix, targets, learning_rate, num_iterations
    )

    # Plotting is a side effect of training; both figures are shown
    # before the weights are returned.
    plot_cost_vs_iterations(cost_history)
    plot_regression_line(design_matrix, targets, weights)

    return weights

# Load the dataset from a CSV file; the relative path is resolved against
# the current working directory.
dataset = "Medical Price Dataset.csv"
data = pd.read_csv(dataset)

# Print the column names
print(data.columns)

# Select the feature columns and the regression target. No transformation
# happens here; scaling is applied inside linear_regression().
x_train = data[['age', 'bmi', 'children']]
y_train = data['charges']

# Fit the model with the default learning rate and iteration count.
# linear_regression() also displays the cost curve and the regression plot.
weights = linear_regression(x_train, y_train)

# The printed weights come from the fit on the preprocessed features plus a
# bias column, so they are not expressed in the raw column units.
print("Weights:", weights)
