In [None]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
from sklearn.metrics import root_mean_squared_error, r2_score

In [None]:
# Load the two-column dataset. Because `names=` is given without `header=`,
# pandas treats every line of the file (including the first) as data.
column_names = ['population', 'profit']
df = pd.read_csv(r'profits.csv', names=column_names)

# Preview the first rows as the cell's rich output.
df.head()

In [None]:
# Scatter plot of the raw training data (profit against population).
fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(df['population'], df['profit'], c='blue', marker='o', label='Training Data')
ax.set(
    title='Profit vs. Population of City',
    xlabel='Population of City in 10,000s',
    ylabel='Profit in $10,000s',
)
ax.legend()
plt.show()

In [None]:
# Feature and target as column vectors of shape (m, 1).
X = df['population'].to_numpy().reshape(-1, 1)
y = df['profit'].to_numpy().reshape(-1, 1)

# Number of training examples.
m = y.shape[0]

# Design matrix of shape (m, 2): a leading column of ones followed by the
# feature, so the first coefficient multiplies a constant 1 (intercept term).
X_b = np.hstack([np.ones((m, 1)), X])

In [None]:
# Batch gradient descent for the linear model X_b @ theta, minimising the
# mean squared error J(theta) = (1 / m) * sum((X_b @ theta - y) ** 2).
theta = np.zeros((2, 1))   # parameters [intercept, slope], started at zero
learning_rate = 0.01       # step size applied to the gradient
n_iterations = 1500        # number of full-batch updates
cost_history = []          # J(theta) recorded once per iteration

# The gradient of the MSE carries a constant factor 2 / m; compute it once.
gradient_scale = 2 / m

for iteration in range(n_iterations):
    # Residuals of the current parameters on the whole training set.
    y_pred = np.dot(X_b, theta)
    error = y_pred - y

    # Gradient of J with respect to theta, then one descent step.
    gradients = gradient_scale * np.dot(X_b.T, error)
    theta = theta - learning_rate * gradients

    # `error` was computed before the update above, so the recorded cost is
    # the one for the parameters this iteration started with.
    cost = (1 / m) * np.square(error).sum()
    cost_history.append(cost)

print("\nTheta found by Gradient Descent:", theta, sep="\n")

In [None]:
# Overlay the fitted regression line on the training data.
fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(df['population'], df['profit'], c='blue', marker='o', label='Training Data')

# Model predictions for every training input, drawn as a line.
fitted_values = np.dot(X_b, theta)
ax.plot(X, fitted_values, c='red', label='Linear Regression')

ax.set(
    title='Linear Regression Fit',
    xlabel='Population of City in 10,000s',
    ylabel='Profit in $10,000s',
)
ax.grid(True)
ax.legend()
plt.show()

In [None]:
# Convergence curve: the cost recorded at each gradient-descent iteration.
fig, ax = plt.subplots(figsize=(10, 6))
iteration_index = range(n_iterations)
ax.plot(iteration_index, cost_history, c='green')
ax.set(
    title='Cost Function over Iterations',
    xlabel='Number of Iterations',
    ylabel='Cost (J)',
)
ax.grid(True)
plt.show()

In [None]:
# Score the fitted model on the training data itself (no held-out set here).
# The predictions are computed once and shared by both metrics.
train_predictions = X_b.dot(theta)

rmse = root_mean_squared_error(y, train_predictions)
r2 = r2_score(y, train_predictions)

print('Root mean squared error:', rmse)
print('R2 score:', r2)