## Linear Regression with one variable

In [None]:
__author__ = "Sai Teja Gudapati"
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from numpy.linalg import pinv
%matplotlib inline

In [None]:
# Goal: predict a food chain's profit from the population of a city.
# The training set is loaded from disk into a pandas DataFrame.
data = pd.read_csv('./foodchain.txt')

# Input feature: the 'population' column
X = data['population']

# Target values: the 'profit' column
y = data['profit']

# m = number of training examples
m = y.shape[0]

In [None]:
# Visualization
# There is only one feature, so the training set fits on a plain 2D plot
# (markers only, no connecting line).

fig, ax = plt.subplots()
ax.plot(X, y, linestyle='', marker='o', label='population vs profit')
ax.set(
    xlabel='population in 10000s',
    ylabel='profits in 10000$',
    title='population vs profit',
)

### Since we have only one feature (population), we need to fit two parameters: theta0 and theta1.
### The hypothesis function is h(x) = theta0*x0 + theta1*x1.

### theta is the vector that contains theta0 and theta1.
### Here x1 is the population feature, and x0 is set to 1 for every training example, so theta0 acts as the intercept (recall y = mx + c, with c = theta0 and m = theta1).


In [None]:
# Start theta0 and theta1 at zero (in practice the parameters are usually initialized randomly).
theta = np.zeros((2, 1))
n = theta.shape[0]  # number of parameters

# Turn the feature and the target into column arrays of shape (m, 1)
X = np.array(X).reshape(m, n - 1)
y = np.array(y).reshape(m, 1)

# Prepend the x0 feature: a column of ones placed before the population column
X = np.concatenate((np.ones((m, 1)), X), axis=1)

# Training hyper-parameters
iterations = 1500  # number of gradient descent iterations
alpha = 0.01       # learning rate

In [None]:
# Calculate Cost Function
def computeCost(X, y, theta, m=None):
    """Return the half mean squared error of the hypothesis X.theta against y.

    The cost is sum((X.theta - y)**2) / (2*m), computed in one vectorized
    expression over the first ``m`` training examples.

    Parameters:
        X: design matrix with one row per training example.
        y: target values, one per training example (column vector).
        theta: parameter column vector with one entry per column of X.
        m: number of leading training examples to use. Optional; defaults
           to len(y), so the function can be called as computeCost(X, y, theta).

    Returns:
        A one-element NumPy array (shape (1,)) holding the cost, so existing
        callers that index the result with [0] keep working.

    Raises:
        ValueError: if m is not positive (the cost is undefined without data).
    """
    X = np.asarray(X, dtype=float)
    y = np.asarray(y, dtype=float)
    if m is None:
        m = len(y)
    if m <= 0:
        raise ValueError("computeCost needs at least one training example")

    # Force both sides to (m, k) so a 1-D theta or y cannot broadcast the
    # difference into an (m, m) matrix.
    predictions = np.dot(X[:m], theta).reshape(m, -1)
    targets = y[:m].reshape(m, -1)
    residuals = predictions - targets
    return np.sum(residuals ** 2, axis=0) / (2 * m)
#print(computeCost(X,y,theta)[0]) #This value should be approximately 32.07

In [None]:
# Function to perform Gradient Descent
def gradientDescent(X, y, theta, alpha, num_iters):
    """Fit linear-regression parameters with batch gradient descent.

    Each iteration applies theta := theta - (alpha/m) * X^T (X.theta - y)
    and records the resulting cost, sum((X.theta - y)**2) / (2*m), which is
    the same half mean squared error that computeCost evaluates.

    Parameters:
        X: design matrix of shape (m, n), including the column of ones.
        y: target column vector of shape (m, 1).
        theta: initial parameter column vector of shape (n, 1). It is not
               modified; the optimisation runs on a float copy.
        alpha: learning rate.
        num_iters: number of update steps to perform.

    Returns:
        (theta, cost_history): the fitted (n, 1) parameter vector and a
        (num_iters, 1) array with the cost after every iteration.
    """
    X = np.asarray(X, dtype=float)
    y = np.asarray(y, dtype=float)
    m = len(y)

    # Work on a float copy: the in-place update below must not overwrite the
    # caller's initial guess, and it would fail on an integer-typed theta.
    theta = np.array(theta, dtype=float)

    cost_history = np.zeros((num_iters, 1))
    X_trans = X.T
    residuals = np.dot(X, theta) - y
    for step in range(num_iters):
        theta -= (alpha / m) * np.dot(X_trans, residuals)
        # The residuals of the updated theta give this step's cost and are
        # reused as the gradient input of the next step.
        residuals = np.dot(X, theta) - y
        cost_history[step, 0] = np.sum(residuals ** 2) / (2 * m)
    return theta, cost_history
# Train the model: theta is rebound to the fitted parameters, cost_his to the per-iteration cost
theta, cost_his = gradientDescent(X, y, theta, alpha=alpha, num_iters=iterations)
print(theta)

In [None]:
# Draw the fitted line on top of the training data, using the final theta
fig, ax = plt.subplots()
# Training examples as markers only
ax.plot(data['population'], data['profit'], linestyle='', marker='o', label='population vs profit')
# Model predictions X.theta for the same populations
ax.plot(data['population'], X @ theta)
ax.set(
    xlabel='population in 10000s',
    ylabel='profits in 10000$',
    title='population vs profit',
)

In [None]:
# Visualize how the cost function varies over a grid of (theta0, theta1) values
theta0_vals = np.arange(-10, 10, 0.2)
theta1_vals = np.arange(-1, 4, 0.05)

# cost_vals[i, j] holds the cost at (theta0_vals[i], theta1_vals[j])
cost_vals = np.zeros((len(theta0_vals), len(theta1_vals)))

fig = plt.figure()
# Create the 3D axes through the figure so they are actually attached to it;
# constructing Axes3D(fig) directly no longer adds the axes on current Matplotlib.
ax = fig.add_subplot(111, projection='3d')

for i, t0 in enumerate(theta0_vals):
    for j, t1 in enumerate(theta1_vals):
        t = np.array([t0, t1]).reshape(2, 1)
        # computeCost returns a one-element array; squeeze it to a scalar
        # before storing it in a single grid cell.
        cost_vals[i, j] = float(np.squeeze(computeCost(X, y, t, m)))

# indexing='ij' makes both grids (len(theta0), len(theta1)), the same layout
# as cost_vals. The default 'xy' layout is the transpose and would plot the
# surface with the two axes swapped.
theta0_vals, theta1_vals = np.meshgrid(theta0_vals, theta1_vals, indexing='ij')

ax.plot_surface(theta0_vals, theta1_vals, cost_vals)
ax.set(xlabel='theta0', ylabel='theta1', zlabel='cost', title='cost vs theta')



In [None]:
# predict profits for given population using our model
def predict(population, params=None):
    """Predict the profit for a city with the given population.

    The population is divided by 10000 before the hypothesis is applied and
    the result is multiplied by 10000 again, matching the units used on the
    plot axes in this notebook.

    Parameters:
        population: population of the city (a plain head count).
        params: optional pair (theta0, theta1) to predict with, as a (2, 1)
                array or any 2-element sequence. Defaults to the module-level
                ``theta`` fitted by gradient descent.

    Returns:
        The predicted profit.
    """
    coeffs = theta if params is None else params
    coeffs = np.asarray(coeffs, dtype=float).reshape(2, 1)
    pop = population / 10000
    # Leading 1 is the x0 (intercept) feature
    feature_vector = np.array([1, pop]).reshape(1, 2)
    profit = np.dot(feature_vector, coeffs)[0][0]
    return profit * 10000
print(predict(population=35000))  # predicted profit for a city of 35000 inhabitants

# Linear Regression with multiple variables (Multivariate Linear Regression)

In [None]:
# Goal: predict house prices.
# Read the training set from disk.
data1 = pd.read_csv('./house_prices.txt')

# Input matrix: the first two columns of the file
X1 = np.array(data1.iloc[:, 0:2])

# m1 = number of training examples (rows of X1)
m1 = X1.shape[0]

# Price vector: the third column, as an (m1, 1) column
y1 = np.array(data1.iloc[:, 2]).reshape(m1, 1)

# n1 = number of features + 1, i.e. one parameter per feature plus theta0
n1 = X1.shape[1] + 1

# Parameter vector, initialized to zeros
theta1 = np.zeros((n1, 1))

# Show the raw data
print(data1)

#### The data above has two features: size and number of bedrooms. Their scales differ by a factor of about 1000. To make gradient descent converge faster, we scale the features so that their values are comparable. This feature scaling is implemented in the next cell.

In [None]:
# Feature Normalization
def featureNormalize(X):
    """Standardize every column of X to zero mean and unit standard deviation.

    Parameters:
        X: array with one row per example and one column per feature.

    Returns:
        (X_norm, mu, sigma): the normalized features, the per-column means
        and the per-column scale factors that were divided by. For a constant
        column the scale factor is 1 instead of its zero standard deviation,
        so that column normalizes to zeros rather than NaN, and reusing the
        returned sigma on new inputs cannot divide by zero.
    """
    X = np.asarray(X, dtype=float)
    mu = np.mean(X, axis=0)
    sigma = np.std(X, axis=0)
    # A constant feature has zero spread; dividing by it would produce NaN/inf.
    sigma = np.where(sigma == 0, 1.0, sigma)
    X_norm = (X - mu) / sigma
    return X_norm, mu, sigma

# Replace X1 with its normalized version; mu and sigma are kept to scale inputs at prediction time
X1, mu, sigma = featureNormalize(X=X1)

In [None]:
# Prepend a column of ones to the input matrix to represent the feature x0
X1 = np.concatenate((np.ones((m1, 1)), X1), axis=1)

# Learning rate
alpha1 = 0.01

# Number of iterations after which gradient descent stops
num_iters = 400

In [None]:
# Run gradient descent on the normalized house data
theta1, cost_his1 = gradientDescent(X1, y1, theta1, alpha=alpha1, num_iters=num_iters)
print(theta1)

#### To check that gradient descent is converging, we plot the cost after each iteration against the number of iterations. The curve should be decreasing. If the curve is increasing instead, gradient descent is not converging, which usually means the learning rate is too high.

In [None]:
# Plot cost vs. number of iterations to check whether gradient descent converged
fig, ax = plt.subplots()
# Derive the x-axis from the recorded history instead of a hard-coded 400, so the
# plot still works when the number of iterations is changed.
ax.plot(np.arange(1, len(cost_his1) + 1), cost_his1)
ax.set(xlabel='no of iterations', ylabel = 'cost', title='cost vs iterations')

In [None]:
# Predict the prices of houses
def predict_price(size, bedrooms, theta1):
    """Predict the price of a house from its size and number of bedrooms.

    The raw inputs are scaled with the module-level ``mu`` and ``sigma``
    before the hypothesis is applied, because the parameters were fitted on
    normalized features.

    Parameters:
        size: size of the house.
        bedrooms: number of bedrooms.
        theta1: (3, 1) parameter vector to predict with.

    Returns:
        The predicted price.
    """
    scaled_size = (size - mu[0]) / sigma[0]
    scaled_bedrooms = (bedrooms - mu[1]) / sigma[1]
    # Single-row design matrix: [x0 = 1, scaled size, scaled bedrooms]
    row = np.array([[1, scaled_size, scaled_bedrooms]])
    return np.dot(row, theta1)[0][0]

# Predicted price of a 1650 sq-ft, 3-bedroom house with the gradient descent parameters
print(predict_price(size=1650, bedrooms=3, theta1=theta1))

In [None]:
# So far we have used gradient descent. Another approach solves a system of linear equations for theta directly.
# It is called the normal equation method. It needs the (pseudo-)inverse of the n x n matrix X^T X, so it is practical when the number of features n is small enough for that inversion to be cheap.
def normal_equation(X, y):
    """Solve for the linear-regression parameters in closed form.

    Computes theta = pinv(X^T X) . X^T y.

    Parameters:
        X: design matrix, including the column of ones.
        y: target column vector.

    Returns:
        The parameter vector theta.
    """
    Xt = np.transpose(X)
    gram = Xt @ X      # X^T X
    moment = Xt @ y    # X^T y
    return pinv(gram) @ moment
theta1_normal = normal_equation(X=X1, y=y1)

# Price of the same 1650 sq-ft, 3-bedroom house, predicted with the normal equation parameters
print(predict_price(size=1650, bedrooms=3, theta1=theta1_normal))