# **Linear Regression**

In [1]:
# Importing libraries

import numpy as np

import pandas as pd

from sklearn.model_selection import train_test_split

import matplotlib.pyplot as plt

from sklearn import metrics

In [2]:
# MultipleLinearRegression

class MultipleLinearRegression() :
	
	def __init__( self, learning_rate, iterations ) :
		
		self.learning_rate = learning_rate
		
		self.iterations = iterations
		
	# Function for model training
			
	def fit( self, X, Y ) :
		
		# no_of_training_examples, no_of_features
		
		self.m, self.n = X.shape
		
		# weight initialization
		
		self.W = np.zeros( self.n )
		
		self.b = 0
		
		self.X = X
		
		self.Y = Y
		
		
		# gradient descent learning
				
		for i in range( self.iterations ) :
			
			self.update_weights()
			
		return self
	
	# Helper function to update weights in gradient descent
	
	def update_weights( self ) :
			
		Y_pred = self.predict( self.X )
		
		# calculate gradients
	
		dW = - ( 2 * ( self.X.T ).dot( self.Y - Y_pred ) ) / self.m
	
		db = - 2 * np.sum( self.Y - Y_pred ) / self.m
		
		# update weights
	
		self.W = self.W - self.learning_rate * dW
	
		self.b = self.b - self.learning_rate * db
		
		return self
	
	# Hypothetical function h( x )
	
	def predict( self, X ) :
	
		return X.dot( self.W ) + self.b


In [3]:
# Importing dataset
	
df = pd.read_csv( "Medical Price Dataset.csv" )

In [4]:
df.head()

Unnamed: 0,age,sex,bmi,children,smoker,region,charges
0,19,female,27.9,0,yes,southwest,16884.924
1,18,male,33.77,1,no,southeast,1725.5523
2,28,male,33.0,3,no,southeast,4449.462
3,33,male,22.705,0,no,northwest,21984.47061
4,32,male,28.88,0,no,northwest,3866.8552


We need to encode sex , smoker and region before we can train the model

In [5]:
##Converting category labels into numerical using LabelEncoder
from sklearn.preprocessing import LabelEncoder
label = LabelEncoder()
label.fit(df.sex.drop_duplicates())
df.sex = label.transform(df.sex)
label.fit(df.smoker.drop_duplicates())
df.smoker = label.transform(df.smoker)
label.fit(df.region.drop_duplicates())
df.region = label.transform(df.region)
df.dtypes

age           int64
sex           int64
bmi         float64
children      int64
smoker        int64
region        int64
charges     float64
dtype: object

In [9]:
df.head()

Unnamed: 0,age,sex,bmi,children,smoker,region,charges
0,19,0,27.9,0,1,3,16884.924
1,18,1,33.77,1,0,2,1725.5523
2,28,1,33.0,3,0,2,4449.462
3,33,1,22.705,0,0,1,21984.47061
4,32,1,28.88,0,0,1,3866.8552


In [6]:
X = df.iloc[:,:-1].values

Y = df.iloc[:,1].values

# Splitting dataset into train and test set

X_train, X_test, Y_train, Y_test = train_test_split(
X, Y, test_size = 1/3, random_state = 0 )

In [8]:
# Model training
	
model = MultipleLinearRegression( iterations = 1000, learning_rate = 0.01)

model.fit( X_train, Y_train )

  dW = - ( 2 * ( self.X.T ).dot( self.Y - Y_pred ) ) / self.m
  self.W = self.W - self.learning_rate * dW


<__main__.MultipleLinearRegression at 0x7fa6aaf51a80>

In [None]:
# Prediction on test set

Y_pred = model.predict( X_test )

print( "Predicted values ", np.round( Y_pred[:3], 2 ) )

print( "Real values	 ", Y_test[:3] )

print( "Trained W	 ", round( model.W[0], 2 ) )

print( "Trained b	 ", round( model.b, 2 ) )