# multiple linear regression 


### importing libraries

In [1]:
from sklearn.datasets import fetch_california_housing
import pandas as pd
import numpy as np

### extracting dataframe

In [2]:
# Load the California housing data with pandas containers (as_frame=True).
data = fetch_california_housing(as_frame=True)
df = pd.DataFrame(data.frame)
# Names of every column in the frame; the scaling cell iterates over these.
features = df.columns.tolist()
price = data.target

# Derive the split sizes from the rows actually loaded instead of a hard-coded
# row count, so the 80/20 split stays correct if the frame ever changes.
size = len(df)
train_size = int(0.8 * size)
test_size = size - train_size

# scaling data


In [3]:
# Standardise every column listed in `features` to zero mean and unit variance.
# ddof=0 (population standard deviation) is passed explicitly so these columns
# follow the same convention as np.std, which scales the target below; pandas'
# default of ddof=1 would make the two scalings disagree slightly.
# NOTE(review): the statistics are taken over all rows, including the rows that
# are later held out for testing.
column_means = df[features].mean()
column_stds = df[features].std(ddof=0)
scaled_data = df.copy()
scaled_data[features] = (df[features] - column_means) / column_stds

# Standardise the target with the same (population) convention.
scaled_price = (price - np.mean(price)) / np.std(price)

# extract all features

In [4]:
# Pull each scaled column out of the frame as its own 1-D numpy array.
# NOTE(review): 'MedHouseVal' is extracted alongside the inputs here; it looks
# like the regression target itself -- confirm it is meant to be a model input.
(medinc, HouseAge, AveRooms, AveBedrms, Population,
 AveOccup, Latitude, Longitude, MedHouseVal) = (
    np.array(scaled_data[column_name])
    for column_name in (
        'MedInc', 'HouseAge', 'AveRooms', 'AveBedrms', 'Population',
        'AveOccup', 'Latitude', 'Longitude', 'MedHouseVal',
    )
)

# Number of fitted coefficients: the nine columns above plus one constant term.
no_features = 10

# split train and test data

In [5]:
# ---- training partition: the first `train_size` samples ----
train_price = np.array(scaled_price[:train_size])
(train_medinc, train_HouseAge, train_AveRooms, train_AveBedrms, train_Population,
 train_AveOccup, train_Latitude, train_Longitude, train_MedHouseVal) = (
    column[:train_size].copy()
    for column in (medinc, HouseAge, AveRooms, AveBedrms, Population,
                   AveOccup, Latitude, Longitude, MedHouseVal)
)

# One row per input column, followed by a row of ones for the constant term.
train_data1 = np.vstack((
    train_medinc,
    train_HouseAge,
    train_AveRooms,
    train_AveBedrms,
    train_Population,
    train_AveOccup,
    train_Latitude,
    train_Longitude,
    train_MedHouseVal,
    np.ones(train_size),
))

# ---- test partition: everything after the first `train_size` samples ----
test_price = np.array(scaled_price[train_size:])
(test_medinc, test_HouseAge, test_AveRooms, test_AveBedrms, test_Population,
 test_AveOccup, test_Latitude, test_Longitude, test_MedHouseVal) = (
    column[train_size:].copy()
    for column in (medinc, HouseAge, AveRooms, AveBedrms, Population,
                   AveOccup, Latitude, Longitude, MedHouseVal)
)

# NOTE(review): unlike train_data1, this stack has no trailing row of ones.
test_data = np.vstack((
    test_medinc,
    test_HouseAge,
    test_AveRooms,
    test_AveBedrms,
    test_Population,
    test_AveOccup,
    test_Latitude,
    test_Longitude,
    test_MedHouseVal,
))



### prediction straight line equation
**price = a\*a_ + b\*b_ + c\*c_ + d\*d_ + e\*e_ + f\*f_ + g\*g_ + h\*h_ + i\*i_ + const**   
*x => coefficient of variable x*   
*x_ => variable x*   
- a -> 'MedInc'  
- b -> 'HouseAge'   
- c -> 'AveRooms'   
- d -> 'AveBedrms'    
- e -> 'Population'    
- f -> 'AveOccup'
- g -> 'Latitude'    
- h -> 'Longitude'   
- i -> 'MedHouseVal'

In [6]:
def der_loss(price, cf_features, train_size, k, train_data):
    """Return the mean of ``(prediction - price) * train_data[k]`` over the samples.

    The prediction is linear: every entry of ``cf_features`` except the last
    multiplies the matching row of ``train_data``, and the last entry is added
    as a constant. Multiplied by 2, the returned value is the partial
    derivative of the mean squared error with respect to coefficient ``k``,
    provided ``train_data[k]`` is the input that coefficient multiplies (a row
    of ones for the constant term).

    Parameters
    ----------
    price : array-like
        Target values; the first ``train_size`` entries are used.
    cf_features : array-like
        Coefficients, constant term last.
    train_size : int
        Number of leading samples to include.
    k : int
        Row of ``train_data`` that weights the residuals.
    train_data : 2-D array-like
        One row per input, one column per sample. Rows beyond the coefficient
        count minus one are only read when selected through ``k``.

    Returns
    -------
    float
        Average weighted residual over the first ``train_size`` samples.
    """
    coefficients = np.asarray(cf_features, dtype=float)
    samples = np.asarray(train_data, dtype=float)[:, :train_size]
    targets = np.asarray(price, dtype=float)[:train_size]

    # All coefficients but the last multiply an input row; the last is the constant.
    n_inputs = coefficients.shape[0] - 1
    predicted = coefficients[:n_inputs] @ samples[:n_inputs] + coefficients[n_inputs]

    # A single vectorised pass replaces the per-sample Python loop, which made
    # every call cost `train_size` interpreted iterations.
    return np.mean((predicted - targets) * samples[k])


In [7]:
learn_rate = 0.01
# Every coefficient (constant term included) starts at 1.
cf_features = np.ones(no_features)

for epoch in range(1000):
    # Coefficients are refreshed one after another, so each derivative below is
    # evaluated with the coefficients already updated earlier in this sweep.
    for k in range(no_features):
        cf_features[k] -= learn_rate * 2 * der_loss(train_price, cf_features, train_size, k, train_data1)

KeyboardInterrupt: 

# testing model