<a href="https://colab.research.google.com/github/amirrhseiin/Germany-Rental-Offers/blob/main/Germany_Rental_offers_Regressions.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Mount Google Drive inside the Colab VM so files under /content/drive can be read.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
from sklearn.metrics import mean_squared_error
from sklearn.utils import shuffle
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import Ridge
from sklearn.linear_model import Lasso

In [None]:
# Load the raw dataset from the mounted Drive. pandas infers zip compression from the
# ".zip" extension (default compression='infer'); the archive must hold exactly one CSV.
data = pd.read_csv("/content/drive/MyDrive/ML/EX1/part2/archive.zip")

In [None]:
# Columns kept for modelling; every other column of the raw frame is dropped.
selected_columns = ['serviceCharge', 'telekomUploadSpeed', 'heatingType', 'livingSpace']

# Per-column fill values: mean for serviceCharge, 0 for telekomUploadSpeed and the
# most frequent category for heatingType. livingSpace is not imputed.
num_values = {
    "serviceCharge": data["serviceCharge"].mean(),
    "telekomUploadSpeed": 0,
    "heatingType": data["heatingType"].mode()[0],
}

# Build a new frame instead of mutating the loaded one in place.
data = data[selected_columns].fillna(value=num_values)

# Remaining missing values per column.
data.isna().sum()

serviceCharge         0
telekomUploadSpeed    0
heatingType           0
livingSpace           0
dtype: int64

In [None]:
# Show three random rows as a quick sanity check of the selected columns.
data.sample(3)

Unnamed: 0,serviceCharge,telekomUploadSpeed,heatingType,livingSpace
212049,180.0,40.0,district_heating,60.0
118028,64.0,40.0,district_heating,49.0
68542,180.0,40.0,floor_heating,120.01


In [None]:
# One-hot encode heatingType, then append the numeric columns in the same order
# as before (telekomUploadSpeed, livingSpace, serviceCharge), aligned on the row index.
d = pd.get_dummies(data['heatingType'], prefix='heatingType').assign(
    telekomUploadSpeed=data['telekomUploadSpeed'],
    livingSpace=data['livingSpace'],
    serviceCharge=data['serviceCharge'],
)
d.sample(3)

Unnamed: 0,heatingType_central_heating,heatingType_combined_heat_and_power_plant,heatingType_district_heating,heatingType_electric_heating,heatingType_floor_heating,heatingType_gas_heating,heatingType_heat_pump,heatingType_night_storage_heater,heatingType_oil_heating,heatingType_self_contained_central_heating,heatingType_solar_heating,heatingType_stove_heating,heatingType_wood_pellet_heating,telekomUploadSpeed,livingSpace,serviceCharge
161181,1,0,0,0,0,0,0,0,0,0,0,0,0,40.0,58.24,120.0
189713,1,0,0,0,0,0,0,0,0,0,0,0,0,10.0,90.0,250.0
101048,1,0,0,0,0,0,0,0,0,0,0,0,0,40.0,68.0,90.0


In [None]:
# livingSpace is the regression target; every other column of `d` is a feature.
target_column = 'livingSpace'
data_features = d.drop(columns=target_column)
data_labels = d[target_column].copy()

In [None]:
# Hold out 20% of the rows for testing. A fixed random_state makes the split
# reproducible across kernel restarts (without it every run shuffles differently).
x_train, x_test, y_train, y_test = train_test_split(
    data_features, data_labels, test_size=0.2, random_state=42
)

In [None]:
# Learn mean/variance from the training split only, then apply those same
# statistics to both splits so no test-set information enters the scaling.
sc = StandardScaler().fit(x_train)

standard_X_train = sc.transform(x_train)
standard_X_test = sc.transform(x_test)

In [None]:
class SimpleLinearRegression():
    """Linear regression fitted by full-batch gradient descent on mean squared error.

    A column of ones is prepended to the features, so ``W[0]`` is the intercept
    and ``W[1:]`` are the feature coefficients.

    Parameters
    ----------
    learning_rate : float
        Step size applied to the gradient at every update.
    iterations : int
        Number of gradient steps performed by ``fit``.
    random_state : int or None, optional
        Seed for the initial weights. ``None`` (the default) draws from NumPy's
        global random state, which is the original behaviour.
    """

    def __init__(self, learning_rate, iterations, random_state=None):
        self.learning_rate = learning_rate
        self.iterations = iterations
        self.random_state = random_state
        self.W = None  # weight vector, populated by fit()

    @staticmethod
    def _add_intercept(X):
        """Return ``X`` as a float array with a leading column of ones."""
        X = np.asarray(X, dtype=float)
        return np.c_[np.ones(X.shape[0]), X]

    def fit(self, X, Y):
        """Estimate the weights from training data.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training features (NumPy array, DataFrame or nested list).
        Y : array-like of shape (n_samples,) or (n_samples, 1)
            Training targets.

        Returns
        -------
        SimpleLinearRegression
            The fitted estimator itself, so calls can be chained.

        Raises
        ------
        ValueError
            If ``X`` and ``Y`` do not contain the same number of samples.
        """
        X_train = self._add_intercept(X)
        # Flatten the target: an (n, 1) column would otherwise broadcast the
        # residual to (n, n) and silently turn W into a matrix.
        y = np.asarray(Y, dtype=float).ravel()
        if y.shape[0] != X_train.shape[0]:
            raise ValueError(
                "X and Y must have the same number of samples, got "
                f"{X_train.shape[0]} and {y.shape[0]}"
            )

        n_weights = X_train.shape[1]
        if self.random_state is None:
            self.W = np.random.rand(n_weights)
        else:
            self.W = np.random.RandomState(self.random_state).rand(n_weights)

        for _ in range(self.iterations):
            self.W = self.W - self.learning_rate * self.calcGradient(X_train, y)
        return self

    def calcGradient(self, X, Y):
        """Gradient of the mean squared error with respect to ``self.W``.

        ``X`` must already contain the intercept column. Returns a vector with
        the same shape as ``self.W``: ``2/n * X.T @ (X @ W - Y)``.
        """
        residual = np.dot(X, self.W) - np.asarray(Y, dtype=float).ravel()
        return 2 / X.shape[0] * np.dot(X.T, residual)

    def predict(self, X):
        """Predict targets for ``X`` (without intercept column) using the fitted weights.

        Raises
        ------
        AttributeError
            If the model has not been fitted yet.
        """
        if getattr(self, "W", None) is None:
            raise AttributeError(
                "SimpleLinearRegression has no fitted weights; call fit() first"
            )
        return np.dot(self._add_intercept(X), self.W)

In [None]:
# Gradient-descent model: 1000 update steps with a step size of 0.01.
model = SimpleLinearRegression(iterations = 1000, learning_rate = 0.01)

In [None]:
# Train on the standardized training features.
model.fit(standard_X_train, y_train)

In [None]:
# Predict livingSpace for the held-out (standardized) test features.
y_pred = model.predict(standard_X_test)

In [None]:
# Test-set mean squared error of the hand-written gradient-descent model.
print(f'MSE on the test set: {mean_squared_error(y_test, y_pred)}')

MSE on the test set: 1125.6715078175303


In [None]:
# Reference model: scikit-learn's LinearRegression trained on the same
# standardized split, for comparison with the gradient-descent implementation.
model_1 = LinearRegression().fit(standard_X_train, y_train)
y_pred_1 = model_1.predict(standard_X_test)

In [None]:
# Test-set mean squared error of the scikit-learn LinearRegression baseline.
print(f'MSE on the test set: {mean_squared_error(y_test, y_pred_1)}')

MSE on the test set: 1125.7051500327655


In [None]:
from sklearn.model_selection import KFold

In [None]:
# 10-fold shuffled cross-validation of Ridge (alpha=1.0) on the training split.
cv = KFold(n_splits=10, random_state=1, shuffle=True)
ridge_regression_model = Ridge(alpha=1.0)

# The scorer returns negated MSE (higher is better); take the magnitude to get plain MSE.
scores = np.abs(
    cross_val_score(
        ridge_regression_model,
        standard_X_train,
        y_train,
        scoring='neg_mean_squared_error',
        cv=cv,
        n_jobs=-1,
    )
)

# Mean and standard deviation of the per-fold MSE.
print('10 Fold:Mean MSE: %.3f (%.3f)' % (scores.mean(), scores.std()))

10 Fold:Mean MSE: 87382.327 (173090.936)


In [None]:
# 10-fold shuffled cross-validation of Lasso (alpha=1.0) on the training split.
cv = KFold(n_splits=10, random_state=1, shuffle=True)
lasso_regression_model = Lasso(alpha=1.0, fit_intercept=True, random_state=77)

# The scorer returns negated MSE (higher is better); take the magnitude to get plain MSE.
scores = np.abs(
    cross_val_score(
        lasso_regression_model,
        standard_X_train,
        y_train,
        scoring='neg_mean_squared_error',
        cv=cv,
        n_jobs=-1,
    )
)

# Mean and standard deviation of the per-fold MSE.
print('10 Fold:Mean MSE: %.3f (%.3f)' % (scores.mean(), scores.std()))

10 Fold:Mean MSE: 86213.890 (173254.721)
