In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor,AdaBoostRegressor
from sklearn.preprocessing import OrdinalEncoder,StandardScaler
from sklearn.model_selection import train_test_split,GridSearchCV
from sklearn.metrics import mean_absolute_error, mean_squared_error

In [2]:
PATH = '/Users/microwave/AIO_2024/Module_3/Week4/Housing.csv'
df = pd.read_csv(PATH)

In [3]:
categorical_cols = df.select_dtypes(include=['object']).columns.to_list()
print(categorical_cols)

['mainroad', 'guestroom', 'basement', 'hotwaterheating', 'airconditioning', 'prefarea', 'furnishingstatus']


In [4]:
ordinal_encoder = OrdinalEncoder()
encoded_categorical_cols = ordinal_encoder.fit_transform(
    df[categorical_cols]
)

encoded_categorical_df = pd.DataFrame(
    encoded_categorical_cols,
    columns=categorical_cols
)

numerical_df = df.drop(categorical_cols, axis = 1)
encoded_df = pd.concat(
    [numerical_df,encoded_categorical_df],axis=1
)

In [5]:
normalizer = StandardScaler()
dataset_arr = normalizer.fit_transform(encoded_df)

In [6]:
X,y = dataset_arr[:,1:], dataset_arr[:,0]

In [7]:
test_size = 0.3
seed = 1
X_train, X_val, y_train, y_val = train_test_split(X,y,test_size=test_size,shuffle=True)

In [8]:
param_grid = {
    'n_estimators': range(40,100),
    'criterion':['squared_error','absolute_error'],
    'max_features' : ['sqrt','log2'],
    'oob_score': [True,False],
    'bootstrap': [True]
}
grid_search_rf = GridSearchCV(estimator = RandomForestRegressor(),param_grid=param_grid,error_score='raise')

grid_search_rf.fit(X_train,y_train)

best_params = grid_search_rf.best_params_
print("Best parameters:", best_params)
y_pred_rf = grid_search_rf.predict(X_val)

Best parameters: {'bootstrap': True, 'criterion': 'absolute_error', 'max_features': 'log2', 'n_estimators': 63, 'oob_score': True}


In [9]:
param_grid = {
    'n_estimators': range(5, 30),
    'learning_rate': np.arange(0.1, 2, 0.1),
    'loss': ['linear', 'square']
}

grid_search_ada = GridSearchCV(estimator=AdaBoostRegressor(), param_grid=param_grid,error_score='raise')
grid_search_ada.fit(X_train, y_train)

best_params = grid_search_ada.best_params_
print("Best parameters:", best_params)
y_pred_ada = grid_search_ada.predict(X_val)

Best parameters: {'learning_rate': 0.8, 'loss': 'linear', 'n_estimators': 27}


  _data = np.array(data, dtype=dtype, copy=copy,


In [10]:
mae_rf = mean_absolute_error(y_val , y_pred_rf)
mse_rf = mean_squared_error(y_val , y_pred_rf )

mae_ada = mean_absolute_error(y_val , y_pred_rf)
mse_ada = mean_squared_error(y_val , y_pred_rf )

In [11]:
print('Evaluation results on validataion set for Random forest regressor with GridsearchCV: ')
print(f'Mean absolute error {mae_rf}')
print(f'Mean squared error {mse_rf}')

Evaluation results on validataion set for Random forest regressor with GridsearchCV: 
Mean absolute error 0.4620087152923201
Mean squared error 0.5006796000269119


In [12]:
print('Evaluation results on validataion set for Adaboost with GridsearchCV: ')
print(f'Mean absolute error {mae_ada}')
print(f'Mean squared error {mse_ada}')

Evaluation results on validataion set for Adaboost with GridsearchCV: 
Mean absolute error 0.4620087152923201
Mean squared error 0.5006796000269119


In [29]:
df = {'X':[3,5,8,10,12],'y':[12,20,28,32,36]}
df = pd.DataFrame(df)

tree_test = DecisionTreeRegressor(criterion='squared_error', random_state=42)
tree_test.fit(df[['X']],df[['y']])

array([12.])

In [30]:
tree_test.predict([[2]]), tree_test.predict([[15]])



(array([12.]), array([36.]))

In [32]:
data = {'X': [2, 1, 3, 5], 'Y': [4, 3, 5, 6]}
df = pd.DataFrame(data)


X = df[['X']]  
y = df['Y']    

model = RandomForestRegressor(n_estimators=2, max_depth=1, random_state=42, bootstrap=True)
model.fit(X, y)


X_test = np.array([[2]]) 
y_pred = model.predict(X_test)
y_pred, 



array([4.])