In [1]:
import pandas as pd
import numpy as np
from scipy.stats import uniform
from sklearn.datasets import load_iris
from sklearn.datasets import load_boston
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import SequenceSearchCV
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn import metrics

In [2]:
# Load iris and hold out 40% of the rows for scoring (seeded split).
iris = load_iris()
X_train, X_test, y_train, y_test = train_test_split(
    iris.data,
    iris.target,
    test_size=0.4,
    random_state=0,
)

# Base estimator plus the space to search: C is drawn from uniform(loc=0, scale=4),
# penalty is chosen from the two listed options.
logistic = LogisticRegression(solver='saga', tol=1e-2, max_iter=200, random_state=0)
distributions = {
    'C': uniform(loc=0, scale=4),
    'penalty': ['l2', 'l1'],
}

# Run the search on the training split, then report the selected parameters
# and the score on the held-out split.
clf = SequenceSearchCV(logistic, distributions, random_state=0)
search = clf.fit(X_train, y_train)
print(search.best_params_)
print(search.score(X_test, y_test))

{'C': 2.0, 'penalty': 'l2'}
0.9666666666666667


In [3]:
# Fetch the Boston housing bunch and list the fields it exposes.
# NOTE(review): load_boston is deprecated in scikit-learn 1.0 and removed in 1.2;
# this cell needs an environment that still ships it.
boston = load_boston()
print(boston.keys())

# One frame holding the named feature columns, with the target appended as PRICE.
data = pd.DataFrame(boston.data, columns=boston.feature_names)
data['PRICE'] = boston.target
data.head()

dict_keys(['data', 'target', 'feature_names', 'DESCR', 'filename', 'data_module'])


Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,PRICE
0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.09,1.0,296.0,15.3,396.9,4.98,24.0
1,0.02731,0.0,7.07,0.0,0.469,6.421,78.9,4.9671,2.0,242.0,17.8,396.9,9.14,21.6
2,0.02729,0.0,7.07,0.0,0.469,7.185,61.1,4.9671,2.0,242.0,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0.0,0.458,6.998,45.8,6.0622,3.0,222.0,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0.0,0.458,7.147,54.2,6.0622,3.0,222.0,18.7,396.9,5.33,36.2


In [6]:
def print_regression_metrics(y_true, y_pred, n_features, label):
    """Print R^2, adjusted R^2, MAE, MSE and RMSE for one set of predictions.

    Parameters
    ----------
    y_true : array-like
        Observed target values.
    y_pred : array-like
        Predictions aligned with ``y_true``.
    n_features : int
        Number of predictors; used for the adjusted R^2 correction.
    label : str
        Heading printed above the metrics so that the train and test
        reports can be told apart in the cell output.

    Returns
    -------
    float
        The unadjusted R^2 score.
    """
    n_samples = len(y_true)
    r2 = metrics.r2_score(y_true, y_pred)
    mse = metrics.mean_squared_error(y_true, y_pred)

    # Adjusted R^2 divides by (n - p - 1); report NaN instead of dividing by
    # zero (or flipping sign) when there are too few rows for the correction.
    dof = n_samples - n_features - 1
    if dof > 0:
        adjusted_r2 = 1 - (1 - r2) * (n_samples - 1) / dof
    else:
        adjusted_r2 = float('nan')

    print('---', label, '---')
    print('R^2:', r2)
    print('Adjusted R^2:', adjusted_r2)
    print('MAE:', metrics.mean_absolute_error(y_true, y_pred))
    print('MSE:', mse)
    print('RMSE:', np.sqrt(mse))
    return r2


# Features are every column except the PRICE target.
X = data.drop(['PRICE'], axis = 1)
y = data['PRICE']

# 50/50 split with a fixed seed so the partition is repeatable.
X_train, X_test, y_train, y_test = train_test_split(X,y, train_size=0.5, random_state = 4)

# Seed the forest as well as the search: without it every fit draws different
# bootstrap samples and the reported numbers change from run to run.
rf_reg = RandomForestRegressor(random_state=0)
# NOTE(review): max_features="auto" is deprecated for RandomForestRegressor since
# scikit-learn 1.1 (documented replacement: 1.0) and removed in 1.3 — confirm
# that the search class accepts a float in this list before switching.
hps = dict(n_estimators=[50,100,150,200,250],min_samples_split=[2,3,4],min_samples_leaf=[1,2,3],max_features=["auto","sqrt","log2"])
reg = SequenceSearchCV(rf_reg, hps, random_state=0, n_iter=60)

# Search over the training split only.
reg.fit(X_train, y_train)
y_pred = reg.predict(X_train)

# Fit quality on the rows the model was trained on.
print(reg.best_params_)
print_regression_metrics(y_train, y_pred, X_train.shape[1], 'Train')

# Held-out performance. The R^2 is kept under the existing name `acc_linreg`
# because the following cell assigns to the same variable.
y_test_pred = reg.predict(X_test)
acc_linreg = print_regression_metrics(y_test, y_test_pred, X_test.shape[1], 'Test')

{'n_estimators': 250, 'min_samples_split': 3, 'min_samples_leaf': 1, 'max_features': 'sqrt'}
R^2: 0.9651819881919373
Adjusted R^2: 0.9632881214408712
MAE: 0.9827411644394267
MSE: 2.3569209120179595
RMSE: 1.535226664703932


In [7]:
# Score the tuned model on the complete dataset.
# NOTE(review): X holds the training rows as well as the held-out rows, so
# these figures are not a pure out-of-sample estimate.
y_pred1 = reg.predict(X)

# Compute each metric once and reuse it in the derived quantities.
acc_linreg = metrics.r2_score(y, y_pred1)
mse_all = metrics.mean_squared_error(y, y_pred1)
n_rows = len(y)
n_cols = X.shape[1]

print('R^2:', acc_linreg)
print('Adjusted R^2:', 1 - (1 - acc_linreg) * (n_rows - 1) / (n_rows - n_cols - 1))
print('MAE:', metrics.mean_absolute_error(y, y_pred1))
print('MSE:', mse_all)
print('RMSE:', np.sqrt(mse_all))

R^2: 0.8838074436728768
Adjusted R^2: 0.8807373151520381
MAE: 1.826805278562018
MSE: 9.80892403378601
RMSE: 3.1319201831761307


In [None]:
from sklearn.datasets import load_boston
import pandas as pd

# Reload the Boston housing bunch and list the fields it exposes.
boston = load_boston()
print(boston.keys())

# Rebuild the frame with named feature columns.
data = pd.DataFrame(boston.data, columns=boston.feature_names)
# The target must be appended as the last column: the cells below call
# data.drop(['PRICE']) / data['PRICE'] and take data.iloc[:, -1] as the label.
# Without this line the first raises KeyError and the second would silently
# use the last feature column as the target.
data['PRICE'] = boston.target
data.head()

In [None]:
from sklearn import metrics
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
import numpy as np

# Features are every column except the PRICE target.
X = data.drop(['PRICE'], axis = 1)
y = data['PRICE']

# 50/50 split with a fixed seed so the partition is repeatable.
X_train, X_test, y_train, y_test = train_test_split(X,y, train_size=0.5, random_state = 4)

# Baseline forest with default hyperparameters. It is seeded so that repeated
# runs of this cell report the same numbers (an unseeded forest draws new
# bootstrap samples on every fit).
# NOTE: this rebinds `reg`, replacing the search object created in the earlier cell.
reg = RandomForestRegressor(random_state=0)

# Fit on the training split and predict the same rows back.
reg.fit(X_train, y_train)
y_pred = reg.predict(X_train)

# Training-set metrics; R^2 and MSE are computed once and reused.
train_r2 = metrics.r2_score(y_train, y_pred)
train_mse = metrics.mean_squared_error(y_train, y_pred)
n_samples = len(y_train)
n_features = X_train.shape[1]

print('R^2:', train_r2)
print('Adjusted R^2:', 1 - (1 - train_r2) * (n_samples - 1) / (n_samples - n_features - 1))
print('MAE:', metrics.mean_absolute_error(y_train, y_pred))
print('MSE:', train_mse)
print('RMSE:', np.sqrt(train_mse))

In [6]:
import xgboost as xgb
from sklearn.metrics import mean_squared_error
import pandas as pd
import numpy as np

In [7]:
# Positional split of the frame: every column but the last is a feature,
# the last column is the label.
# NOTE(review): this relies on the target being the final column of `data`.
X = data.iloc[:, :-1]
y = data.iloc[:, -1]

# Wrap the same features/labels in XGBoost's DMatrix container.
data_dmatrix = xgb.DMatrix(data=X, label=y)

In [8]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=123,
)

In [9]:
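# Earlier fixed-hyperparameter configuration, kept for reference only; the next
# cell builds xg_reg with objective 'reg:squarederror' and searches over these
# settings instead of hard-coding them.
# xg_reg = xgb.XGBRegressor(objective ='reg:linear', colsample_bytree = 0.3, learning_rate = 0.1,
#                 max_depth = 5, alpha = 10, n_estimators = 10)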

In [10]:
from scipy.stats import uniform
from sklearn.model_selection import RandomizedSearchCV
import warnings

# Import the estimator class directly. This cell ends by binding the name `xgb`
# to the fitted search object, which hides the `xgboost` module alias; looking
# up `xgb.XGBRegressor` would then fail with AttributeError when the cell is
# run a second time.
from xgboost import XGBRegressor

# Silence UserWarning for the rest of the session.
warnings.filterwarnings(action='ignore', category=UserWarning)

xg_reg = XGBRegressor(objective ='reg:squarederror')

# Search space: continuous ranges are frozen scipy distributions
# (uniform(loc, scale) covers [loc, loc + scale]); discrete ones are lists.
hps = dict(learning_rate=uniform(loc=0.1,scale=0.5), colsample_bytree=uniform(loc=0.1,scale=0.5), 
           max_depth=[3,4,5,6,7], alpha=list(np.arange(0,11)), n_estimators=list(np.arange(5,11)))

# Run the search on the training split and score the held-out split.
xgb_search = SequenceSearchCV(xg_reg, hps, random_state=0, n_iter=50)
xgb_search.fit(X_train, y_train)
preds = xgb_search.predict(X_test)
rmse = np.sqrt(mean_squared_error(y_test, preds))
print("RMSE: %f" % (rmse))

# The following cells read the results through `xgb.best_params_` and
# `xgb.score(...)`, so the search object stays reachable under that name.
# NOTE(review): this still shadows the xgboost module alias for any cell run
# afterwards; prefer `xgb_search` in new code.
xgb = xgb_search

RMSE: 5.108768


In [11]:
# Hyperparameter combination selected by the search. `xgb` here is the fitted
# search object assigned in the previous cell, not the xgboost module.
xgb.best_params_

{'learning_rate': 0.566796875,
 'colsample_bytree': 0.570703125,
 'max_depth': 4,
 'alpha': 7,
 'n_estimators': 8}

In [12]:
# Score of the fitted search object on the training split.
# NOTE(review): presumably the regressor's default R^2 scorer — confirm.
xgb.score(X_train, y_train)

0.9481816156597155

In [None]:
# Same score on the held-out split, for comparison with the training figure.
xgb.score(X_test, y_test)