In [None]:
import pandas as pd
import numpy as np
import sklearn.metrics as metrics
from sklearn.model_selection import RepeatedKFold
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import GridSearchCV,train_test_split,KFold
from sklearn.model_selection import train_test_split,cross_val_score,cross_val_predict,RepeatedKFold,GridSearchCV
from sklearn.metrics import mean_absolute_error,r2_score
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression


In [None]:
def AARE(y_actual,y_pred):
    """Return the average absolute relative error (AARE) in percent.

    Computes ``mean(|(y_pred - y_actual) / y_actual|) * 100``, pairing the
    values strictly by position.

    Parameters
    ----------
    y_actual : array-like
        Reference values; each one is used as the denominator of its own
        relative error, so a zero entry yields ``inf``/``nan`` in the result.
    y_pred : array-like
        Predicted values, positionally matched with ``y_actual``.

    Returns
    -------
    float
        The mean absolute relative error multiplied by 100.
    """
    # Work on plain float arrays: pandas arithmetic aligns on index labels, so
    # two Series with different indexes would otherwise produce NaNs instead of
    # an element-by-element comparison. This also lets plain lists be passed.
    actual = np.asarray(y_actual, dtype=float)
    predicted = np.asarray(y_pred, dtype=float)
    relative_error = np.abs((predicted - actual) / actual)
    return np.mean(relative_error) * 100

In [None]:
# Read the CSV and discard every row that contains a missing value.
data = pd.read_csv('total_holdup.csv').dropna()
data.shape  # (rows, columns) left after dropping incomplete rows

In [None]:
# Preview the first five rows of the table.
data.head(n=5)

In [None]:
# 2x2 grid of histograms (20 bins each) sharing one y-axis.
n_bins = 20
fig, axs = plt.subplots(nrows=2, ncols=2, sharey=True, tight_layout=True, figsize=(10, 10))

# (column label in `data`, panel title), listed in row-major panel order.
# NOTE(review): the double space in 'Superficial Liquid  Velocity' is part of
# the column label used for the lookup - presumably it mirrors the CSV header.
panels = [
    ('Experimental Liquid Holdup', '(a) Experimental Liquid Holdup'),
    ('Superficial Liquid  Velocity', '(b) Superficial Liquid Velocity '),
    ('Superficial Gas Velocity', '(c)Superficial Gas Velocity  '),
    ('Bed Porosity', '(d) Bed Porosity'),
]
for ax, (column, title) in zip(axs.flat, panels):
    ax.hist(data[column], bins=n_bins)
    ax.set_title(title)
plt.show()

In [None]:
# Every column except the last is a feature; the last column is the target.
X = data.iloc[:, :-1]
y = data.iloc[:, -1]

In [None]:
from sklearn.preprocessing import MinMaxScaler

# Split BEFORE scaling. Fitting MinMaxScaler on the full table would let the
# held-out rows influence the min/max used to scale the training data (data
# leakage), making the test metrics optimistic.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42)

# Learn the per-column min/max from the training rows only, then apply that
# same transform to both splits. Scaled test values may fall outside [0, 1].
scaler = MinMaxScaler().fit(X_train_raw)
X_train = pd.DataFrame(scaler.transform(X_train_raw),
                       columns=X.columns, index=X_train_raw.index)
X_test = pd.DataFrame(scaler.transform(X_test_raw),
                      columns=X.columns, index=X_test_raw.index)

# Kept so the name stays defined: the whole feature table under the
# train-fitted scaler.
X_scaled = pd.DataFrame(scaler.transform(X), columns=X.columns)

## Linear Regression

In [None]:
import timeit

# Time a 10-fold cross-validated evaluation of plain linear regression on the
# training split; the printed metrics compare out-of-fold predictions to y_train.
start = timeit.default_timer()

kfold_cv = KFold(n_splits=10, shuffle=True, random_state=42)
reg = LinearRegression()
y_pred_cv = cross_val_predict(reg, X_train, y_train, cv=kfold_cv)

for label, score_fn in (("r-sqr = ", r2_score),
                        ("MAE = ", mean_absolute_error),
                        ("AARE = ", AARE)):
    print(label, score_fn(y_train, y_pred_cv))

stop = timeit.default_timer()
print('Time: ', stop - start)

In [None]:
import timeit

# Time fitting linear regression on the training split and scoring its
# predictions on the held-out test split.
start = timeit.default_timer()

reg = LinearRegression().fit(X_train, y_train)  # fit() returns the estimator
y_pred = reg.predict(X_test)

r2 = r2_score(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
aare = AARE(y_test, y_pred)
print("r-sqr = ", r2)
print("MAE = ", mae)
print("AARE = ", aare)

stop = timeit.default_timer()
print('Time: ', stop - start)

In [None]:
# Parity plot: test-set targets (x) against the current `y_pred` (y), with a
# dashed y = x reference line spanning the min/max of the full target `y`.
test_mae = mean_absolute_error(y_test, y_pred)
test_r2 = r2_score(y_test, y_pred)
y_lo, y_hi = np.min(y), np.max(y)

plt.scatter(y_test, y_pred, s=3, marker='o', facecolors='none', edgecolors='b')
plt.plot([y_lo, y_hi], [y_lo, y_hi], 'k--', lw=1)
plt.xlabel('Exp')
plt.ylabel('Calc')
# Metric annotation at the fixed position (0.25, 0.1).
plt.text(0.25, .1, "MAE = {:.4f} \n R\N{SUPERSCRIPT TWO} = {:.2f}".format(test_mae, test_r2))
plt.savefig('t1.jpg', format='jpg', dpi=300)
plt.show()

## Gradient Boosting Regressor (GBR)

In [None]:
from sklearn.svm import SVR
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import matthews_corrcoef, confusion_matrix, accuracy_score,make_scorer
from sklearn.model_selection import GridSearchCV,cross_val_score,KFold,RepeatedKFold
from IPython.display import display, clear_output
from sklearn.preprocessing import scale,StandardScaler
import warnings

# Silence this specific runtime warning for the rest of the session.
warnings.filterwarnings('ignore', message='invalid value encountered in double_scalars')

# Exhaustive hyper-parameter search for gradient boosting, scored by MAE under
# 10-fold cross-validation repeated 3 times.
kfold_cv = RepeatedKFold(n_splits=10,random_state=42,n_repeats=3)

# Seed the estimator so every candidate is fitted with the same randomness
# (e.g. feature subsampling when max_features < 1.0). Without a seed, reruns of
# the search can select a different best_estimator_. 42 matches the seed used
# by the follow-up cells that refit the chosen model.
estimator = GradientBoostingRegressor(learning_rate=0.08, random_state=42)

param_grid = {'loss':['squared_error','huber','absolute_error','quantile'],
          'n_estimators':[10,20,50,100,150],
          'criterion':['squared_error','friedman_mse'],
          'min_samples_split':np.arange(start=2,stop=8, step=2),
          'max_depth':np.arange(start=2,stop=8,step=2),
          'max_features':[1.0,0.8,0.5,0.3]}

scorer = make_scorer(mean_absolute_error, greater_is_better=False) # as smaller error is better

grid = GridSearchCV(
    estimator=estimator,
    param_grid=param_grid,
    verbose=3,
    scoring=scorer,cv= kfold_cv,
    n_jobs=11)

grid.fit(X_train,y_train)
grid.best_estimator_

In [None]:
import timeit

# Time a 10-fold cross-validated evaluation of the gradient boosting model
# with fixed hyper-parameters, on the training split.
start = timeit.default_timer()

gbr_params = dict(learning_rate=0.08, loss='huber', max_depth=6,
                  max_features=0.5, n_estimators=150, random_state=42)
reg = GradientBoostingRegressor(**gbr_params)
kfold_cv = KFold(n_splits=10, shuffle=True, random_state=42)
y_pred_cv = cross_val_predict(reg, X_train, y_train, cv=kfold_cv)

report = {"r-sqr = ": r2_score, "MAE = ": mean_absolute_error, "AARE = ": AARE}
for label, score_fn in report.items():
    print(label, score_fn(y_train, y_pred_cv))

stop = timeit.default_timer()
print('Time: ', stop - start)

In [None]:
import timeit

# Time fitting the gradient boosting model on the training split and scoring
# its predictions on the held-out test split.
start = timeit.default_timer()

reg = GradientBoostingRegressor(
    learning_rate=0.08,
    loss='huber',
    max_depth=6,
    max_features=0.5,
    n_estimators=150,
    random_state=42,
).fit(X_train, y_train)  # fit() returns the estimator
y_pred = reg.predict(X_test)

r2 = r2_score(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
aare = AARE(y_test, y_pred)
print("r-sqr = ", r2)
print("MAE = ", mae)
print("AARE = ", aare)

stop = timeit.default_timer()
print('Time: ', stop - start)

In [None]:
# Parity plot: test-set targets (x) against the current `y_pred` (y), with a
# dashed y = x reference line spanning the min/max of the full target `y`.
test_mae = mean_absolute_error(y_test, y_pred)
test_r2 = r2_score(y_test, y_pred)
y_lo, y_hi = np.min(y), np.max(y)

plt.scatter(y_test, y_pred, s=3, marker='o', facecolors='none', edgecolors='b')
plt.plot([y_lo, y_hi], [y_lo, y_hi], 'k--', lw=1)
plt.xlabel('Exp')
plt.ylabel('Calc')
# Metric annotation at the fixed position (0.25, 0.1).
plt.text(0.25, .1, "MAE = {:.4f} \n R\N{SUPERSCRIPT TWO} = {:.2f}".format(test_mae, test_r2))
plt.savefig('t2.jpg', format='jpg', dpi=300)
plt.show()

## Random Forest Regressor (RF)


In [None]:
from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import matthews_corrcoef, confusion_matrix, accuracy_score,make_scorer
from sklearn.model_selection import GridSearchCV,cross_val_score,KFold
from IPython.display import display, clear_output
from sklearn.preprocessing import scale,StandardScaler
import warnings

# Silence this specific runtime warning for the rest of the session.
warnings.filterwarnings('ignore', message='invalid value encountered in double_scalars')

# Exhaustive hyper-parameter search for the random forest, scored by MAE under
# 10-fold cross-validation repeated 3 times.
kfold_cv = RepeatedKFold(n_splits=10,random_state=42,n_repeats=3)

# Seed the forest so bootstrap sampling and feature subsampling are identical
# across candidates and reruns. Without a seed the selected best_estimator_
# can change between runs. 42 matches the seed used by the follow-up cells
# that refit the chosen model.
estimator = RandomForestRegressor(random_state=42)
param_grid = {
    'max_depth': [5,8,10],
    'max_features': [0.2,0.4,0.6,0.8],
    'n_estimators': [20,50,100,150]}

scorer = make_scorer(mean_absolute_error, greater_is_better=False) # as smaller error is better

grid = GridSearchCV(
    estimator=estimator,
    param_grid=param_grid,
    verbose=3,
    scoring=scorer,cv= kfold_cv,
    n_jobs=11)

grid.fit(X_train,y_train)
grid.best_estimator_

In [None]:
import timeit

# Time a 10-fold cross-validated evaluation of the random forest with fixed
# hyper-parameters, on the training split.
start = timeit.default_timer()

rf_params = dict(max_depth=10, max_features=0.6, n_estimators=150, random_state=42)
reg = RandomForestRegressor(**rf_params)
kfold_cv = KFold(n_splits=10, shuffle=True, random_state=42)
y_pred_cv = cross_val_predict(reg, X_train, y_train, cv=kfold_cv)

report = {"r-sqr = ": r2_score, "MAE = ": mean_absolute_error, "AARE = ": AARE}
for label, score_fn in report.items():
    print(label, score_fn(y_train, y_pred_cv))

stop = timeit.default_timer()
print('Time: ', stop - start)

In [None]:
import timeit

# Time fitting the random forest on the training split and scoring its
# predictions on the held-out test split.
start = timeit.default_timer()

reg = RandomForestRegressor(
    max_depth=10,
    max_features=0.6,
    n_estimators=150,
    random_state=42,
).fit(X_train, y_train)  # fit() returns the estimator
y_pred = reg.predict(X_test)

for label, score_fn in (("r-sqr = ", r2_score),
                        ("MAE = ", mean_absolute_error),
                        ("AARE = ", AARE)):
    print(label, score_fn(y_test, y_pred))

stop = timeit.default_timer()
print('Time: ', stop - start)

In [None]:
# Parity plot: test-set targets (x) against the current `y_pred` (y), with a
# dashed y = x reference line spanning the min/max of the full target `y`.
test_mae = mean_absolute_error(y_test, y_pred)
test_r2 = r2_score(y_test, y_pred)
y_lo, y_hi = np.min(y), np.max(y)

plt.scatter(y_test, y_pred, s=3, marker='o', facecolors='none', edgecolors='b')
plt.plot([y_lo, y_hi], [y_lo, y_hi], 'k--', lw=1)
plt.xlabel('Exp')
plt.ylabel('Calc')
# Metric annotation at the fixed position (0.28, 0.06).
plt.text(0.28, .06, "MAE = {:.4f} \n R\N{SUPERSCRIPT TWO} = {:.2f}".format(test_mae, test_r2))
plt.savefig('t3.jpg', format='jpg', dpi=300)
plt.show()

## Multilayer Perceptron Regressor (MLP)


In [None]:
from sklearn.svm import SVR
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import matthews_corrcoef, confusion_matrix, accuracy_score,make_scorer
from sklearn.model_selection import GridSearchCV,cross_val_score,KFold
from IPython.display import display, clear_output
from sklearn.preprocessing import scale,StandardScaler
from sklearn.neural_network import MLPRegressor
import warnings

# Silence this specific runtime warning for the rest of the session.
warnings.filterwarnings('ignore', message='invalid value encountered in double_scalars')

# Exhaustive hyper-parameter search for the MLP, scored by MAE under 10-fold
# cross-validation repeated 3 times.
kfold_cv = RepeatedKFold(n_splits=10,random_state=42,n_repeats=3)

# Seed the network so weight initialisation (and batch shuffling for the
# stochastic solvers) is identical across candidates and reruns. Without a seed
# the selected best_estimator_ can change between runs. 42 matches the seed
# used by the follow-up cells that refit the chosen model.
estimator = MLPRegressor(random_state=42)
param_grid = {"hidden_layer_sizes": [10,20,50], 
              "activation": ["identity", "logistic", "tanh", "relu"], 
              "solver": ["lbfgs", "sgd", "adam"], 
              "max_iter" : [50,80,100,150]}

scorer = make_scorer(mean_absolute_error, greater_is_better=False) # as smaller error is better

grid = GridSearchCV(
    estimator=estimator,
    param_grid=param_grid,
    verbose=3,
    scoring=scorer,cv= kfold_cv,
    n_jobs=11)

grid.fit(X_train,y_train)
grid.best_estimator_

In [None]:
import timeit

# Time a 10-fold cross-validated evaluation of the MLP with fixed
# hyper-parameters, on the training split.
start = timeit.default_timer()

mlp_params = dict(hidden_layer_sizes=50, max_iter=150, solver='lbfgs', random_state=42)
reg = MLPRegressor(**mlp_params)
kfold_cv = KFold(n_splits=10, shuffle=True, random_state=42)
y_pred_cv = cross_val_predict(reg, X_train, y_train, cv=kfold_cv)

r2 = r2_score(y_train, y_pred_cv)
mae = mean_absolute_error(y_train, y_pred_cv)
aare = AARE(y_train, y_pred_cv)
print("r-sqr = ", r2)
print("MAE = ", mae)
print("AARE = ", aare)

stop = timeit.default_timer()
print('Time: ', stop - start)

In [None]:
import timeit

# Time fitting the MLP on the training split and scoring its predictions on
# the held-out test split.
start = timeit.default_timer()

reg = MLPRegressor(
    hidden_layer_sizes=50,
    max_iter=150,
    solver='lbfgs',
    random_state=42,
).fit(X_train, y_train)  # fit() returns the estimator
y_pred = reg.predict(X_test)

report = {"r-sqr = ": r2_score, "MAE = ": mean_absolute_error, "AARE = ": AARE}
for label, score_fn in report.items():
    print(label, score_fn(y_test, y_pred))

stop = timeit.default_timer()
print('Time: ', stop - start)

In [None]:
# Parity plot: test-set targets (x) against the current `y_pred` (y), with a
# dashed y = x reference line spanning the min/max of the full target `y`.
test_mae = mean_absolute_error(y_test, y_pred)
test_r2 = r2_score(y_test, y_pred)
y_lo, y_hi = np.min(y), np.max(y)

plt.scatter(y_test, y_pred, s=3, marker='o', facecolors='none', edgecolors='b')
plt.plot([y_lo, y_hi], [y_lo, y_hi], 'k--', lw=1)
plt.xlabel('Exp')
plt.ylabel('Calc')
# Metric annotation at the fixed position (0.28, 0.06).
plt.text(0.28, .06, "MAE = {:.4f} \n R\N{SUPERSCRIPT TWO} = {:.2f}".format(test_mae, test_r2))
plt.savefig('t4.jpg', format='jpg', dpi=300)
plt.show()