In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF
import pymc3 as pm
from sklearn.metrics import mean_squared_error,mean_absolute_error,r2_score

In [2]:
df = pd.read_csv('BTP_4.csv')
df.shape

(252, 7)

In [3]:
df.sample(5)

Unnamed: 0,Surface Tension,Contact Angle,viscosity,TA,AW,TL,FL
114,28.68,20.3,10.26,84.1,34.76,26.0,20.3
139,25.91,20.8,17.03,65.77,40.7,16.5,11.0
246,24.1,21.0,11.8,61.4,35.0,16.1,9.885
22,22.87,15.7,16.9,82.73,52.1,17.7,7.2
100,25.3,12.5,12.78,60.5,28.3,13.5,9.3


In [4]:
df.isnull().sum()

Surface Tension    0
Contact Angle      0
viscosity          0
TA                 0
AW                 0
TL                 0
FL                 0
dtype: int64

In [5]:
features_input = np.array(['Surface Tension','Contact Angle','viscosity'])
features_output = np.array(['TA','AW','TL','FL'])

### Linear Regression

In [6]:
validation = pd.DataFrame(columns=['Title','mse','mae','rmse','r2'])

for feature1 in features_input:
    for feature2 in features_output:
        x = feature1
        y = feature2
        X=df[x]
        Y=df[y]
        
        k=5
        kf = KFold(n_splits=k, shuffle=True, random_state=40)
        mse_scores = []
        mae_scores = []
        rmse_scores = []
        r2_scores = []
        model = LinearRegression()

        for train_index, test_index in kf.split(X):
            
            x_train, x_test = X[train_index], X[test_index]
            y_train, y_test = Y[train_index], Y[test_index]
            x_train = pd.DataFrame(x_train, columns=[x])
            x_test = pd.DataFrame(x_test, columns=[x])
     
            model.fit(x_train, y_train)
            y_pred = model.predict(x_test)
            
            mse_scores.append(mean_squared_error(y_test, y_pred))
            mae_scores.append(mean_absolute_error(y_test, y_pred))
            rmse_scores.append(np.sqrt(mean_squared_error(y_test, y_pred)))
            r2_scores.append(r2_score(y_test,y_pred))

        X = pd.DataFrame(X, columns=[x])
        model.fit(X,Y)
        y_pred_train = model.predict(x_train)
        
        # plt.scatter(X,Y,color='red')
        # X = x_train[x]
        # plt.plot(X,Y,color='blue',linewidth=0.4)
        # plt.title(y+' vs '+x)
        # plt.xlabel(feature1)
        # plt.ylabel(feature2)
        # plt.show()
        
        mse = np.mean(mse_scores)
        mae = np.mean(mae_scores)
        rmse = np.mean(rmse_scores)
        r2 = np.mean(r2_scores)
        
        val = {'Title':[feature1+' vs '+feature2],
                      'mse':[mse],
                      'mae':[mae],
                      'rmse':[rmse],
                      'r2':[r2]}
        val = pd.DataFrame(val)
        validation = pd.concat([validation, val], ignore_index=True)

In [7]:
validation

Unnamed: 0,Title,mse,mae,rmse,r2
0,Surface Tension vs TA,221.460084,11.666889,14.822318,0.289957
1,Surface Tension vs AW,36.62292,4.8577,6.023288,0.297989
2,Surface Tension vs TL,22.228741,3.616996,4.710317,0.166745
3,Surface Tension vs FL,19.116006,3.227546,4.365274,0.117363
4,Contact Angle vs TA,288.438738,13.656779,16.963579,0.072158
5,Contact Angle vs AW,48.651545,5.815346,6.963686,0.061706
6,Contact Angle vs TL,25.501055,3.779228,5.046189,0.041894
7,Contact Angle vs FL,21.3324,3.408172,4.613065,0.013594
8,viscosity vs TA,321.615067,14.103312,17.876579,-0.020711
9,viscosity vs AW,53.157873,5.888827,7.281881,-0.023242


### Polynomial regression

In [8]:
validation = pd.DataFrame(columns=['Title','mse','mae','rmse','r2'])
degree = 2

for feature1 in features_input:
    for feature2 in features_output:
        x = feature1
        y = feature2
        X=df[x]
        Y=df[y]
        
        k=5
        kf = KFold(n_splits=k, shuffle=True, random_state=40)
        mse_scores = []
        mae_scores = []
        rmse_scores = []
        r2_scores = []
        poly = PolynomialFeatures(degree=degree)

        for train_index, test_index in kf.split(X):
            
            x_train, x_test = X[train_index], X[test_index]
            y_train, y_test = Y[train_index], Y[test_index]
            x_train = pd.DataFrame(x_train, columns=[x])
            x_test = pd.DataFrame(x_test, columns=[x])

            X_train_poly = poly.fit_transform(x_train)
            X_test_poly = poly.transform(x_test)

            model = LinearRegression()
            model.fit(X_train_poly, y_train)

            y_pred = model.predict(X_test_poly)
            
            mse_scores.append(mean_squared_error(y_test, y_pred))
            mae_scores.append(mean_absolute_error(y_test, y_pred))
            rmse_scores.append(np.sqrt(mean_squared_error(y_test, y_pred)))
            r2_scores.append(r2_score(y_test,y_pred))

        
        mse = np.mean(mse_scores)
        mae = np.mean(mae_scores)
        rmse = np.mean(rmse_scores)
        r2 = np.mean(r2_scores)
        
        val = {'Title':[feature1+' vs '+feature2],
                      'mse':[mse],
                      'mae':[mae],
                      'rmse':[rmse],
                      'r2':[r2]}
        val = pd.DataFrame(val)
        validation = pd.concat([validation, val], ignore_index=True)

In [9]:
validation

Unnamed: 0,Title,mse,mae,rmse,r2
0,Surface Tension vs TA,234.308743,12.078167,15.278684,0.249772
1,Surface Tension vs AW,36.735139,4.876755,6.032235,0.295624
2,Surface Tension vs TL,24.219251,3.789381,4.907281,0.095543
3,Surface Tension vs FL,21.098,3.360956,4.573043,0.028702
4,Contact Angle vs TA,293.540318,13.810915,17.113069,0.054493
5,Contact Angle vs AW,49.340955,5.865263,7.013005,0.048539
6,Contact Angle vs TL,25.97045,3.810825,5.092306,0.024515
7,Contact Angle vs FL,21.742911,3.43461,4.657308,-0.004882
8,viscosity vs TA,315.924377,14.053871,17.717939,-0.004105
9,viscosity vs AW,52.997901,5.860111,7.27186,-0.020451


### Bayesian Regression

In [10]:
validation = pd.DataFrame(columns=['Title','mse','mae','rmse','r2'])

for feature1 in features_input:
    for feature2 in features_output:
        x = feature1
        y = feature2
        X=df[x]
        Y=df[y]
        
        k=5
        kf = KFold(n_splits=k, shuffle=True, random_state=40)
        mse_scores = []
        mae_scores = []
        rmse_scores = []
        r2_scores = []
        model = SVR(kernel='rbf', C=1.0, epsilon=0.2)
        

        for train_index, test_index in kf.split(X):
            
            x_train, x_test = X[train_index], X[test_index]
            y_train, y_test = Y[train_index], Y[test_index]
            x_train = pd.DataFrame(x_train, columns=[x])
            x_test = pd.DataFrame(x_test, columns=[x])
     
            model.fit(x_train, y_train)
            y_pred = model.predict(x_test)
            
            mse_scores.append(mean_squared_error(y_test, y_pred))
            mae_scores.append(mean_absolute_error(y_test, y_pred))
            rmse_scores.append(np.sqrt(mean_squared_error(y_test, y_pred)))
            r2_scores.append(r2_score(y_test,y_pred))

        
        mse = np.mean(mse_scores)
        mae = np.mean(mae_scores)
        rmse = np.mean(rmse_scores)
        r2 = np.mean(r2_scores)
        
        val = {'Title':[feature1+' vs '+feature2],
                      'mse':[mse],
                      'mae':[mae],
                      'rmse':[rmse],
                      'r2':[r2]}
        val = pd.DataFrame(val)
        validation = pd.concat([validation, val], ignore_index=True)

In [11]:
validation

Unnamed: 0,Title,mse,mae,rmse,r2
0,Surface Tension vs TA,250.319496,12.447015,15.743523,0.209196
1,Surface Tension vs AW,39.029,4.731917,6.224566,0.251337
2,Surface Tension vs TL,22.596981,3.577935,4.738901,0.162344
3,Surface Tension vs FL,18.638116,3.095984,4.302805,0.1451
4,Contact Angle vs TA,292.7143,13.148477,17.063431,0.070033
5,Contact Angle vs AW,47.761703,5.432032,6.900776,0.077776
6,Contact Angle vs TL,25.723954,3.724278,5.070032,0.035072
7,Contact Angle vs FL,21.122301,3.372032,4.592726,0.023351
8,viscosity vs TA,321.63713,13.822321,17.873012,-0.019955
9,viscosity vs AW,54.591443,5.819827,7.377321,-0.0505


### Random Forest Regression

In [12]:
validation = pd.DataFrame(columns=['Title','mse','mae','rmse','r2'])

for feature1 in features_input:
    for feature2 in features_output:
        x = feature1
        y = feature2
        X=df[x]
        Y=df[y]
        
        k=5
        kf = KFold(n_splits=k, shuffle=True, random_state=40)
        mse_scores = []
        mae_scores = []
        rmse_scores = []
        r2_scores = []
        model = RandomForestRegressor(n_estimators=100)
        

        for train_index, test_index in kf.split(X):
            
            x_train, x_test = X[train_index], X[test_index]
            y_train, y_test = Y[train_index], Y[test_index]
            x_train = pd.DataFrame(x_train, columns=[x])
            x_test = pd.DataFrame(x_test, columns=[x])
     
            model.fit(x_train, y_train)
            y_pred = model.predict(x_test)
            
            mse_scores.append(mean_squared_error(y_test, y_pred))
            mae_scores.append(mean_absolute_error(y_test, y_pred))
            rmse_scores.append(np.sqrt(mean_squared_error(y_test, y_pred)))
            r2_scores.append(r2_score(y_test,y_pred))

        
        mse = np.mean(mse_scores)
        mae = np.mean(mae_scores)
        rmse = np.mean(rmse_scores)
        r2 = np.mean(r2_scores)
        
        val = {'Title':[feature1+' vs '+feature2],
                      'mse':[mse],
                      'mae':[mae],
                      'rmse':[rmse],
                      'r2':[r2]}
        val = pd.DataFrame(val)
        validation = pd.concat([validation, val], ignore_index=True)

In [13]:
validation

Unnamed: 0,Title,mse,mae,rmse,r2
0,Surface Tension vs TA,111.213308,7.383281,10.408701,0.652665
1,Surface Tension vs AW,16.66453,3.129713,4.06779,0.675789
2,Surface Tension vs TL,13.660042,2.541367,3.65739,0.49833
3,Surface Tension vs FL,11.954651,2.370913,3.433864,0.448331
4,Contact Angle vs TA,89.196216,6.678838,9.426233,0.714284
5,Contact Angle vs AW,14.953663,2.937131,3.860188,0.711634
6,Contact Angle vs TL,8.030864,2.087197,2.822229,0.701511
7,Contact Angle vs FL,7.605466,1.970631,2.734377,0.652685
8,viscosity vs TA,115.987696,6.924469,10.37276,0.631607
9,viscosity vs AW,16.01838,2.922696,3.941687,0.694781


### Gaussian Process Regression

In [14]:
validation = pd.DataFrame(columns=['Title','mse','mae','rmse','r2'])

for feature1 in features_input:
    for feature2 in features_output:
        x = feature1
        y = feature2
        X=df[x]
        Y=df[y]
        
        k=5
        kf = KFold(n_splits=k, shuffle=True, random_state=40)
        mse_scores = []
        mae_scores = []
        rmse_scores = []
        r2_scores = []
        kernel = 1.0 * RBF(length_scale=1.0)
        model = GaussianProcessRegressor(kernel=kernel, n_restarts_optimizer=10)
        

        for train_index, test_index in kf.split(X):
            
            x_train, x_test = X[train_index], X[test_index]
            y_train, y_test = Y[train_index], Y[test_index]
            x_train = pd.DataFrame(x_train, columns=[x])
            x_test = pd.DataFrame(x_test, columns=[x])
     
            model.fit(x_train, y_train)
            y_pred = model.predict(x_test)
            
            mse_scores.append(mean_squared_error(y_test, y_pred))
            mae_scores.append(mean_absolute_error(y_test, y_pred))
            rmse_scores.append(np.sqrt(mean_squared_error(y_test, y_pred)))
            r2_scores.append(r2_score(y_test,y_pred))

        
        mse = np.mean(mse_scores)
        mae = np.mean(mae_scores)
        rmse = np.mean(rmse_scores)
        r2 = np.mean(r2_scores)
        
        val = {'Title':[feature1+' vs '+feature2],
                      'mse':[mse],
                      'mae':[mae],
                      'rmse':[rmse],
                      'r2':[r2]}
        val = pd.DataFrame(val)
        validation = pd.concat([validation, val], ignore_index=True)

ABNORMAL_TERMINATION_IN_LNSRCH.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  _check_optimize_result("lbfgs", opt_res)
ABNORMAL_TERMINATION_IN_LNSRCH.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  _check_optimize_result("lbfgs", opt_res)
ABNORMAL_TERMINATION_IN_LNSRCH.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  _check_optimize_result("lbfgs", opt_res)
ABNORMAL_TERMINATION_IN_LNSRCH.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  _check_optimize_result("lbfgs", opt_res)
ABNORMAL_TERMINATION_IN_LNSRCH.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/st

In [15]:
validation

Unnamed: 0,Title,mse,mae,rmse,r2
0,Surface Tension vs TA,459.223952,10.406519,19.263036,-0.517074
1,Surface Tension vs AW,122.779367,5.566299,9.932768,-1.304269
2,Surface Tension vs TL,32.253575,3.2306,5.30577,-0.199345
3,Surface Tension vs FL,18.956628,2.654562,4.147326,0.110688
4,Contact Angle vs TA,449.978355,9.977355,18.650767,-0.498733
5,Contact Angle vs AW,119.895241,5.202406,9.650873,-1.247868
6,Contact Angle vs TL,28.207693,2.941272,4.81589,-0.053089
7,Contact Angle vs FL,15.350346,2.335836,3.676532,0.278673
8,viscosity vs TA,396.002091,8.410692,17.579635,-0.317926
9,viscosity vs AW,104.517293,4.601971,9.016166,-0.945022


### Bayesian Regression

In [None]:
validation = pd.DataFrame(columns=['Title','mse','mae','rmse','r2'])

for feature1 in features_input:
    for feature2 in features_output:
        x = feature1
        y = feature2
        X=df[x]
        Y=df[y]
        
        k=5
        kf = KFold(n_splits=k, shuffle=True, random_state=40)
        mse_scores = []
        mae_scores = []
        rmse_scores = []
        r2_scores = []
        
        models_and_traces = []

        for train_index, test_index in kf.split(X):
            
            x_train, x_test = X[train_index], X[test_index]
            y_train, y_test = Y[train_index], Y[test_index]
            x_train = pd.DataFrame(x_train, columns=[x])
            x_test = pd.DataFrame(x_test, columns=[x])
     
            with pm.Model() as model:
                slope = pm.Normal('slope', mu=0, sigma=10)
                intercept = pm.Normal('intercept', mu=0, sigma=10)
                sigma = pm.HalfNormal('sigma', sigma=1)

                mu = slope * x_train + intercept

                likelihood = pm.Normal('y', mu=mu, sigma=sigma, observed=y_train)

                trace = pm.sample(1000, tune=1000)
                y_pred_bayesian = trace['slope'] * x_test + trace['intercept']

            models_and_traces.append((model, trace))

            for i, (model, trace) in enumerate(models_and_traces):
                y_pred = trace['slope'].mean() * x_test + trace['intercept'].mean()
                mse_scores.append(mean_squared_error(y_test, y_pred))
                mae_scores.append(mean_absolute_error(y_test, y_pred))
                rmse_scores.append(np.sqrt(mean_squared_error(y_test, y_pred)))
                r2_scores.append(r2_score(y_test,y_pred))
            
            mse_scores.append(mean_squared_error(y_test, y_pred_bayesian))
            mae_scores.append(mean_absolute_error(y_test, y_pred_bayesian))
            rmse_scores.append(np.sqrt(mean_squared_error(y_test, y_pred_bayesian)))
            r2_scores.append(r2_score(y_test,y_pred_bayesian))

        
        mse = np.mean(mse_scores)
        mae = np.mean(mae_scores)
        rmse = np.mean(rmse_scores)
        r2 = np.mean(r2_scores)
        
        val = {'Title':[feature1+' vs '+feature2],
                      'mse':[mse],
                      'mae':[mae],
                      'rmse':[rmse],
                      'r2':[r2]}
        val = pd.DataFrame(val)
        validation = pd.concat([validation, val], ignore_index=True)

In [None]:
validation

Unnamed: 0,Title,mse,mae,rmse,r2
