In [91]:
import pandas as pd
import numpy as np
import sklearn
from sklearn import preprocessing, model_selection
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import r2_score, mean_absolute_percentage_error, mean_squared_error

In [566]:
# Load the CNN parameter dataset from disk. `df` keeps the frame exactly as
# it was read; `int_flow` is an independent copy that the following cells
# reshape into the modelling table.
df = pd.read_csv(filepath_or_buffer='datasets/CNN_parameter_dataset.csv')
int_flow = df.copy(deep=True)
int_flow.head()

Unnamed: 0,Sample,Cavity,Voltage,Flow rate,Duty cycle,Pulse width,Pulse period,Pulse pkpk,Pulse rms,Laser energy,cavity width(mm),cavity depth(µm)
0,5,6,25,0.47,20,4,20,3.132333,1.318667,0,2.285,72.1
1,5,7,25,0.47,20,4,20,3.010667,1.273267,0,2.199,68.2
2,5,8,25,0.47,20,4,20,3.329467,1.397467,0,2.108,67.2
3,5,9,25,0.47,20,4,20,3.260867,1.379214,0,2.233,61.2
4,5,10,25,0.47,20,4,20,3.198667,1.3488,0,2.046,52.2


In [567]:
#selecting features and formatting data
# Rebuild `int_flow` from the untouched raw frame `df` on every run instead of
# mutating it in place. With the previous in-place drop, re-running this cell
# raised KeyError because the columns had already been removed.
int_flow = (
    df
    # column labels may carry stray whitespace; normalise them first
    .rename(columns=str.strip)
    # these columns are not used as model features
    .drop(columns=['Sample', 'Cavity', 'Pulse width'])
    # scale the width by 1000 (mm -> µm, presumably to match the depth column's unit).
    # NOTE: the label still says "(mm)" even though the values are now 1000x larger;
    # the label is kept unchanged because later cells select the column by this name.
    .assign(**{'cavity width(mm)': lambda frame: frame['cavity width(mm)'] * 1000})
)
# Explicit shuffling is left disabled here:
#int_flow = sklearn.utils.shuffle(int_flow)
int_flow.head()

Unnamed: 0,Voltage,Flow rate,Duty cycle,Pulse period,Pulse pkpk,Pulse rms,Laser energy,cavity width(mm),cavity depth(µm)
0,25,0.47,20,20,3.132333,1.318667,0,2285.0,72.1
1,25,0.47,20,20,3.010667,1.273267,0,2199.0,68.2
2,25,0.47,20,20,3.329467,1.397467,0,2108.0,67.2
3,25,0.47,20,20,3.260867,1.379214,0,2233.0,61.2
4,25,0.47,20,20,3.198667,1.3488,0,2046.0,52.2


In [568]:
# Separate the table into model inputs X (features) and the two regression
# targets y (labels): cavity width and cavity depth.
target_columns = ['cavity width(mm)', 'cavity depth(µm)']
y = int_flow[target_columns].values
X = int_flow.drop(columns=target_columns).values

# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# split repeatable between runs.
X_train, X_test, y_train, y_test = model_selection.train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=5,
)

In [569]:
#training the linear regression model
#
# A disabled experiment previously sat here: GridSearchCV over LinearRegression
# with the grid {'fit_intercept': [True, False], 'normalize': [True, False]},
# cv=5 and scoring='neg_mean_absolute_percentage_error', followed by printing
# `best_params_`.
# NOTE(review): the `normalize` option was removed from LinearRegression in
# scikit-learn 1.2, so that grid must be updated before it is re-enabled.

# Ordinary least squares with default settings, fitted on the training split.
# `fit` returns the estimator itself, so `reg` is the fitted model.
reg = LinearRegression()
reg.fit(X_train, y_train)

In [570]:
# Performance check: predict on the held-out rows and print each sample as a
# (true targets, predicted targets) pair.
predictions = reg.predict(X_test)
for actual_and_predicted in zip(y_test, predictions):
    print(actual_and_predicted)

(array([2570. ,   97.6]), array([2457.41470185,   94.09425486]))
(array([2449.,  108.]), array([2306.05173542,   91.67272413]))
(array([1895. ,   79.7]), array([2743.9724611 ,   94.56261673]))
(array([2411. ,   66.2]), array([2378.33342739,   82.6812892 ]))
(array([3055. ,   78.4]), array([2519.14185528,   98.94277011]))
(array([2078. ,  107.5]), array([2431.63202185,   91.43400854]))
(array([831.  ,  39.72]), array([949.70285164,  41.78964803]))
(array([1494. ,   80.3]), array([1551.06506486,   91.70804361]))
(array([2538.,  138.]), array([2702.89848587,  126.70060599]))
(array([2443. ,  112.1]), array([2504.89686495,   91.79033903]))
(array([2380. ,  103.3]), array([2320.41904849,   91.98155179]))
(array([2430. ,   96.2]), array([2275.65721936,   93.8664742 ]))
(array([2278. ,   82.9]), array([1879.434996  ,   92.05167122]))
(array([1852. ,   89.1]), array([1835.73037523,   83.20619631]))
(array([2979.,   82.]), array([2424.27946948,   90.84384862]))
(array([2576. ,  110.8]), array([

In [571]:
# Show the fitted coefficient matrix of the regression model
# (one row per target, one column per input feature).
coefficients = reg.coef_
print(coefficients)

[[-9.71552388e+00 -6.89828845e+02 -3.97787764e+01 -1.54142378e-01
  -1.19246356e+03  2.65227051e+03  3.79873390e+00]
 [ 8.63726934e-01 -1.02104555e+02  2.26451907e+00  5.74382676e-02
   3.29134866e+01 -4.57947095e+01 -2.08828714e-02]]


In [572]:
# Report each metric separately for the two targets. Column 0 of
# y_test/predictions is the cavity width, column 1 the cavity depth.
metric_report = (
    ('r2_score for width: ', r2_score, 0),
    ('r2_score for depth: ', r2_score, 1),
    ('MAPE for width: ', mean_absolute_percentage_error, 0),
    ('MAPE for depth: ', mean_absolute_percentage_error, 1),
    ('MSE for width: ', mean_squared_error, 0),
    ('MSE for depth: ', mean_squared_error, 1),
)
for label, metric, column in metric_report:
    print(label, metric(y_test[:, column], predictions[:, column]))

r2_score for width:  0.738044515822956
r2_score for depth:  0.6196567673332387
MAPE for width:  0.11011729860133176
MAPE for depth:  0.1290386850264645
MSE for width:  92907.40775649922
MSE for depth:  169.8441947037563
