In [22]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression

In [23]:
# Read the table that the regressors further down are fitted on.
flange30 = pd.read_csv(filepath_or_buffer='data/flange_30.csv')
# Bare trailing expression so the notebook renders the frame.
flange30

Unnamed: 0,Platform_Area,Topside_LWT,Lifetime,Shipbuilding_Time,Oil_Production,Block,Process,Flange_Total,Flange_usage
0,19308.2,7000,15,1.8,0.12,99,90,784,9
1,18037.6,20000,20,3.2,0.035,112,91,981,17
2,20021.6,6500,17,1.9,0.1,103,91,771,10
3,17852.8,14500,20,3.0,0.18,119,89,918,12
4,19825.0,32000,30,3.6,0.22,90,88,1076,14
5,18605.0,37478,20,3.7,0.22,120,88,1109,10
6,19842.99,14000,15,2.3,0.14,106,92,911,16
7,14927.0,16000,17,2.2,0.085,108,89,937,7
8,12498.2,12000,15,2.1,0.08,106,89,882,15
9,10176.6,4500,10,1.9,0.06,85,89,708,11


In [24]:
# Predictor matrix: every column of the loaded table except the target.
x_flange30 = flange30.drop('Flange_usage', axis=1)
x_flange30

Unnamed: 0,Platform_Area,Topside_LWT,Lifetime,Shipbuilding_Time,Oil_Production,Block,Process,Flange_Total
0,19308.2,7000,15,1.8,0.12,99,90,784
1,18037.6,20000,20,3.2,0.035,112,91,981
2,20021.6,6500,17,1.9,0.1,103,91,771
3,17852.8,14500,20,3.0,0.18,119,89,918
4,19825.0,32000,30,3.6,0.22,90,88,1076
5,18605.0,37478,20,3.7,0.22,120,88,1109
6,19842.99,14000,15,2.3,0.14,106,92,911
7,14927.0,16000,17,2.2,0.085,108,89,937
8,12498.2,12000,15,2.1,0.08,106,89,882
9,10176.6,4500,10,1.9,0.06,85,89,708


In [25]:
# Target as a single-column DataFrame (double brackets keep it 2-D, so the
# fitted coef_/intercept_ keep the same array shapes as before).
# Label-based selection raises KeyError when the column is absent, whereas
# pd.DataFrame(flange30, columns=[...]) would silently produce an all-NaN
# column for a missing or misspelled name.
y_flange30 = flange30[['Flange_usage']]
y_flange30

Unnamed: 0,Flange_usage
0,9
1,17
2,10
3,12
4,14
5,10
6,16
7,7
8,15
9,11


In [26]:
# Ordinary least-squares baseline on the raw predictors.
# fit() returns the estimator itself, so construction and fitting can be chained.
linear_regressor = LinearRegression().fit(x_flange30, y_flange30)
# Trailing expression shows the fitted estimator's repr, as the original cell did.
linear_regressor

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None,
         normalize=False)

In [27]:
# Fitted slope vector, then the intercept, one per line.
print(linear_regressor.coef_, linear_regressor.intercept_, sep='\n')

[[-1.18990657e-03 -1.72410476e-04  3.72186989e-01  3.59070083e+00
   2.70967521e+01 -5.18503722e-02  3.62219745e+00  1.12626990e-02]]
[-313.30251209]


In [60]:
# Cubic polynomial expansion (all powers and cross terms, no constant column)
# feeding an ordinary least-squares regressor.
# NOTE(review): with the 8 predictor columns shown above, a degree-3 expansion
# without bias yields 164 terms while the training table has 10 rows, so this
# fit is heavily underdetermined -- treat its predictions with caution.
polynomial_features = PolynomialFeatures(
    degree=3,
    interaction_only=False,
    include_bias=False,
)
poly_regressor = LinearRegression()
pipeline = Pipeline(
    steps=[
        ("polynomial_features", polynomial_features),
        ("linear_regression", poly_regressor),
    ]
)
# Fitting the pipeline fits both step objects in place; the returned pipeline
# is the cell's displayed value.
pipeline.fit(x_flange30, y_flange30)

Pipeline(memory=None,
     steps=[('polynomial_features', PolynomialFeatures(degree=3, include_bias=False, interaction_only=False)), ('linear_regression', LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None,
         normalize=False))])

In [61]:
# List the names of the generated polynomial terms.
# `get_feature_names` was deprecated in scikit-learn 1.0 and removed in 1.2 in
# favour of `get_feature_names_out`; use the new accessor when it exists and
# fall back to the legacy one so older installations keep working.
_feature_namer = getattr(polynomial_features, "get_feature_names_out", None)
if _feature_namer is None:
    _feature_namer = polynomial_features.get_feature_names
# list(...) keeps the displayed value a plain list whichever accessor ran
# (the newer one returns a NumPy array). On newer versions the names may be
# built from the training column labels instead of x0, x1, ...
list(_feature_namer())

['x0',
 'x1',
 'x2',
 'x3',
 'x4',
 'x5',
 'x6',
 'x7',
 'x0^2',
 'x0 x1',
 'x0 x2',
 'x0 x3',
 'x0 x4',
 'x0 x5',
 'x0 x6',
 'x0 x7',
 'x1^2',
 'x1 x2',
 'x1 x3',
 'x1 x4',
 'x1 x5',
 'x1 x6',
 'x1 x7',
 'x2^2',
 'x2 x3',
 'x2 x4',
 'x2 x5',
 'x2 x6',
 'x2 x7',
 'x3^2',
 'x3 x4',
 'x3 x5',
 'x3 x6',
 'x3 x7',
 'x4^2',
 'x4 x5',
 'x4 x6',
 'x4 x7',
 'x5^2',
 'x5 x6',
 'x5 x7',
 'x6^2',
 'x6 x7',
 'x7^2',
 'x0^3',
 'x0^2 x1',
 'x0^2 x2',
 'x0^2 x3',
 'x0^2 x4',
 'x0^2 x5',
 'x0^2 x6',
 'x0^2 x7',
 'x0 x1^2',
 'x0 x1 x2',
 'x0 x1 x3',
 'x0 x1 x4',
 'x0 x1 x5',
 'x0 x1 x6',
 'x0 x1 x7',
 'x0 x2^2',
 'x0 x2 x3',
 'x0 x2 x4',
 'x0 x2 x5',
 'x0 x2 x6',
 'x0 x2 x7',
 'x0 x3^2',
 'x0 x3 x4',
 'x0 x3 x5',
 'x0 x3 x6',
 'x0 x3 x7',
 'x0 x4^2',
 'x0 x4 x5',
 'x0 x4 x6',
 'x0 x4 x7',
 'x0 x5^2',
 'x0 x5 x6',
 'x0 x5 x7',
 'x0 x6^2',
 'x0 x6 x7',
 'x0 x7^2',
 'x1^3',
 'x1^2 x2',
 'x1^2 x3',
 'x1^2 x4',
 'x1^2 x5',
 'x1^2 x6',
 'x1^2 x7',
 'x1 x2^2',
 'x1 x2 x3',
 'x1 x2 x4',
 'x1 x2 x5',
 'x1 x2 x6

In [62]:
# Materialise the expanded design matrix with human-readable column labels.
# `get_feature_names` was deprecated in scikit-learn 1.0 and removed in 1.2;
# prefer `get_feature_names_out` and fall back to the legacy method on older
# installations.
_name_terms = getattr(polynomial_features, "get_feature_names_out", None)
if _name_terms is None:
    _name_terms = polynomial_features.get_feature_names
features = pd.DataFrame(
    polynomial_features.transform(x_flange30),
    # Label each term using the original predictor column names.
    columns=_name_terms(x_flange30.columns),
)
print(features)

   Platform_Area  Topside_LWT  Lifetime  Shipbuilding_Time  Oil_Production  \
0       19308.20       7000.0      15.0                1.8           0.120   
1       18037.60      20000.0      20.0                3.2           0.035   
2       20021.60       6500.0      17.0                1.9           0.100   
3       17852.80      14500.0      20.0                3.0           0.180   
4       19825.00      32000.0      30.0                3.6           0.220   
5       18605.00      37478.0      20.0                3.7           0.220   
6       19842.99      14000.0      15.0                2.3           0.140   
7       14927.00      16000.0      17.0                2.2           0.085   
8       12498.20      12000.0      15.0                2.1           0.080   
9       10176.60       4500.0      10.0                1.9           0.060   

   Block  Process  Flange_Total  Platform_Area^2  Platform_Area Topside_LWT  \
0   99.0     90.0         784.0     3.728066e+08              

In [63]:
# Exponent table of the fitted expansion: per the scikit-learn docs, one row
# per generated term and one column per input feature.
exponent_table = polynomial_features.powers_
print(exponent_table)

[[1 0 0 ... 0 0 0]
 [0 1 0 ... 0 0 0]
 [0 0 1 ... 0 0 0]
 ...
 [0 0 0 ... 0 2 1]
 [0 0 0 ... 0 1 2]
 [0 0 0 ... 0 0 3]]


In [64]:
# Coefficients learned for the polynomial terms, then the intercept,
# one per line.
print(poly_regressor.coef_, poly_regressor.intercept_, sep='\n')

[[-3.94959634e-16  3.92275724e-15  9.78905292e-19  1.05394090e-18
  -1.78360173e-19  2.53522529e-17  3.91863803e-18  1.82247608e-16
  -9.48413175e-12  1.01325531e-11 -4.70473987e-14  1.62017110e-14
  -3.47790933e-15  5.64359006e-14  3.12365765e-14  8.73210849e-13
   3.76463832e-12  1.50876206e-14  3.52294216e-14 -4.05073833e-15
   4.63222954e-13  4.43963486e-13  3.00110293e-12 -4.48920035e-17
   2.22126838e-17 -4.31484226e-18  2.33320196e-16  1.66418284e-16
   2.02727860e-15  6.47163121e-18 -5.80338933e-19  1.62708619e-16
   1.07570267e-16  1.45653939e-15 -3.50301055e-20 -2.06891784e-17
  -1.58211224e-17 -1.77087307e-16  4.27347976e-15  2.69443948e-15
   3.46047013e-14  7.02175200e-16  2.03167622e-14  2.53946595e-13
   6.30338562e-11 -1.81840251e-10 -1.35485959e-09  2.74577047e-10
  -6.43519629e-11 -2.74469221e-09  2.65217098e-10 -1.26335854e-09
   1.10699169e-10 -4.66115429e-10  5.19397074e-10 -7.79146298e-11
   8.56173345e-10  2.58187062e-09  4.14238762e-09 -2.14336001e-12
   2.58060

In [32]:
# Read the rows to predict on; the displayed table has the predictor columns
# only (no Flange_usage column).
x_flange_test = pd.read_csv(filepath_or_buffer='data/flange_test.csv')
x_flange_test

Unnamed: 0,Platform_Area,Topside_LWT,Lifetime,Shipbuilding_Time,Oil_Production,Block,Process,Flange_Total
0,15740.7,16234,19,2.8,0.13,94,94,899
1,15911.8,16398,17,2.8,0.12,113,95,934
2,16596.2,17710,19,2.4,0.14,108,93,943


In [36]:
# Score the same test rows with both fitted models: the plain linear
# regressor first, then the polynomial pipeline.
y_flange_linear, y_flange_poly = [
    fitted_model.predict(x_flange_test)
    for fitted_model in (linear_regressor, pipeline)
]

In [34]:
# Display the plain linear regressor's predictions for the rows in x_flange_test.
y_flange_linear

array([[31.55449847],
       [33.33852348],
       [25.26419882]])

In [38]:
# Display the polynomial pipeline's predictions for the rows in x_flange_test.
y_flange_poly

array([[40.29552416],
       [32.02667375],
       [27.88092604]])