# Superconductivity

In [68]:
import pandas as pd
from sklearn.linear_model import LinearRegression, Lasso, Ridge
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler, PolynomialFeatures

In [91]:
# Remote location of the superconductivity training CSV; loading it needs network access.
DATA_URL = 'https://raw.githubusercontent.com/PacktWorkshops/The-Data-Science-Workshop/master/Chapter07/Dataset/superconduct/train.csv'
# Read the whole file into a DataFrame that the following cells slice into features and target.
_df = pd.read_csv(DATA_URL)

In [4]:
# Print a structural summary of the loaded frame (index range, column names,
# non-null counts and dtypes) to check for missing values before modelling.
_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 21263 entries, 0 to 21262
Data columns (total 82 columns):
number_of_elements                 21263 non-null int64
mean_atomic_mass                   21263 non-null float64
wtd_mean_atomic_mass               21263 non-null float64
gmean_atomic_mass                  21263 non-null float64
wtd_gmean_atomic_mass              21263 non-null float64
entropy_atomic_mass                21263 non-null float64
wtd_entropy_atomic_mass            21263 non-null float64
range_atomic_mass                  21263 non-null float64
wtd_range_atomic_mass              21263 non-null float64
std_atomic_mass                    21263 non-null float64
wtd_std_atomic_mass                21263 non-null float64
mean_fie                           21263 non-null float64
wtd_mean_fie                       21263 non-null float64
gmean_fie                          21263 non-null float64
wtd_gmean_fie                      21263 non-null float64
entropy_fie            

In [8]:
# Feature matrix: every column except the regression target `critical_temp`,
# converted to a plain NumPy array for scikit-learn.
feature_frame = _df.drop(columns=['critical_temp'])
X = feature_frame.values

In [10]:
# Regression target: the `critical_temp` column as a NumPy array.
target_column = _df['critical_temp']
y = target_column.values

In [11]:
# Split features and target into a fitting set and a held-out evaluation set.
# test_size=0.8 sends 80% of the rows to the evaluation set, so the models are
# fitted on the remaining 20%.
# NOTE(review): an 80/20 train/eval split is the more common choice — confirm
# that training on the smaller portion is intended here.
# random_state=0 fixes the shuffle so the split is reproducible.
train_X, eval_X, train_y, eval_y = train_test_split(
    X,
    y,
    test_size=0.8,
    random_state=0,
)

In [12]:
# Baseline model: ordinary linear regression on the raw features, with default settings.
model_1 = LinearRegression()

In [13]:
# Fit the baseline on the training split. The call is the cell's last
# expression, so the fitted estimator's repr is shown as the cell output.
model_1.fit(train_X, train_y)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)

In [15]:
# Score the baseline on the held-out evaluation split and report it.
model_1_r2 = model_1.score(eval_X, eval_y)
print('Model 1 R2 Score: {}'.format(model_1_r2))

Model 1 R2 Score: 0.7328447712730708


In [16]:
# Show the fitted baseline coefficients (one per input feature) for later
# comparison with the regularised models.
print(model_1.coef_)

[-4.94346491e+00  8.70902756e-01 -9.98866501e-01 -5.83760774e-01
  7.93953012e-01 -2.56095021e+01 -5.50632648e+00  1.27121023e-01
 -3.64947050e-02 -2.33513663e-01 -3.17000184e-02  3.10498264e-01
 -2.70341636e-01 -3.27264900e-01  3.01282843e-01 -8.31664513e+01
  4.76913648e+01  8.40711399e-02  2.07050106e-02 -2.97568138e-01
  3.97698378e-02 -1.06142981e+00  3.61994102e+00  6.70321650e-01
 -3.22006412e+00  4.02729358e+01  4.97586647e+01  1.91181858e-01
 -8.63617280e-02 -1.01111516e-01 -6.03489660e-01 -5.08255800e-03
  1.01228657e-03  2.34243690e-03  6.57781429e-04  1.24587526e+01
 -1.10269822e+01 -1.30154742e-03  5.73536446e-04  4.95127593e-03
 -9.30322207e-04  5.82173815e-02  4.77320536e-01  5.07267163e-02
 -5.53776412e-01  5.02250052e+00 -1.96605153e+01 -3.58831794e-01
 -1.02331087e-01  1.14617678e+00 -5.43201942e-01  1.66517118e+00
 -1.86511975e+00 -1.18487297e+00  1.27906539e+00 -2.20915415e+01
  2.34252719e+01 -4.11107209e-01  7.35352108e-01 -5.28505687e-01
  7.74281630e-01 -1.78504

In [18]:
# Baseline predictions for the evaluation split; used below to compute the MSE.
preds_1 = model_1.predict(eval_X)

In [19]:
# Mean squared error between the true evaluation targets and the baseline predictions.
model_1_mse = mean_squared_error(eval_y, preds_1)
print('Model 1 MSE: {}'.format(model_1_mse))

Model 1 MSE: 314.1265890122019


In [55]:
# Stages of the second model, applied in this order:
#   1. scale each feature with MinMaxScaler (default settings),
#   2. expand to interaction terms of up to three distinct features
#      (degree=3 with interaction_only=True, so no pure powers of one feature),
#   3. fit an unregularised linear regression on the expanded features.
linear_scaler_step = ('scaler', MinMaxScaler())
linear_poly_step = ('poly', PolynomialFeatures(degree=3, interaction_only=True))
linear_model_step = ('model', LinearRegression())
steps = [linear_scaler_step, linear_poly_step, linear_model_step]

In [56]:
# Chain the scaler, polynomial expansion and linear regression into one estimator.
# Note: `steps` is rebound further down for the Lasso and Ridge pipelines, so this
# cell must run right after the cell that defines the LinearRegression stages.
model_2 = Pipeline(steps=steps)

In [57]:
# Fit the whole pipeline on the training split: the scaler and the polynomial
# expansion are fitted on training data only, then the regression is trained
# on the expanded features. The fitted pipeline's repr is the cell output.
model_2.fit(train_X, train_y)

Pipeline(memory=None,
         steps=[('scaler', MinMaxScaler(copy=True, feature_range=(0, 1))),
                ('poly',
                 PolynomialFeatures(degree=3, include_bias=True,
                                    interaction_only=True, order='C')),
                ('model',
                 LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None,
                                  normalize=False))],
         verbose=False)

In [58]:
# Score the unregularised polynomial pipeline on the evaluation split.
# In the recorded run this value is hugely negative, i.e. far worse than a
# constant prediction, which is why regularised variants follow.
model_2_r2 = model_2.score(eval_X, eval_y)
print('Model 2 R2 Score: {}'.format(model_2_r2))

Model 2 R2 Score: -3.412098915405949e+18


In [59]:
# Count the coefficients learned by the pipeline's final step (the regression),
# which equals the number of features produced by the polynomial expansion.
final_estimator = model_2[-1]
coefficient_count = len(final_estimator.coef_)
print('Number of coefficients: {}'.format(coefficient_count))

Number of coefficients: 88642


In [64]:
# Same scaling and interaction-feature stages as the second model, but the final
# estimator is an L1-regularised regression (Lasso) with a small penalty
# (alpha=0.001) and at most 2000 solver iterations.
# NOTE(review): the recorded output of the fit cell below starts with a truncated
# warning fragment — possibly a convergence warning; confirm that max_iter=2000
# is enough.
lasso_scaler_step = ('scaler', MinMaxScaler())
lasso_poly_step = ('poly', PolynomialFeatures(degree=3, interaction_only=True))
lasso_model_step = ('model', Lasso(alpha=0.001, max_iter=2000))
steps = [lasso_scaler_step, lasso_poly_step, lasso_model_step]

In [65]:
# Build the Lasso pipeline from the current `steps` list. `steps` is shared with
# the other pipelines, so this must run right after the cell defining the Lasso stages.
lasso_model = Pipeline(steps=steps)

In [66]:
# Fit the Lasso pipeline on the training split; the fitted pipeline's repr is the cell output.
# NOTE(review): the recorded output begins with a truncated warning fragment
# (`positive)`). This may be a solver convergence warning — confirm, and if so
# consider raising max_iter or tol on the Lasso step.
lasso_model.fit(train_X, train_y)

  positive)


Pipeline(memory=None,
         steps=[('scaler', MinMaxScaler(copy=True, feature_range=(0, 1))),
                ('poly',
                 PolynomialFeatures(degree=3, include_bias=True,
                                    interaction_only=True, order='C')),
                ('model',
                 Lasso(alpha=0.001, copy_X=True, fit_intercept=True,
                       max_iter=2000, normalize=False, positive=False,
                       precompute=False, random_state=None, selection='cyclic',
                       tol=0.0001, warm_start=False))],
         verbose=False)

In [67]:
# Score the Lasso pipeline on the evaluation split and report it.
lasso_r2 = lasso_model.score(eval_X, eval_y)
print('Lasso Model R2 Score: {}'.format(lasso_r2))

Lasso Model R2 Score: 0.8325230040978594


In [72]:
# Show the first 30 coefficients of the fitted Lasso step (the pipeline's last
# stage); only a slice is printed to keep the output short.
lasso_estimator = lasso_model[-1]
print(lasso_estimator.coef_[:30])

[ 0.00000000e+00  8.74340500e-02 -7.95095837e+00 -1.30139088e-01
 -0.00000000e+00  0.00000000e+00  0.00000000e+00  3.38565726e+01
  0.00000000e+00 -0.00000000e+00 -4.13763260e+00 -2.65279487e-02
 -0.00000000e+00 -0.00000000e+00 -0.00000000e+00 -0.00000000e+00
 -0.00000000e+00  1.22329305e+01  0.00000000e+00 -0.00000000e+00
 -0.00000000e+00 -1.12633645e+01  0.00000000e+00  0.00000000e+00
  0.00000000e+00  0.00000000e+00 -0.00000000e+00  0.00000000e+00
  0.00000000e+00 -9.08364155e+00]


In [86]:
# Same scaling and interaction-feature stages again, this time ending in an
# L2-regularised regression (Ridge) with alpha=0.9.
ridge_scaler_step = ('scaler', MinMaxScaler())
ridge_poly_step = ('poly', PolynomialFeatures(degree=3, interaction_only=True))
ridge_model_step = ('model', Ridge(alpha=0.9))
steps = [ridge_scaler_step, ridge_poly_step, ridge_model_step]

In [87]:
# Build the Ridge pipeline from the current `steps` list. `steps` is shared with
# the other pipelines, so this must run right after the cell defining the Ridge stages.
ridge_model = Pipeline(steps=steps)

In [88]:
# Fit the Ridge pipeline on the training split; the fitted pipeline's repr is the cell output.
ridge_model.fit(train_X, train_y)

Pipeline(memory=None,
         steps=[('scaler', MinMaxScaler(copy=True, feature_range=(0, 1))),
                ('poly',
                 PolynomialFeatures(degree=3, include_bias=True,
                                    interaction_only=True, order='C')),
                ('model',
                 Ridge(alpha=0.9, copy_X=True, fit_intercept=True,
                       max_iter=None, normalize=False, random_state=None,
                       solver='auto', tol=0.001))],
         verbose=False)

In [89]:
# Score the Ridge pipeline on the evaluation split and report it.
ridge_r2 = ridge_model.score(eval_X, eval_y)
print('Ridge Model R2 score: {}'.format(ridge_r2))

Ridge Model R2 score: 0.8322365420648976


In [90]:
# Show the first 30 coefficients of the fitted Ridge step (the pipeline's last
# stage), for comparison with the same slice of the Lasso coefficients.
ridge_estimator = ridge_model[-1]
print(ridge_estimator.coef_[:30])

[ 0.         -0.42062418 -3.87358249 -4.24082447 -2.87059487 -1.42389767
 -1.74429581  4.28948862 -1.18107119 -1.92438245 -1.03653114 -0.28070952
  0.49678614 -2.44639462  1.5030846  -0.84415353 -0.68920212  4.01266927
  0.15348104 -1.39519727 -1.52030442 -4.20145985 -1.51392581  2.9521117
 -1.71399758  2.19445947 -1.57055309  2.43472526  0.55081481 -3.93851654]
