In [113]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go
from sklearn.preprocessing import scale
from sklearn.decomposition import PCA
import scipy

from sklearn.model_selection import cross_val_score, KFold
from sklearn.linear_model import LinearRegression
from sklearn.cross_decomposition import PLSRegression
from sklearn.metrics import mean_squared_error

## Neocis data##

In [114]:
df = pd.read_csv('https://raw.githubusercontent.com/bevi-rosso/Neocis/master/Neocis.csv', sep=';')
X = np.array(df.index)
x = df.drop(['Mooney','VEI'],axis=1)
y = df.Mooney

In [115]:
fig=go.Figure()
fig.add_trace(go.Scatter(x=X, y=df.Tingresso, mode='lines', name='T ingresso'))
fig.add_trace(go.Scatter(x=X, y=df.Solvente, mode='lines', name='Solvente'))
fig.add_trace(go.Scatter(x=X, y=df.Nd_on_PBu, mode='lines', name='Nd/PBu'))
fig.add_trace(go.Scatter(x=X, y=df.Al_on_Nd, mode='lines', name='Al/Nd'))
fig.add_trace(go.Scatter(x=X, y=df.Sty_on_Bde, mode='lines', name='Sty/Bde'))
fig.add_trace(go.Scatter(x=X, y=df.Mooney, mode='lines', name='Mooney'))
fig.add_trace(go.Scatter(x=X, y=df.VEI, mode='lines', name='VEI'))
fig.update_layout(title='Dati input', xaxis_title='n° samples')

## PCA ##

In [116]:
pca = PCA()
xs = scale(df)
x_red = pca.fit_transform(xs)
V = pca.components_
nPC = np.arange(len(V)) + 1
dfV = pd.DataFrame(V, columns=[df.columns], index=[nPC])
EV = pd.DataFrame(np.cumsum(np.round(pca.explained_variance_ratio_, decimals=4)*100),\
                  columns =['Explained Variance'], index=[nPC])

In [117]:
print ('PCs = ', len(V))
dfV

PCs =  7


Unnamed: 0,Tingresso,Solvente,Nd_on_PBu,Al_on_Nd,Sty_on_Bde,Mooney,VEI
1,-0.358783,-0.418246,0.329424,-0.414178,-0.363168,0.350172,0.402206
2,0.449975,-0.174692,-0.563403,-0.084179,-0.372736,0.516521,-0.191753
3,0.408843,0.140916,0.443297,-0.407815,0.50385,0.408207,-0.172243
4,-0.138822,-0.080493,-0.410253,0.182902,0.58702,0.3129,0.574447
5,0.668456,-0.284311,0.29085,0.317304,-0.121018,-0.221395,0.472649
6,0.010482,0.799333,0.124231,0.161756,-0.340413,0.301865,0.335195
7,-0.188228,-0.220116,0.33047,0.70339,0.013905,0.453771,-0.325653


In [118]:
EV

Unnamed: 0,Explained Variance
1,73.34
2,87.23
3,92.85
4,96.04
5,98.3
6,99.38
7,100.01


In [119]:
#Calculate ellipse bounds and plot with scores
theta = np.concatenate((np.linspace(-np.pi, np.pi, 50), np.linspace(np.pi, -np.pi, 50)))
circle = np.array((np.cos(theta), np.sin(theta)))
sigma = np.cov(np.array((x_red[:, 0], x_red[:, 1])))
ed = np.sqrt(scipy.stats.chi2.ppf(0.95, 2))
ell = np.transpose(circle).dot(np.linalg.cholesky(sigma) * ed)
a, b = np.max(ell[: ,0]), np.max(ell[: ,1]) #95% ellipse bounds
t = np.linspace(0, 2 * np.pi, 100)

In [122]:
col = [x for x in range(len(X))]
fig=go.Figure()
fig.add_trace(go.Scatter(x=x_red[:,0], y=x_red[:,1], mode='markers', marker=dict(symbol=[200], \
             color=col, line_width=2, size=10)))
fig.add_trace(go.Scatter(x=a * np.cos(t),y=b * np.sin(t),mode='lines',\
                        line=dict(color='lightgreen', width=2, dash='dash')))
fig.update_layout(height=600, width=800, title='PCA - Score plot', xaxis_title='PC1',yaxis_title='PC2', \
                  showlegend=False, xaxis_zeroline=True, yaxis_zeroline=True, \
                  xaxis_zerolinecolor='blue', yaxis_zerolinecolor='blue', xaxis_showgrid=False, yaxis_showgrid=False)

In [123]:
fig=go.Figure()
fig.add_trace(go.Scatter(x=X, y=x_red[:,0], mode='lines', name='PC1', marker_color = 'blue'))
fig.add_trace(go.Scatter(x=X, y=x_red[:,1], mode='lines', name='PC2', marker_color = 'red'))
fig.update_layout(height=600, width=800, title='PCA', xaxis_title='n° samples',yaxis_title='PC1, PC2', xaxis_zeroline=True)

In [126]:
arr = np.array(dfV)
label = np.array(df.columns)
plot = go.Scatter(x=arr[0,:], y=x_red[1,:], mode='markers', marker=dict(symbol=[201],color='blue', size=10), \
              text = label)
fig=go.Figure()
fig.add_trace(plot)
fig.update_layout(height=600, width=800, title='PCA - Loadings plot', xaxis_title='PC1',yaxis_title='PC2', \
                  xaxis_zeroline=True, yaxis_zeroline=True)