### By using Numpy

In [1]:
# SVD using Numpy

import numpy as np

In [2]:
A = np.array([[2,3,4],[1,3,5],[5,2,3]])
A

array([[2, 3, 4],
       [1, 3, 5],
       [5, 2, 3]])

In [3]:
# Getting U, S, Vt 

U,S,Vt = np.linalg.svd(A)

print("Shape of U :", U.shape)
print(U)
print("Shape of S :", S.shape)
print(S)
print("Shape of Vt :", Vt.shape)
print(Vt)

Shape of U : (3, 3)
[[-0.56007973 -0.20188704 -0.8034627 ]
 [-0.58531352 -0.5899184   0.55624128]
 [-0.58627534  0.78181705  0.21223413]]
Shape of S : (3,)
[9.52398371 3.3393669  0.37731013]
Shape of Vt : (3, 3)
[[-0.48686031 -0.48390785 -0.72718652]
 [ 0.87303757 -0.24309465 -0.42274152]
 [ 0.02779278 -0.84067722  0.54082287]]


In [4]:
sigma = np.diag(S)
sigma

array([[9.52398371, 0.        , 0.        ],
       [0.        , 3.3393669 , 0.        ],
       [0.        , 0.        , 0.37731013]])

In [5]:
# Transformation or Projection of Data

T = U.dot(sigma)
T = A.dot(Vt.T)
T

array([[-5.33419024, -0.6741749 , -0.30315462],
       [-5.57451644, -1.96995399,  0.20987547],
       [-5.58367681,  2.61077398,  0.08007809]])

### By using Scikit Learn

In [6]:
# SVD by using Scikit Learn 

from sklearn.decomposition import TruncatedSVD 

svd = TruncatedSVD()

In [7]:
svd.fit(A)

TruncatedSVD()

In [8]:
T = svd.transform(A)
T

array([[ 5.33419024, -0.6741749 ],
       [ 5.57451644, -1.96995399],
       [ 5.58367681,  2.61077398]])

In [9]:
svd.singular_values_

array([9.52398371, 3.3393669 ])

### By using Randomized SVD

In [10]:
# Randomised SVD 

from sklearn.utils.extmath import randomized_svd

In [11]:
U, S, Vt = randomized_svd(A, n_components = 2)

In [12]:
print(U)

[[ 0.56007973 -0.20188704]
 [ 0.58531352 -0.5899184 ]
 [ 0.58627534  0.78181705]]


In [13]:
print(S)

[9.52398371 3.3393669 ]


In [14]:
print(Vt)

[[ 0.48686031  0.48390785  0.72718652]
 [ 0.87303757 -0.24309465 -0.42274152]]


# SVD on Diabetes Dataset

In [15]:
import pandas as pd 
import numpy as np 
import matplotlib.pyplot as plt 
%matplotlib inline 
import seaborn as sns 

In [16]:
df = pd.read_csv("../DataSets/diabetes.csv")

In [17]:
df.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [18]:
x = df.drop(columns = ["Outcome"])
y = df["Outcome"]

In [19]:
from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.2)


In [20]:
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()

sc_x_train = sc.fit_transform(x_train)

sc_x_test = sc.fit_transform(x_test)

In [21]:
from sklearn.linear_model import LogisticRegression
model = LogisticRegression()

model.fit(sc_x_train,y_train)
y_pred = model.predict(sc_x_test)

from sklearn.metrics import accuracy_score, classification_report
print("Accuracy is :",accuracy_score(y_test,y_pred)) # Logistic Accuracy with Scaled Data
print(classification_report(y_test,y_pred))


Accuracy is : 0.7792207792207793
              precision    recall  f1-score   support

           0       0.75      0.93      0.83        90
           1       0.86      0.56      0.68        64

    accuracy                           0.78       154
   macro avg       0.80      0.75      0.76       154
weighted avg       0.79      0.78      0.77       154



In [22]:
from sklearn.decomposition import TruncatedSVD 

svd = TruncatedSVD() 

s_xtrain = svd.fit_transform(sc_x_train)
s_xtest = svd.fit_transform(sc_x_test)


In [23]:
model = LogisticRegression()

model.fit(s_xtrain,y_train)

y_pred = model.predict(s_xtest)

from sklearn.metrics import accuracy_score, classification_report

print("Accuracy is :",accuracy_score(y_test,y_pred)) # Logistic Accuracy with SVD 
print(classification_report(y_test,y_pred))

Accuracy is : 0.4675324675324675
              precision    recall  f1-score   support

           0       0.53      0.69      0.60        90
           1       0.26      0.16      0.20        64

    accuracy                           0.47       154
   macro avg       0.40      0.42      0.40       154
weighted avg       0.42      0.47      0.43       154



In [24]:
s_xtrain = svd.fit_transform(x_train)
s_xtest = svd.fit_transform(x_test)
model = LogisticRegression()
model.fit(s_xtrain,y_train)
y_pred = model.predict(s_xtest)
from sklearn.metrics import accuracy_score, classification_report
print("Accuracy is :",accuracy_score(y_test,y_pred))
print(classification_report(y_test,y_pred))


Accuracy is : 0.7012987012987013
              precision    recall  f1-score   support

           0       0.68      0.92      0.78        90
           1       0.78      0.39      0.52        64

    accuracy                           0.70       154
   macro avg       0.73      0.66      0.65       154
weighted avg       0.72      0.70      0.67       154



In [25]:
comps=list(range(1,8))
xtrain,xtest,ytrain,ytest=train_test_split(x,y,test_size=0.2,random_state=0)
for i in comps:
    svd=TruncatedSVD(n_components=i)
    sxtrain= svd.fit_transform(xtrain)
    sxtest= svd.fit_transform(xtest)
    model=LogisticRegression()
    model.fit(sxtrain,ytrain)
    ypred=model.predict(sxtest)
    print("for no. of component ",i," ,Accuracy is => ",accuracy_score(ytest,ypred))

for no. of component  1  ,Accuracy is =>  0.7142857142857143
for no. of component  2  ,Accuracy is =>  0.7597402597402597
for no. of component  3  ,Accuracy is =>  0.7727272727272727
for no. of component  4  ,Accuracy is =>  0.7402597402597403
for no. of component  5  ,Accuracy is =>  0.7597402597402597
for no. of component  6  ,Accuracy is =>  0.7792207792207793
for no. of component  7  ,Accuracy is =>  0.7987012987012987
