## Singular value decomposition: 

Problem statement: 

    1. Calculate the eigen value/eigen vector
    2. Calculate principle components with eigen decomposition
    3. From SVD calculate themes, observations in new space, features in new space
    4. Compare both principle components using eigen decomposistion & SVD

In [1]:
import pandas as pd
import numpy as np

In [2]:
_d = [[0,0],[1,2], [2, 3], [3,6], [4,8], [5,9]]
columns = ['x', 'y']
data = pd.DataFrame(_d, columns=columns)
data

Unnamed: 0,x,y
0,0,0
1,1,2
2,2,3
3,3,6
4,4,8
5,5,9


#### Calculate eigen values/vectors

In [30]:
covariance = np.cov(data.T)
covariance

array([[ 3.5       ,  6.6       ],
       [ 6.6       , 12.66666667]])

In [41]:
eigen_values, eigen_vector = np.linalg.eig(covariance)
print(eigen_values)
print(eigen_vector)

[ 0.04797743 16.11868923]
[[-0.88611393 -0.46346747]
 [ 0.46346747 -0.88611393]]


In [42]:
# ===> Sort eigen values and vectors based on weight: 
_idx = eigen_values.argsort()[::-1]
eigen_values = eigen_values[_idx]
eigen_vector = eigen_vector[:, _idx]
eigen_vector


array([[-0.46346747, -0.88611393],
       [-0.88611393,  0.46346747]])

In [43]:
principle_component = pd.DataFrame((np.linalg.inv(eigen_vector) @ data.T).T) 
principle_component.columns = ['x', 'y']
principle_component

Unnamed: 0,x,y
0,0.0,0.0
1,-2.235695,0.040821
2,-3.585277,-0.381825
3,-6.707086,0.122463
4,-8.942781,0.163284
5,-10.292363,-0.259362


#### Principle component in SVD:

In [35]:
theames, observations, vt_new_features = np.linalg.svd(data, full_matrices=False)

In [36]:
theames

array([[ 1.47280506e-17,  2.49365625e-16],
       [-1.41730722e-01, -1.12691117e-01],
       [-2.27476796e-01,  7.22512837e-01],
       [-4.25192165e-01, -3.38073350e-01],
       [-5.66922887e-01, -4.50764467e-01],
       [-6.52668961e-01,  3.84439487e-01]])

In [37]:
diagnoal_matrix = np.diag(observations)

In [38]:
re = theames @ diagnoal_matrix
re.round(4)

array([[  0.    ,   0.    ],
       [ -2.2354,  -0.0558],
       [ -3.5878,   0.3578],
       [ -6.7061,  -0.1674],
       [ -8.9415,  -0.2232],
       [-10.2939,   0.1904]])

### Food ratings SVD: 

In [59]:
food = pd.read_csv('https://cdn.upgrad.com/UpGrad/temp/2c2ac0b4-4788-4464-a8cb-2ffce4874694/MyFoodRatings.csv')
food

Unnamed: 0,Name,Chicken,Mutton,Paneer,ChowMein,SpringRolls,Momo,Sushi,Ramen,Tempura
0,A,5,5,5,0,0,0,0,0,0
1,B,4,4,4,0,0,0,0,0,0
2,C,3,3,3,0,0,0,0,0,0
3,D,2,2,2,0,0,0,0,0,0
4,E,0,0,0,2,2,2,0,0,0
5,F,0,0,0,1,1,1,0,0,0
6,G,0,0,0,5,3,4,0,0,0
7,H,0,0,0,4,4,4,0,0,0
8,I,0,0,0,0,0,0,2,2,4
9,J,0,0,0,0,0,0,1,1,1


In [66]:
user_theam, theam_weight, pc_features = np.linalg.svd(food.drop(columns='Name', axis=1), full_matrices=True)

In [67]:
user_theam.shape

(12, 12)

In [68]:
theam_weight.shape

(9,)

In [69]:
pc_features.shape

(9, 9)

In [72]:
theam_weight.round(3)

array([12.728, 10.577,  8.848,  1.242,  1.061,  0.407,  0.   ,  0.   ,
        0.   ])

In [74]:
np.diag(theam_weight).round(3)

array([[12.728,  0.   ,  0.   ,  0.   ,  0.   ,  0.   ,  0.   ,  0.   ,
         0.   ],
       [ 0.   , 10.577,  0.   ,  0.   ,  0.   ,  0.   ,  0.   ,  0.   ,
         0.   ],
       [ 0.   ,  0.   ,  8.848,  0.   ,  0.   ,  0.   ,  0.   ,  0.   ,
         0.   ],
       [ 0.   ,  0.   ,  0.   ,  1.242,  0.   ,  0.   ,  0.   ,  0.   ,
         0.   ],
       [ 0.   ,  0.   ,  0.   ,  0.   ,  1.061,  0.   ,  0.   ,  0.   ,
         0.   ],
       [ 0.   ,  0.   ,  0.   ,  0.   ,  0.   ,  0.407,  0.   ,  0.   ,
         0.   ],
       [ 0.   ,  0.   ,  0.   ,  0.   ,  0.   ,  0.   ,  0.   ,  0.   ,
         0.   ],
       [ 0.   ,  0.   ,  0.   ,  0.   ,  0.   ,  0.   ,  0.   ,  0.   ,
         0.   ],
       [ 0.   ,  0.   ,  0.   ,  0.   ,  0.   ,  0.   ,  0.   ,  0.   ,
         0.   ]])

In [77]:
user_theam.round(2)

array([[-0.68,  0.  ,  0.  ,  0.  ,  0.  ,  0.  ,  0.  ,  0.68, -0.26,
         0.  ,  0.  ,  0.  ],
       [-0.54,  0.  ,  0.  , -0.  ,  0.  ,  0.  , -0.44, -0.29,  0.65,
        -0.02,  0.06, -0.03],
       [-0.41,  0.  ,  0.  , -0.  ,  0.  ,  0.  , -0.  , -0.65, -0.64,
        -0.  ,  0.  , -0.  ],
       [-0.27, -0.  , -0.  ,  0.  , -0.  , -0.  ,  0.88, -0.15,  0.32,
         0.03, -0.11,  0.06],
       [ 0.  , -0.33, -0.  ,  0.  , -0.29, -0.  , -0.13,  0.  ,  0.  ,
         0.22, -0.74,  0.44],
       [ 0.  , -0.16,  0.  , -0.  , -0.14,  0.  ,  0.02,  0.  ,  0.  ,
         0.34,  0.62,  0.67],
       [ 0.  , -0.66,  0.  ,  0.  ,  0.75, -0.  ,  0.  ,  0.  ,  0.  ,
         0.  , -0.  ,  0.  ],
       [ 0.  , -0.65,  0.  , -0.  , -0.58,  0.  ,  0.06,  0.  ,  0.  ,
        -0.19,  0.22, -0.39],
       [ 0.  ,  0.  , -0.54, -0.84,  0.  ,  0.08,  0.  ,  0.  ,  0.  ,
        -0.  , -0.  , -0.  ],
       [ 0.  ,  0.  , -0.19,  0.16,  0.  ,  0.37,  0.  ,  0.  ,  0.  ,
         0.8 , -0.  

In [82]:
_weights = pd.DataFrame(np.diag(theam_weight[:3]))
_weights.columns = ['Indian', 'Chinese', 'Japanese']
_weights

Unnamed: 0,Indian,Chinese,Japanese
0,12.727922,0.0,0.0
1,0.0,10.577038,0.0
2,0.0,0.0,8.848261


In [90]:
pd.DataFrame(pc_features[:,:].round(3))

Unnamed: 0,0,1,2,3,4,5,6,7,8
0,-0.577,-0.577,-0.577,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,-0.638,-0.512,-0.575,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,-0.476,-0.558,-0.68
3,0.0,0.0,0.0,0.0,0.0,0.0,0.296,0.626,-0.721
4,-0.0,-0.0,-0.0,0.653,-0.756,-0.051,0.0,0.0,0.0
5,-0.0,-0.0,-0.0,0.0,0.0,0.0,0.828,-0.545,-0.133
6,0.0,0.0,0.0,-0.408,-0.408,0.816,0.0,0.0,0.0
7,0.816,-0.408,-0.408,0.0,0.0,0.0,0.0,0.0,0.0
8,0.0,-0.707,0.707,0.0,0.0,0.0,0.0,0.0,0.0


In [91]:
five_ratings = pd.read_csv("https://cdn.upgrad.com/UpGrad/temp/c6a4f4ae-de6d-4d40-b4de-d2daa9546e6c/FoodRatings_all_same_5.csv")
five_ratings

Unnamed: 0,Name,Chicken,Mutton,Paneer,ChowMein,SpringRolls,Momo,Sushi,Ramen,Tempura
0,A,5,5,5,0,0,0,0,0,0
1,B,5,5,5,0,0,0,0,0,0
2,C,5,5,5,0,0,0,0,0,0
3,D,5,5,5,0,0,0,0,0,0
4,E,0,0,0,5,5,5,0,0,0
5,F,0,0,0,5,5,5,0,0,0
6,G,0,0,0,5,5,5,0,0,0
7,H,0,0,0,5,5,5,0,0,0
8,I,0,0,0,0,0,0,5,5,5
9,J,0,0,0,0,0,0,5,5,5


In [92]:
restarant_theam, weights, pc_features = np.linalg.svd(five_ratings.drop(columns=['Name'], axis=1), full_matrices=True)

In [94]:
weights.round(3)

array([17.321, 17.321, 17.321,  0.   ,  0.   ,  0.   ,  0.   ,  0.   ,
        0.   ])

In [95]:
different_ratings = pd.read_csv('https://cdn.upgrad.com/UpGrad/temp/c0873162-18ed-489b-a7ce-8d75420b8e3b/FoodRatings_all_same_5_4_3.csv')
different_ratings

Unnamed: 0,Name,Chicken,Mutton,Paneer,ChowMein,SpringRolls,Momo,Sushi,Ramen,Tempura
0,A,5,5,5,0,0,0,0,0,0
1,B,5,5,5,0,0,0,0,0,0
2,C,5,5,5,0,0,0,0,0,0
3,D,5,5,5,0,0,0,0,0,0
4,E,0,0,0,4,4,4,0,0,0
5,F,0,0,0,4,4,4,0,0,0
6,G,0,0,0,4,4,4,0,0,0
7,H,0,0,0,4,4,4,0,0,0
8,I,0,0,0,0,0,0,3,3,3
9,J,0,0,0,0,0,0,3,3,3


In [97]:
restarant_theam, weights, pc_features = np.linalg.svd(different_ratings.drop(columns=['Name'], axis=1), full_matrices=True)
weights.round(3)

array([17.321, 13.856, 10.392,  0.   ,  0.   ,  0.   ,  0.   ,  0.   ,
        0.   ])