In [3]:
import pandas as pd
import numpy as np
import seaborn as sns
from sklearn.decomposition import TruncatedSVD
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer

# Load seaborn's 'titanic' example dataset; the later cells index the result
# by column name (df.columns, df[features]).
df = sns.load_dataset('titanic')


In [4]:
# Show the column labels of the loaded frame to see which fields are available.
df.columns

Index(['survived', 'pclass', 'sex', 'age', 'sibsp', 'parch', 'fare',
       'embarked', 'class', 'who', 'adult_male', 'deck', 'embark_town',
       'alive', 'alone'],
      dtype='object')

In [14]:
# Process the data
# Column groups that feed the SVD. 'survived' is the outcome label and was
# never consumed by either transformer below (ColumnTransformer drops columns
# it is not told about), so it is left out of the feature list instead of
# being selected and then silently discarded.
numeric_features = ['age', 'fare']
categorical_features = ['pclass', 'sex']
features = categorical_features + numeric_features

# Pipeline for numerical features: fill gaps with the column median, then
# standardise to zero mean and unit variance.
numeric_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', StandardScaler()),
])

# Pipeline for categorical features: fill gaps with the most frequent value,
# then one-hot encode. A string placeholder such as 'missing' would end up in
# the same column as the integer pclass codes, and OneHotEncoder rejects
# columns that mix strings and numbers.
categorical_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('onehot', OneHotEncoder(handle_unknown='ignore')),
])

# Preprocessor: route each column group through its own pipeline and
# concatenate the results (numeric columns first, then the one-hot columns).
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numeric_transformer, numeric_features),
        ('cat', categorical_transformer, categorical_features),
    ]
)

# Apply the preprocessing to the dataset.
X = preprocessor.fit_transform(df[features])

# Sanity check: preprocessing must keep one output row per passenger.
assert X.shape[0] == len(df), "preprocessing changed the number of rows"



In [15]:
# Dimensions of the preprocessed matrix as (rows, columns).
X.shape

(891, 7)

In [16]:
# Display the preprocessed feature matrix produced by the ColumnTransformer.
X

array([[-0.56573646, -0.50244517,  0.        , ...,  1.        ,
         0.        ,  1.        ],
       [ 0.66386103,  0.78684529,  1.        , ...,  0.        ,
         1.        ,  0.        ],
       [-0.25833709, -0.48885426,  0.        , ...,  1.        ,
         1.        ,  0.        ],
       ...,
       [-0.1046374 , -0.17626324,  0.        , ...,  1.        ,
         1.        ,  0.        ],
       [-0.25833709, -0.04438104,  1.        , ...,  0.        ,
         0.        ,  1.        ],
       [ 0.20276197, -0.49237783,  0.        , ...,  1.        ,
         0.        ,  1.        ]])

In [17]:
# Perform SVD
# TruncatedSVD uses a randomized solver by default, so random_state is pinned
# to make the printed values repeatable across kernel restarts.
# n_components is the number of singular vectors to keep and can be adjusted.
svd = TruncatedSVD(n_components=4, random_state=42)
Y = svd.fit_transform(X)

# TruncatedSVD does not centre the data before decomposing it, so the
# explained-variance ratios are not guaranteed to come out in decreasing
# order even though the singular values do.
print('Singular Values:', svd.singular_values_)
print('Explained Variance Ratio:', svd.explained_variance_ratio_)

Singular Values: [34.0919237  29.83381513 28.43241553 18.6007055 ]
Explained Variance Ratio: [0.37544109 0.16349524 0.21253133 0.12019999]


-----------
# SVD

In [1]:
import numpy as np

In [4]:
# Generate a 4x3 matrix of uniform random values in [0, 1).
# A seeded Generator is used so that Restart & Run All reproduces the same
# matrix (and therefore the same decomposition in the next cell).
rng = np.random.default_rng(42)
A = rng.random((4, 3))
print(A)

[[0.47065075 0.157824   0.05453219]
 [0.91970642 0.43883198 0.33533489]
 [0.33754561 0.44275974 0.58583554]
 [0.6960723  0.23308686 0.35834028]]


In [24]:
# Full SVD of the 4x3 matrix A. With the default settings U comes back square
# (4x4 here), Sigma is a 1-D vector of the singular values, and VT is 3x3.
U, Sigma, VT = np.linalg.svd(A)

# Print each factor followed by its shape, with a rule between factors.
divider = '==================================='
print(
    f'U matrix : \n {U} \n {U.shape}',
    divider,
    f'Sigma matrix : \n {Sigma} \n {Sigma.shape}',
    divider,
    f'VT matrix : \n {VT} \n {VT.shape}',
    sep='\n',
)

U matrix : 
 [[-0.29417256  0.39477249  0.26084992 -0.83040623]
 [-0.67198806  0.29936232  0.43691125  0.5176126 ]
 [-0.44783227 -0.85863001  0.14223568 -0.20486525]
 [-0.51121503  0.1314967  -0.84901996 -0.02308518]] 
 (4, 4)
Sigma matrix : 
 [1.58184685 0.45266946 0.11644098] 
 (3,)
VT matrix : 
 [[-0.79874402 -0.416448   -0.4342569 ]
 [ 0.58062198 -0.34427398 -0.73780319]
 [-0.15775331  0.84145499 -0.51678564]] 
 (3, 3)
