In [35]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder, OrdinalEncoder
from sklearn.decomposition import PCA
from sklearn.svm import SVC
from sklearn.pipeline import Pipeline, make_pipeline
from sklearn.compose import ColumnTransformer
from sklearn.datasets import make_classification

import warnings
warnings.filterwarnings('ignore')

In [36]:
# Build a reproducible synthetic credit dataset. The legacy global NumPy RNG is
# seeded once and the columns are drawn in a fixed order, so every run yields
# the same rows.
np.random.seed(42)
num_samples = 200

# Integer features as (column name, inclusive lower bound, exclusive upper bound).
# The order matters: each draw consumes the shared random stream.
integer_feature_bounds = [
    ("Idade", 18, 70),
    ("Renda_Anual", 20000, 120000),
    ("Score_Credito", 300, 850),
    ("Dividas", 0, 50000),
    ("Pagamentos_Atraso", 0, 10),
    ("Uso_Cartao_Credito", 0, 10000),
    ("Qtd_Contas_Banco", 1, 5),
    ("Transacoes_Mensais", 10, 300),
    ("Investimentos", 0, 50000),
    ("Saldo_Conta", -5000, 100000),
]

data = {
    column: np.random.randint(lower, upper, num_samples)
    for column, lower, upper in integer_feature_bounds
}
# Categorical feature first, then the binary target; both are sampled at random
# without reference to any of the other columns.
data["Tipo_Vip"] = np.random.choice(['Comum', 'Black', 'Platinum'], size=num_samples)
data["Risco_Credito"] = np.random.choice([0, 1], size=num_samples)

df = pd.DataFrame(data)

df.head()

Unnamed: 0,Idade,Renda_Anual,Score_Credito,Dividas,Pagamentos_Atraso,Uso_Cartao_Credito,Qtd_Contas_Banco,Transacoes_Mensais,Investimentos,Saldo_Conta,Tipo_Vip,Risco_Credito
0,56,78053,454,30355,7,2192,4,35,24860,84899,Comum,0
1,69,41959,789,13116,3,1248,1,56,21195,15049,Comum,0
2,46,25530,685,25470,7,4033,1,297,11939,70717,Black,0
3,32,114856,403,42344,3,1997,2,275,25338,11082,Black,0
4,60,23748,692,42918,7,3687,4,25,21730,-4339,Black,0


In [37]:
# Target vector, then a feature matrix without the target and without the
# categorical Tipo_Vip column (only the remaining columns are used as features).
y = df["Risco_Credito"]
X = df.drop(["Tipo_Vip", "Risco_Credito"], axis="columns")

In [38]:
# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# split identical across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

### Usando Pipeline

In [39]:
# Three explicitly named stages: standardise the features, keep three
# principal components, then fit a linear-kernel SVM on them.
pipeline_steps = [
    ('scaler', StandardScaler()),
    ('pca', PCA(n_components=3)),
    ('svm', SVC(kernel='linear')),
]
pipeline = Pipeline(steps=pipeline_steps)

pipeline.fit(X_train, y_train)
pipeline_accuracy = pipeline.score(X_test, y_test)
print("Acurácia Pipeline:", pipeline_accuracy)

Acurácia Pipeline: 0.475


### Usando make_pipeline

In [40]:
# Same scaler -> PCA -> linear SVM chain as the Pipeline cell, but here
# make_pipeline receives the estimators alone, without explicit step names.
make_pipeline_estimators = (
    StandardScaler(),
    PCA(n_components=3),
    SVC(kernel='linear'),
)
make_pipeline_model = make_pipeline(*make_pipeline_estimators)

make_pipeline_model.fit(X_train, y_train)
make_pipeline_accuracy = make_pipeline_model.score(X_test, y_test)
print("Acurácia make_pipeline:", make_pipeline_accuracy)

Acurácia make_pipeline: 0.475


### Usando ColumnTransformer

In [41]:
# Rebuild the feature matrix, this time keeping the categorical Tipo_Vip
# column; only the target is removed. Note that X, y and the split variables
# from the earlier cells are overwritten here.
y = df["Risco_Credito"]
X = df.drop("Risco_Credito", axis="columns")

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [42]:
# Declare the categorical columns once and treat every remaining column as
# numeric, so the two lists cannot drift apart. Index.drop keeps the original
# column order and works on the column labels only, instead of copying the
# whole frame just to read its column names.
categorical_features = ["Tipo_Vip"]
numeric_features = X_train.columns.drop(categorical_features)

In [None]:
# Route each column group to its own transformer: the numeric columns are
# standardised and the categorical columns are one-hot encoded.
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numeric_features),
        # handle_unknown='ignore' encodes a category that was absent from the
        # training split as all zeros instead of raising at score/predict time.
        # When every category is seen during fit the encoding is unchanged.
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features)
    ]
)

# Preprocess -> keep three principal components -> linear-kernel SVM.
column_transformer_pipeline = Pipeline([
    ('preprocessor', preprocessor),
    ('pca', PCA(n_components=3)),
    ('svm', SVC(kernel='linear'))
])

column_transformer_pipeline.fit(X_train, y_train)
print("Acurácia column_transformer:", column_transformer_pipeline.score(X_test, y_test))

Acurácia column_transformer: 0.475
