# 01. Feature Engineering & Matrix Construction

**Objective:** Transform the cleaned German Credit data into a numerical feature matrix $X$ and a target vector $y$ suitable for linear algebra operations.

**Steps:**
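1. Load the processed data (`data/processed/german_credit_clean.csv`).
2. Handle missing values: fill missing `saving_accounts` and `checking_account` entries with the category `'unknown'`.
3. One-hot encode the categorical columns, dropping the first level of each to avoid redundant columns.
4. Save the matrix $X$, the target $y$, and the list of feature names.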

In [None]:
import os
# --- SETUP INICIAL ---
if not os.path.exists('Credit-Risk-Algebraic-ML'):
    !git clone https://github.com/adriangonz-afk/Credit-Risk-Algebraic-ML.git
os.chdir('Credit-Risk-Algebraic-ML')
!pip install -q kagglehub
import kagglehub, shutil, pandas as pd
import numpy as np

# Restaurar datos si no existen
if not os.path.exists('data/processed/german_credit_clean.csv'):
    print('⬇️ Restaurando datos...')
    os.makedirs('data/raw', exist_ok=True)
    os.makedirs('data/processed', exist_ok=True)
    path = kagglehub.dataset_download('uciml/german-credit')
    shutil.copy(os.path.join(path, 'german_credit_data.csv'), 'data/raw/german_credit_data.csv')
    df = pd.read_csv('data/raw/german_credit_data.csv', index_col=0)
    df.columns = df.columns.str.lower().str.replace(' ', '_')
    if 'risk' in df.columns:
        df['target'] = df['risk'].map({'good': 0, 'bad': 1})
        df.drop(columns=['risk'], inplace=True)
    df.to_csv('data/processed/german_credit_clean.csv', index=False)

print('✅ Setup completo.')

In [None]:
import json

# Load the cleaned dataset
df = pd.read_csv('data/processed/german_credit_clean.csv')

# Fill missing account information with an explicit 'unknown' category
for account_col in ('saving_accounts', 'checking_account'):
    df[account_col] = df[account_col].fillna('unknown')

# One-hot encode the categorical columns (first level of each is dropped),
# then split into the float feature matrix X and the target vector y
df_encoded = pd.get_dummies(df, drop_first=True)
y = df_encoded['target']
X = df_encoded.drop(columns='target').astype(float)

print(f'Matrix X Shape: {X.shape}')

# Persist the matrix, the target vector and the column names
np.save('data/processed/X_matrix.npy', X.to_numpy())
np.save('data/processed/y_vector.npy', y.to_numpy())
with open('data/processed/feature_names.json', 'w') as names_file:
    names_file.write(json.dumps(X.columns.tolist()))
print('✅ Matrices guardadas.')

In [1]:
# Print a fixed greeting (appears to be a leftover sanity-check cell -- TODO confirm it can be removed).
print('Hola Mundo')

Hola Mundo
