# 01. Feature Engineering & Matrix Construction

**Objective:** Transform raw data into a numerical matrix $X$ suitable for linear algebra operations.

**Steps:**
1. Download and parse the raw German Credit data (UCI Statlog).
2. Handle missing values.
3. One-Hot Encoding.
4. Save Matrix $X$ and Target $y$.

In [10]:
import os

# 1. Volvemos a la ruta ra√≠z de Colab para ver la carpeta "desde fuera"
%cd /content

# 2. Nombre de la carpeta de tu repositorio
carpeta = "Credit-Risk-Algebraic-ML"

# 3. Ejecutamos el comando de sistema 'rm -rf'
# rm = remove (borrar)
# -r = recursive (borrar carpetas y subcarpetas)
# -f = force (no preguntar confirmaci√≥n)
print(f"üóëÔ∏è Eliminando la carpeta: {carpeta}...")
!rm -rf {carpeta}

# 4. Verificaci√≥n
if not os.path.exists(carpeta):
    print("‚ú® ¬°Listo! La carpeta ha sido eliminada.")
    print("Ahora puedes volver a correr el bloque de 'Git Clone' una sola vez.")
else:
    print("‚ö†Ô∏è Algo pas√≥, la carpeta sigue ah√≠.")

/content
üóëÔ∏è Eliminando la carpeta: Credit-Risk-Algebraic-ML...
‚ú® ¬°Listo! La carpeta ha sido eliminada.
Ahora puedes volver a correr el bloque de 'Git Clone' una sola vez.


In [11]:
import pandas as pd
import numpy as np
import os
import shutil

# --- 1. SETUP DEL ENTORNO (Git Clone) ---
if not os.path.exists('Credit-Risk-Algebraic-ML'):
    !git clone https://github.com/adriangonz-afk/Credit-Risk-Algebraic-ML.git
os.chdir('Credit-Risk-Algebraic-ML')

print("üîÑ Iniciando Protocolo de Ingenier√≠a de Datos...")

# --- 2. LIMPIEZA Y PREPARACI√ìN ---
if os.path.exists('data'):
    shutil.rmtree('data')
os.makedirs('data/raw', exist_ok=True)
os.makedirs('data/processed', exist_ok=True)

# --- 3. DESCARGA OFICIAL (UCI - Raw Data) ---
# Esta es la fuente que S√ç funciona
raw_url = "https://archive.ics.uci.edu/ml/machine-learning-databases/statlog/german/german.data"
raw_path = "data/raw/german.data"
!wget -q {raw_url} -O {raw_path}

# --- 4. PARSING Y LIMPIEZA ---
columns = [
    'checking_account', 'duration', 'credit_history', 'purpose',
    'credit_amount', 'savings_account', 'employment_since', 'installment_rate',
    'personal_status_sex', 'other_debtors', 'residence_since', 'property',
    'age', 'other_installment_plans', 'housing', 'existing_credits',
    'job', 'people_liable', 'telephone', 'foreign_worker', 'target'
]

df = pd.read_csv(raw_path, names=columns, sep=' ', index_col=False)

# Transformar Target (1->0, 2->1)
df['target'] = df['target'].map({1: 0, 2: 1})

# One-Hot Encoding
df_encoded = pd.get_dummies(df, drop_first=True)

# Separar X e y
X = df_encoded.drop('target', axis=1).astype(float)
y = df_encoded['target']

# --- 5. GUARDADO ---
np.save('data/processed/X_matrix.npy', X.values)
np.save('data/processed/y_vector.npy', y.values)
df_encoded.to_csv('data/processed/german_credit_clean.csv', index=False)

# Guardar nombres de columnas (importante para el Notebook 02)
import json
with open('data/processed/feature_names.json', 'w') as f:
    json.dump(list(X.columns), f)

print(f"‚úÖ ¬°LISTO! Matrices generadas. Shape X: {X.shape}")

Cloning into 'Credit-Risk-Algebraic-ML'...
remote: Enumerating objects: 40, done.[K
remote: Counting objects:   2% (1/40)[Kremote: Counting objects:   5% (2/40)[Kremote: Counting objects:   7% (3/40)[Kremote: Counting objects:  10% (4/40)[Kremote: Counting objects:  12% (5/40)[Kremote: Counting objects:  15% (6/40)[Kremote: Counting objects:  17% (7/40)[Kremote: Counting objects:  20% (8/40)[Kremote: Counting objects:  22% (9/40)[Kremote: Counting objects:  25% (10/40)[Kremote: Counting objects:  27% (11/40)[Kremote: Counting objects:  30% (12/40)[Kremote: Counting objects:  32% (13/40)[Kremote: Counting objects:  35% (14/40)[Kremote: Counting objects:  37% (15/40)[Kremote: Counting objects:  40% (16/40)[Kremote: Counting objects:  42% (17/40)[Kremote: Counting objects:  45% (18/40)[Kremote: Counting objects:  47% (19/40)[Kremote: Counting objects:  50% (20/40)[Kremote: Counting objects:  52% (21/40)[Kremote: Counting objects:  55% (22/40)[K

In [12]:
# Display the parsed frame (target already recoded to 0/1) as a visual sanity check.
df

Unnamed: 0,checking_account,duration,credit_history,purpose,credit_amount,savings_account,employment_since,installment_rate,personal_status_sex,other_debtors,...,property,age,other_installment_plans,housing,existing_credits,job,people_liable,telephone,foreign_worker,target
0,A11,6,A34,A43,1169,A65,A75,4,A93,A101,...,A121,67,A143,A152,2,A173,1,A192,A201,0
1,A12,48,A32,A43,5951,A61,A73,2,A92,A101,...,A121,22,A143,A152,1,A173,1,A191,A201,1
2,A14,12,A34,A46,2096,A61,A74,2,A93,A101,...,A121,49,A143,A152,1,A172,2,A191,A201,0
3,A11,42,A32,A42,7882,A61,A74,2,A93,A103,...,A122,45,A143,A153,1,A173,2,A191,A201,0
4,A11,24,A33,A40,4870,A61,A73,3,A93,A101,...,A124,53,A143,A153,2,A173,2,A191,A201,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,A14,12,A32,A42,1736,A61,A74,3,A92,A101,...,A121,31,A143,A152,1,A172,1,A191,A201,0
996,A11,30,A32,A41,3857,A61,A73,4,A91,A101,...,A122,40,A143,A152,1,A174,1,A192,A201,0
997,A14,12,A32,A43,804,A61,A75,4,A93,A101,...,A123,38,A143,A152,1,A173,1,A191,A201,0
998,A11,45,A32,A43,1845,A61,A73,4,A93,A101,...,A124,23,A143,A153,1,A173,1,A192,A201,1


In [17]:
import getpass
import os
import subprocess

# Pushes the local `main` branch to GitHub using a personal access token that
# is typed at run time.
# NOTE(review): this cell only pushes; staging and committing presumably
# happen elsewhere - confirm.

# --- 1. SECURITY: HIDDEN INPUT ---
print("üîê Por favor, pega tu nuevo token cuando aparezca la casilla:")
# getpass reads the token without echoing it, so it is never stored in the code.
token = getpass.getpass('Token de GitHub: ').strip()
if not token:
    raise ValueError("Empty GitHub token: nothing was pushed.")

username = "adriangonz-afk"
repo_name = "Credit-Risk-Algebraic-ML"

# --- 2. CREDENTIALS ---
# public_url is the token-free remote (the URL the repo is cloned from);
# git_url embeds the token and must not outlive this cell.
public_url = f"https://github.com/{username}/{repo_name}.git"
git_url = f"https://{token}@github.com/{username}/{repo_name}.git"


def _run_git(*args):
    """Run `git <args>` in the current directory.

    Returns a (exit_code, output) tuple where output is stdout + stderr with
    any occurrence of the token replaced by '***'. An argument list is used
    (no shell), so the token is never subject to shell interpolation.
    """
    proc = subprocess.run(["git", *args], capture_output=True, text=True)
    return proc.returncode, (proc.stdout + proc.stderr).replace(token, "***")


print("\nüîß Configurando acceso seguro...")

# --- 3. PUSH ---
# `!git push` never raises a Python exception, so the original try/except
# could not detect a failed push and always printed the success message.
# Exit codes are checked explicitly instead.
error = None
try:
    # Point 'origin' at the authenticated URL only for the duration of the push.
    code, output = _run_git("remote", "set-url", "origin", git_url)
    if code != 0:
        error = f"git remote set-url exited with code {code}: {output.strip()}"
    else:
        print("üöÄ Subiendo cambios (ahora sin revelar el secreto)...")
        code, output = _run_git("push", "origin", "main")
        print(output, end="")
        if code != 0:
            error = f"git push exited with code {code}"
finally:
    # Always restore the token-free URL so the secret is not left in .git/config.
    _run_git("remote", "set-url", "origin", public_url)

if error is None:
    print("\n‚úÖ ¬°√âXITO! Tu c√≥digo est√° en GitHub y tu token sigue secreto.")
else:
    print(f"\n‚ùå Algo fall√≥: {error}")

üîê Por favor, pega tu nuevo token cuando aparezca la casilla:
Token de GitHub: ¬∑¬∑¬∑¬∑¬∑¬∑¬∑¬∑¬∑¬∑

üîß Configurando acceso seguro...
üöÄ Subiendo cambios (ahora sin revelar el secreto)...
Everything up-to-date

‚úÖ ¬°√âXITO! Tu c√≥digo est√° en GitHub y tu token sigue secreto.
