In [None]:
# Placeholder OpenAI key consumed by the CONFIG section below; replace it
# locally and never commit a real secret.
api_key = "api"


In [None]:
import os
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from openai import OpenAI
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder
from sklearn.base import BaseEstimator, TransformerMixin
from sentence_transformers import SentenceTransformer
from gensim.models import Word2Vec
import joblib

# ===== CONFIG =====
# The explicitly provided key wins (unchanged behavior). When it is empty we
# fall back to the OPENAI_API_KEY environment variable, which is what the error
# message below has always referred to but the code never actually read.
API_KEY = api_key or os.environ.get("OPENAI_API_KEY")
if not API_KEY:
    raise ValueError("‚ö†Ô∏è No se encontr√≥ la variable de entorno OPENAI_API_KEY")

# Directories (relative to the working directory) holding the serialized
# preprocessing pipelines, the model checkpoints and the input data.
PATH_PIPELINE = "pipelines"
PATH_MODELS = "models"
PATH_DATA = "data"

# Card names, positionally aligned with the recommender outputs: the i-th
# output unit is reported under the i-th name in this list.
lista_tarjetas = [
    "Joy",
    "Oro",
    "Clasica",
    "Platinum",
    "Descubre",
    "Explora",
    "Conquista",
    "Line Up",
    "La Comer",
    "Costo",
    "Home Depot",
    "Affinity",
    "Teleton",
]

# ==============================================================================
# 1. ARQUITECTURAS (FINAL)
# ==============================================================================

class SentimentClassifier(nn.Module):
    """Small feed-forward classifier over a fixed-size embedding vector.

    Architecture: Linear(input_dim -> 8) -> ReLU -> Dropout(0.2) ->
    Linear(8 -> num_classes). The attribute names ``fc1`` / ``fc2`` are the
    keys of the state dict, so they must not be renamed.

    Args:
        input_dim: Size of the input feature vector.
        num_classes: Number of output logits (default 3).
    """

    def __init__(self, input_dim, num_classes=3):
        super().__init__()
        hidden_units = 8
        self.fc1 = nn.Linear(input_dim, hidden_units)
        self.dropout = nn.Dropout(p=0.2)
        self.fc2 = nn.Linear(hidden_units, num_classes)

    def forward(self, x):
        """Return the raw (un-normalized) class logits for ``x``."""
        activated = F.relu(self.fc1(x))
        regularized = self.dropout(activated)
        logits = self.fc2(regularized)
        return logits

class RecommendationModel(nn.Module):
    """Two-layer perceptron that emits one logit per output unit.

    Architecture: Linear(input_dim -> hidden_dim) -> ReLU ->
    Linear(hidden_dim -> output_dim).

    Args:
        input_dim: Number of input features.
        hidden_dim: Width of the hidden layer (default 64).
        output_dim: Number of output logits (default 13).
    """

    def __init__(self, input_dim, hidden_dim=64, output_dim=13):
        super().__init__()
        # The attribute names must stay 'layer1' / 'layer2': they are the
        # state-dict keys the saved checkpoint was written with.
        self.layer1 = nn.Linear(in_features=input_dim, out_features=hidden_dim)
        self.layer2 = nn.Linear(in_features=hidden_dim, out_features=output_dim)

    def forward(self, x):
        """Return the raw logits for ``x`` (no activation on the output)."""
        hidden = F.relu(self.layer1(x))
        logits = self.layer2(hidden)
        return logits

# ==============================================================================
# 2. PREPROCESAMIENTO (Igual que antes)
# ==============================================================================
# NOTE: keep the TextPreprocessor, DateFeatureGenerator, DropColumns and
# DynamicPreprocessor classes exactly as they were before.
# DateFeatureGenerator, DropColumns and DynamicPreprocessor are assumed to be
# already defined or imported elsewhere; their full definitions are not repeated here.
# Only a condensed TextPreprocessor is pasted below so that the script runs when copied as a whole.

class TextPreprocessor(BaseEstimator, TransformerMixin):
    """Turn raw text into Word2Vec, GloVe (placeholder) and SBERT features.

    ``fit`` trains a Word2Vec model on the tokenized input and loads the SBERT
    encoder. ``transform`` returns a dict with three arrays keyed ``'w2v'``,
    ``'glove'`` and ``'sbert'``. The ``'glove'`` entry is an all-zero
    placeholder in this condensed version.

    The Word2Vec and SBERT models are dropped when the object is pickled (see
    ``__getstate__``); after unpickling, ``transform`` reloads Word2Vec from
    ``w2v_model_path`` and re-creates the SBERT encoder from its model name.

    Args:
        use_bigrams: Stored only; not used by this condensed version.
        use_trigrams: Stored only; not used by this condensed version.
        glove_path: Stored only; not used by this condensed version.
        sbert_model_name: Name passed to ``SentenceTransformer``.
        w2v_model_path: Optional path of a saved Word2Vec model to load lazily
            in ``transform`` when no in-memory model is available.
    """

    def __init__(self, use_bigrams=True, use_trigrams=True, glove_path=None,
                 sbert_model_name='all-MiniLM-L6-v2', w2v_model_path=None):
        self.use_bigrams = use_bigrams
        self.use_trigrams = use_trigrams
        self.glove_path = glove_path
        self.glove = {}
        self.sbert_model_name = sbert_model_name
        self.sbert_model = None
        self.w2v_model = None
        self.w2v_model_path = w2v_model_path
        self._ensure_nltk_resources()
        self._init_nltk_components()

    def _ensure_nltk_resources(self):
        """Download the NLTK corpora used by this class (quietly)."""
        import nltk

        for resource in ('punkt', 'stopwords', 'wordnet', 'omw-1.4'):
            nltk.download(resource, quiet=True)

    def _init_nltk_components(self):
        """Create the stop-word set and the lemmatizer."""
        from nltk.corpus import stopwords
        from nltk.stem import WordNetLemmatizer

        self.stop_words = set(stopwords.words('english'))
        self.lemmatizer = WordNetLemmatizer()

    def _clean_text(self, text):
        """Lower-case ``text``, strip URLs and keep only letters and spaces."""
        import re

        text = str(text).lower()
        text = re.sub(r"http\S+", "", text)
        # Keep a-z, the Spanish accented vowels, u-diaeresis, n-tilde and the
        # space. The letters are written as \u escapes so the pattern cannot
        # be corrupted by a wrong source-file encoding (the previous literal
        # held mis-encoded bytes instead of the accented letters).
        text = re.sub(r"[^a-z\u00e1\u00e9\u00ed\u00f3\u00fa\u00fc\u00f1 ]", "", text)
        return re.sub(r"\s+", " ", text).strip()

    def _get_wordnet_pos(self, tag):
        """Map a POS tag to a WordNet POS: verbs for 'V*', nouns otherwise."""
        from nltk.corpus import wordnet

        return wordnet.VERB if tag.startswith('V') else wordnet.NOUN

    def _tokenize_series(self, series):
        """Return one list of lemmatized, stop-word-free tokens per text."""
        from nltk.tokenize import word_tokenize

        all_tokens = []
        for text in series:
            tokens = word_tokenize(self._clean_text(text))
            kept = [t for t in tokens if t.isalpha() and t not in self.stop_words]
            # Simplified for brevity: lemmatize without POS information.
            all_tokens.append([self.lemmatizer.lemmatize(t) for t in kept])
        return all_tokens

    def _avg_vector(self, tokens, model):
        """Average the Word2Vec vectors of the in-vocabulary ``tokens``.

        Returns a zero vector of ``model.vector_size`` when no token is known.
        """
        vecs = [model.wv[w] for w in tokens if w in model.wv]
        return np.mean(vecs, axis=0) if vecs else np.zeros(model.vector_size)

    def fit(self, X, y=None):
        """Train Word2Vec on ``X`` and load the SBERT encoder; return self."""
        self.X_tokens_ = self._tokenize_series(X)
        self.w2v_model = Word2Vec(
            sentences=self.X_tokens_,
            vector_size=100,
            window=5,
            min_count=2,
            workers=1,
        )
        self.sbert_model = SentenceTransformer(self.sbert_model_name)
        return self

    def transform(self, X):
        """Compute the feature dict for the texts in ``X``.

        Args:
            X: Sequence of texts (a pandas Series or any sized iterable).

        Returns:
            dict with keys ``'w2v'`` (mean Word2Vec vector per text),
            ``'glove'`` (zeros of shape ``(len(X), 100)``) and ``'sbert'``
            (SBERT embeddings).

        Raises:
            NotFittedError: If no Word2Vec model is in memory and none can be
                loaded from ``w2v_model_path``. It subclasses both
                ``ValueError`` and ``AttributeError``, so callers that caught
                the former ``AttributeError`` keep working.
        """
        if (self.w2v_model is None and self.w2v_model_path
                and os.path.exists(self.w2v_model_path)):
            self.w2v_model = Word2Vec.load(self.w2v_model_path)
        if self.w2v_model is None:
            # Fail with an actionable message instead of the opaque
            # "'NoneType' object has no attribute 'wv'" raised further down.
            from sklearn.exceptions import NotFittedError

            raise NotFittedError(
                "No hay modelo Word2Vec disponible: ejecuta fit() o define "
                "w2v_model_path con un archivo existente."
            )
        if self.sbert_model is None:
            self.sbert_model = SentenceTransformer(self.sbert_model_name)

        tokens = self._tokenize_series(X)
        X_w2v = np.array([self._avg_vector(t, self.w2v_model) for t in tokens])
        # Accept plain lists/tuples as well as pandas Series / numpy arrays.
        texts = X.tolist() if hasattr(X, "tolist") else list(X)
        X_sbert = self.sbert_model.encode(texts, batch_size=32, show_progress_bar=False)
        # GloVe is a dummy all-zero block in this condensed version.
        return {'w2v': X_w2v, 'glove': np.zeros((len(X), 100)), 'sbert': X_sbert}

    def __getstate__(self):
        """Return the picklable state without the Word2Vec / SBERT models."""
        state = self.__dict__.copy()
        state['w2v_model'] = None
        state['sbert_model'] = None
        return state

    def __setstate__(self, s):
        """Restore the state and re-create the NLTK helpers."""
        self.__dict__.update(s)
        self._ensure_nltk_resources()
        self._init_nltk_components()

# ==============================================================================
# 3. CREDIT ADVISOR (CORREGIDO)
# ==============================================================================

class CreditAdvisor:
    """Score a client's comment and profile, then ask GPT for an explanation.

    On construction the class loads two joblib pipelines (text and tabular),
    three ``SentimentClassifier`` checkpoints and one ``RecommendationModel``
    checkpoint, and creates an OpenAI client. Loading is best-effort: a
    missing or incompatible checkpoint is reported on stdout and the
    corresponding model keeps its random initial weights.

    Args:
        api_key: OpenAI API key used to build the client.
    """

    def __init__(self, api_key: str):
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        print(f"üöÄ Usando dispositivo: {self.device}")

        # Pipelines.
        # NOTE(security): joblib.load (and torch.load below) unpickle
        # arbitrary objects; only load artifacts from a trusted source.
        self.text_pipeline = joblib.load(os.path.join(PATH_PIPELINE, "text_pipeline.joblib"))
        self.data_pipeline = joblib.load(os.path.join(PATH_PIPELINE, "pipeline_bankchurner_preprocessing.joblib"))

        # Sentiment models, one per embedding type. The input sizes must match
        # the widths of the arrays returned by the text pipeline.
        self.model_glove = SentimentClassifier(input_dim=100)
        self.model_sbert = SentimentClassifier(input_dim=384)
        self.model_w2v = SentimentClassifier(input_dim=100)

        self._load_safe(self.model_glove, os.path.join(PATH_MODELS, "2/GLOVE.pt"))
        self._load_safe(self.model_sbert, os.path.join(PATH_MODELS, "2/SBERT.pt"))
        self._load_safe(self.model_w2v, os.path.join(PATH_MODELS, "2/Word2Vec.pt"))

        # Recommendation model. These sizes are only the initial guess:
        # _load_smart_reco overwrites them with the sizes stored in the
        # checkpoint whenever the two disagree.
        self.reco_input_dim = 96
        self.reco_hidden_dim = 8
        self.model_recommend = RecommendationModel(input_dim=self.reco_input_dim, hidden_dim=self.reco_hidden_dim)

        path_reco = os.path.join(PATH_MODELS, "1/recommend.pth")
        self._load_smart_reco(path_reco)

        self.client = OpenAI(api_key=api_key)

    def _load_safe(self, model, path):
        """Load ``path`` into ``model`` if possible; report problems on stdout.

        The model is always moved to ``self.device`` and switched to eval
        mode, so dropout is disabled even when the checkpoint cannot be
        loaded.
        """
        model.to(self.device)
        model.eval()
        if not os.path.exists(path):
            print(f"‚ö†Ô∏è No encontrado: {path}")
            return
        try:
            model.load_state_dict(torch.load(path, map_location=self.device))
            print(f"‚úÖ Modelo cargado: {os.path.basename(path)}")
        except Exception as e:
            # Deliberate best-effort: keep going with the untrained model.
            print(f"‚ùå Error en {os.path.basename(path)}: {e}")

    @staticmethod
    def _infer_reco_dims(state_dict):
        """Read ``(input_dim, hidden_dim, output_dim)`` from a checkpoint.

        ``nn.Linear`` stores its weight as ``(out_features, in_features)``, so
        ``layer1.weight`` gives (hidden, input) and ``layer2.weight`` gives
        (output, hidden).

        Returns:
            A tuple of three ints, or ``None`` when the expected 2-D weights
            are not present in ``state_dict``.
        """
        try:
            w1_shape = tuple(state_dict["layer1.weight"].shape)
            w2_shape = tuple(state_dict["layer2.weight"].shape)
        except (KeyError, TypeError, AttributeError):
            return None
        if len(w1_shape) != 2 or len(w2_shape) != 2:
            return None
        hidden_dim, input_dim = w1_shape
        return int(input_dim), int(hidden_dim), int(w2_shape[0])

    @staticmethod
    def _to_float_array(data):
        """Convert pipeline output to a float32 numpy array.

        Handles DataFrames (including object dtype), numpy arrays and objects
        exposing ``toarray()`` such as scipy sparse matrices.
        """
        if hasattr(data, "toarray"):
            data = data.toarray()
        elif hasattr(data, "to_numpy"):
            data = data.to_numpy()
        return np.asarray(data, dtype=np.float32)

    def _load_smart_reco(self, path):
        """Load the recommender checkpoint, resizing the model to fit it.

        The layer sizes are read from the checkpoint tensors themselves. The
        previous approach parsed the ``size mismatch`` error text with a
        hard-coded input width, which matched the current model's shape
        rather than the checkpoint's and then failed again on the retry.
        """
        self.model_recommend.to(self.device)
        self.model_recommend.eval()
        if not os.path.exists(path):
            print(f"‚ö†Ô∏è No encontrado: {path}")
            return

        try:
            state_dict = torch.load(path, map_location=self.device)

            checkpoint_dims = self._infer_reco_dims(state_dict)
            current_dims = (
                self.reco_input_dim,
                self.reco_hidden_dim,
                self.model_recommend.layer2.out_features,
            )
            resized = checkpoint_dims is not None and checkpoint_dims != current_dims
            if resized:
                input_dim, hidden_dim, output_dim = checkpoint_dims
                print(
                    f"üîÑ Corrigiendo dimensi√≥n oculta: {self.reco_hidden_dim} -> {hidden_dim} "
                    f"(Input: {self.reco_input_dim} -> {input_dim})"
                )
                self.reco_input_dim = input_dim
                self.reco_hidden_dim = hidden_dim
                self.model_recommend = RecommendationModel(
                    input_dim=input_dim, hidden_dim=hidden_dim, output_dim=output_dim
                )
                self.model_recommend.to(self.device)

            self.model_recommend.load_state_dict(state_dict)
            self.model_recommend.eval()
            if resized:
                print("‚úÖ Recomendador recargado con dimensi√≥n corregida.")
            else:
                print(f"‚úÖ Recomendador cargado correctamente (Input: {self.reco_input_dim}, Hidden: {self.reco_hidden_dim})")
        except RuntimeError as e:
            if "size mismatch" in str(e):
                print(f"‚ùå Error de tama√±o no recuperable autom√°ticamente: {e}")
            else:
                print(f"‚ùå Error cargando recomendador: {e}")

    def _sentiment_probs(self, model, features):
        """Return the softmax class probabilities of the first row."""
        batch = torch.tensor(features, dtype=torch.float32).to(self.device)
        with torch.no_grad():
            return F.softmax(model(batch), dim=1).cpu().numpy()[0]

    def analyze_client(self, raw_text: str, client_row: pd.Series):
        """Run sentiment analysis, card recommendation and GPT explanation.

        Args:
            raw_text: The client's free-text comment.
            client_row: The client's tabular record (one row, as a Series).

        Returns:
            dict with keys ``"sentiment"`` (neg/neu/pos probabilities per
            embedding model), ``"decision_model"`` (comma-separated card
            names, empty string when none) and ``"gpt_explanation"`` (the GPT
            answer, or an ``"Error GPT: ..."`` string if the call failed).
        """
        # 1. Text: class order of the sentiment models is (neg, neu, pos).
        text_features = self.text_pipeline.transform(pd.Series([raw_text]))
        pred_glove = self._sentiment_probs(self.model_glove, text_features['glove'])
        pred_sbert = self._sentiment_probs(self.model_sbert, text_features['sbert'])
        pred_w2v = self._sentiment_probs(self.model_w2v, text_features['w2v'])

        sentiment_results = {
            name: {"neg": float(probs[0]), "neu": float(probs[1]), "pos": float(probs[2])}
            for name, probs in (("GLOVE", pred_glove), ("SBERT", pred_sbert), ("Word2Vec", pred_w2v))
        }
        avg_positive = float(np.mean([pred_glove[2], pred_sbert[2], pred_w2v[2]]))

        # 2. Tabular data.
        df_input = client_row.to_frame().T
        df_processed = self.data_pipeline.transform(df_input)
        # to_frame().T yields object dtype, which torch.tensor rejects;
        # convert explicitly so numeric object columns are accepted.
        reco_features = self._to_float_array(df_processed)

        if reco_features.shape[1] != self.reco_input_dim:
            # Only a warning: the forward pass below will raise on a mismatch.
            print(f"‚ö†Ô∏è Advertencia: Pipeline gener√≥ {reco_features.shape[1]} features, modelo espera {self.reco_input_dim}")

        t_reco_input = torch.tensor(reco_features, dtype=torch.float32).to(self.device)

        with torch.no_grad():
            logits = self.model_recommend(t_reco_input)
            probs = torch.sigmoid(logits).cpu().numpy()[0]

        # Independent per-card decision: a card is suggested when p > 0.5.
        credit_score = (probs > 0.5).astype(int).tolist()
        decision = ', '.join(card for card, credit in zip(lista_tarjetas, credit_score) if credit == 1)
        tarjetas_sugeridas = decision if decision else "NINGUNA"

        # 3. GPT.
        # NOTE(review): the full client record and comment are sent to the
        # OpenAI API; confirm this is acceptable for the data involved.
        system_prompt = "Eres un asesor experto en productos financieros."
        user_prompt = f"""
        Datos: {client_row.to_dict()}
        Positividad: {avg_positive:.2f}
        Comentario: "{raw_text}"
        Tarjetas Pre-calificadas: {tarjetas_sugeridas}
        
        Salida:
        - An√°lisis: [Justificaci√≥n]
        - Tarjetas a Entregar: [Lista o "Ninguna"]
        """

        try:
            response = self.client.chat.completions.create(
                model="gpt-3.5-turbo",
                messages=[{"role": "system", "content": system_prompt}, {"role": "user", "content": user_prompt}],
                temperature=0.2, max_tokens=300
            )
            explanation = response.choices[0].message.content.strip()
        except Exception as e:
            # Best-effort: the model decision is still returned without GPT.
            explanation = f"Error GPT: {e}"

        return {
            "sentiment": sentiment_results,
            "decision_model": decision,
            "gpt_explanation": explanation
        }

if __name__ == "__main__":
    # Smoke run: build the advisor, pick one client that has a Twitter
    # comment and print the model decision plus the GPT explanation.
    try:
        advisor = CreditAdvisor(api_key=API_KEY)

        csv_path = os.path.join(PATH_DATA, "BankChurners_merged.csv")
        df = pd.read_csv(csv_path)
        # 'NPS' is dropped when present; rows without a comment are skipped.
        df = df.drop(columns=['NPS'], errors='ignore')
        df = df.dropna(subset=['Twitter'])

        idx = 5
        selected = df.iloc[idx]
        text_input = selected['Twitter']
        client_row = selected.drop('Twitter')

        result = advisor.analyze_client(text_input, client_row)

        print("\nüîπ RESULTADOS üîπ")
        print("Decisi√≥n:", result["decision_model"])
        print("\n--- GPT ---\n", result["gpt_explanation"])

    except Exception as e:
        # Top-level boundary: report the failure instead of a raw traceback.
        print(f"\n‚ùå Error: {e}")

üöÄ Usando dispositivo: cuda
üîÑ Reinicializando recursos NLTK despu√©s de deserializaci√≥n...
üì• Descargando recurso NLTK: wordnet
üì• Descargando recurso NLTK: omw-1.4
‚úÖ TextPreprocessor deserializado correctamente
‚úÖ Modelo cargado: GLOVE.pt
‚úÖ Modelo cargado: SBERT.pt
‚úÖ Modelo cargado: Word2Vec.pt
üîÑ Corrigiendo dimensi√≥n oculta: 8 -> 8

‚ùå Error: Error(s) in loading state_dict for RecommendationModel:
	size mismatch for layer1.weight: copying a param with shape torch.Size([8, 96]) from checkpoint, the shape in current model is torch.Size([8, 98]).
