In [None]:
# ==========================================
# 🧠 SMART FINANCIAL ADVISOR (FLAN-T5 + LoRA)
# ==========================================

# 1️⃣ Install dependencies
!pip install -q transformers datasets peft accelerate bitsandbytes flask pyngrok torch sentencepiece

# 2️⃣ Imports & GPU check
import torch, os
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, Trainer, TrainingArguments, DataCollatorForSeq2Seq
from datasets import Dataset
from peft import LoraConfig, get_peft_model, TaskType
from flask import Flask, request, jsonify
from pyngrok import ngrok

# Prefer the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print("Device:", device)

# 3️⃣ Load tokenizer & base model
# Hub identifier of the checkpoint; the tokenizer and the model weights are
# both loaded from it so that they stay in sync.
model_name = "google/flan-t5-base"
tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path=model_name)
# device_map="auto" lets the library choose where to place the weights.
base_model = AutoModelForSeq2SeqLM.from_pretrained(
    pretrained_model_name_or_path=model_name,
    device_map="auto",
)

# 4️⃣ Create small financial Q&A + summarization dataset
# Tiny hand-written corpus: each tuple is one (instruction, response) example.
_qa_pairs = [
    (
        "Explain SIP in simple terms",
        "SIP means investing a fixed amount regularly in mutual funds to build wealth gradually.",
    ),
    (
        "What is credit score?",
        "A credit score is a number that shows how likely a person is to repay loans on time.",
    ),
    (
        "How can someone improve financial health?",
        "To improve financial health, track expenses, save regularly, invest wisely, and avoid high debt.",
    ),
    (
        "Summarize this: Systematic Investment Plan allows regular investment in mutual funds over time.",
        "SIP helps people invest small amounts regularly in mutual funds instead of one-time large sums.",
    ),
]

# Transpose the pairs into a column-oriented dict: one list per field, with
# matching indices belonging to the same example.
data = {
    "instruction": [instruction for instruction, _ in _qa_pairs],
    "response": [response for _, response in _qa_pairs],
}
# Wrap the column-oriented dict in a Dataset so it can be tokenized in batches
# with .map() further down.
dataset = Dataset.from_dict(data)

def preprocess(example):
    """Tokenize one batch of instruction/response pairs for seq2seq training.

    ``example`` is a batch mapping with parallel ``"instruction"`` and
    ``"response"`` lists. Every instruction is prefixed with ``"Question: "``
    and tokenized (truncated to 256 tokens); the responses are tokenized
    separately (truncated to 128 tokens) and their ids are stored under
    ``"labels"`` in the returned encoding.
    """
    prompts = []
    for question in example["instruction"]:
        prompts.append(f"Question: {question}")

    encoded = tokenizer(prompts, max_length=256, truncation=True)
    target_encoding = tokenizer(example["response"], max_length=128, truncation=True)
    encoded["labels"] = target_encoding["input_ids"]
    return encoded

# Tokenize in batches and drop the raw text columns ("instruction",
# "response") so that only tensor-convertible fields reach the data collator,
# instead of relying on the Trainer's implicit unused-column pruning.
tokenized = dataset.map(
    preprocess,
    batched=True,
    remove_columns=dataset.column_names,
)

# 5️⃣ Apply LoRA adapters
# LoRA hyper-parameters for a sequence-to-sequence model: r is the adapter
# rank, lora_alpha the scaling factor, lora_dropout the dropout applied on the
# adapter path; bias="none" is passed through unchanged.
lora_config = LoraConfig(
    r=8, lora_alpha=32, lora_dropout=0.05,
    bias="none",
    task_type=TaskType.SEQ_2_SEQ_LM,
)

# Wrap the base model with the adapters and report how many parameters train.
model = get_peft_model(base_model, lora_config)
model.print_trainable_parameters()

# 6️⃣ Training setup
# Mixed precision: fp16 is deliberately NOT used.
#   * fp16=True makes TrainingArguments fail on a CPU-only runtime, which the
#     device check above explicitly allows for.
#   * T5-family checkpoints are known to be prone to overflow (NaN/inf loss)
#     under fp16; bf16 has the dynamic range they need.
# bf16 is enabled only when the GPU supports it; otherwise train in fp32.
use_bf16 = torch.cuda.is_available() and torch.cuda.is_bf16_supported()

args = TrainingArguments(
    output_dir="./finetuned_flant5_lora",
    per_device_train_batch_size=2,
    num_train_epochs=2,
    learning_rate=2e-4,
    # 4 examples / batch size 2 * 2 epochs = 4 optimizer steps in total, so the
    # previous value of 10 never logged a loss; log every step instead.
    logging_steps=1,
    fp16=False,
    bf16=use_bf16,
    save_strategy="no",  # the adapter is saved explicitly after training
)

# The collator pads inputs and labels per batch.
data_collator = DataCollatorForSeq2Seq(tokenizer, model=model)
trainer = Trainer(
    model=model,
    args=args,
    train_dataset=tokenized,
    data_collator=data_collator,
)
trainer.train()

# 7️⃣ Test the model
def generate_text(prompt):
    """Generate a reply for ``prompt`` with the fine-tuned model.

    Args:
        prompt: Full text prompt (e.g. ``"Question: ..."`` or
            ``"Summarize: ..."``). Prompts longer than 512 tokens are
            truncated so that an arbitrarily large request cannot exhaust
            memory.

    Returns:
        The decoded generation (at most 128 new tokens) with special tokens
        stripped.
    """
    # Training leaves the model in train mode; switch to eval so dropout
    # (including the LoRA dropout) is disabled during generation.
    model.eval()

    # The model was loaded with device_map="auto", so follow the device its
    # parameters actually live on rather than the global `device` string.
    target_device = next(model.parameters()).device

    inputs = tokenizer(
        prompt,
        return_tensors="pt",
        truncation=True,
        max_length=512,
    ).to(target_device)

    with torch.no_grad():
        outputs = model.generate(**inputs, max_new_tokens=128)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

# Smoke test: ask the first training question back and try one ad-hoc summary.
print("🧩 Sample Tests:")

sample_question = data["instruction"][0]
print("Q:", sample_question)

sample_answer = generate_text("Question: " + sample_question)
print("A:", sample_answer)

sample_summary = generate_text(
    "Summarize: Investing regularly in mutual funds helps build wealth over time."
)
print("Summary:", sample_summary)

# 8️⃣ Save LoRA model
# Persist the LoRA-wrapped model and its tokenizer into the same directory so
# they can be reloaded together (model first, then tokenizer).
for _artifact in (model, tokenizer):
    _artifact.save_pretrained("smart_financial_advisor_lora")

# 9️⃣ Flask app
# Module-level Flask application; the @app.route decorators below register
# their handlers on this object.
app = Flask(__name__)

@app.route("/ask", methods=["POST"])
def ask():
    """Answer a question posted as JSON ``{"question": "<text>"}``.

    Returns:
        ``{"answer": <generated text>}`` on success, or a 400 response with
        ``{"error": ...}`` when the body is not a JSON object or the
        ``question`` field is missing, not a string, or blank.
    """
    # silent=True returns None for a missing or malformed JSON body instead of
    # raising, so bad requests get a consistent JSON 400 rather than an
    # unhandled error.
    payload = request.get_json(silent=True)
    q = payload.get("question", "") if isinstance(payload, dict) else ""
    if not isinstance(q, str) or not q.strip():
        return jsonify({"error": "Request body must be a JSON object with a non-empty 'question' string."}), 400

    ans = generate_text(f"Question: {q}")
    return jsonify({"answer": ans})

@app.route("/summarize", methods=["POST"])
def summarize():
    """Summarize text posted as JSON ``{"text": "<text>"}``.

    Returns:
        ``{"summary": <generated text>}`` on success, or a 400 response with
        ``{"error": ...}`` when the body is not a JSON object or the ``text``
        field is missing, not a string, or blank.
    """
    # silent=True returns None for a missing or malformed JSON body instead of
    # raising, so bad requests get a consistent JSON 400 rather than an
    # unhandled error.
    payload = request.get_json(silent=True)
    text = payload.get("text", "") if isinstance(payload, dict) else ""
    if not isinstance(text, str) or not text.strip():
        return jsonify({"error": "Request body must be a JSON object with a non-empty 'text' string."}), 400

    summ = generate_text(f"Summarize: {text}")
    return jsonify({"summary": summ})

# 10️⃣ Run with ngrok
# Open a tunnel to the local Flask port before starting the (blocking) server.
public_url = ngrok.connect(5000)

# connect() may hand back a tunnel object exposing `.public_url` or a plain
# URL string, depending on the pyngrok release; normalise to the URL string so
# the printed value is directly usable.
tunnel_address = getattr(public_url, "public_url", public_url)
print("🌐 Public URL:", tunnel_address)

try:
    app.run(port=5000)
finally:
    # Close the tunnel when the server stops (e.g. the cell is interrupted) so
    # that re-running the cell does not leave stale tunnels open.
    ngrok.disconnect(tunnel_address)