In [1]:
# app.py
import os
import re
import time
import torch
from fastapi import FastAPI, HTTPException, Depends, Header
from pydantic import BaseModel
from typing import Optional, List, Dict
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel
from gradio_client import Client
import pinecone

In [3]:
from huggingface_hub import login

# Authenticate this kernel against the Hugging Face Hub.
# The token is read from the environment; a KeyError on the next line means
# HUGGINGFACE_TOKEN was not exported before the kernel was started.
hf_token = os.environ["HUGGINGFACE_TOKEN"]
login(token=hf_token)


In [4]:
import os
from getpass import getpass

# --- Non-secret runtime configuration ---------------------------------------
# These are plain assignments (not setdefault) on purpose: editing a value and
# re-running this cell must take effect even if the variable is already set in
# the running kernel. Presumably they are read by app.py, which is started as
# a child process and inherits this environment -- confirm against app.py.
# Change BASE_MODEL to a smaller one if GPU/RAM is insufficient
os.environ["BASE_MODEL"] = "meta-llama/Llama-3.1-8B-Instruct"   # or "gpt2" for fast local testing
os.environ["ADAPTER_PATH"] = "lora_adapter"
os.environ["HF_SPACE"] = "asr3232/Youtube_summarizer_model"
os.environ["DEV_NO_MODEL"] = "0"   # set to "1" to disable model loading (fast dev)
os.environ["PINECONE_INDEX"] = "journal-memory"
os.environ["EMBED_DIM"] = "384"

# --- Secrets ------------------------------------------------------------------
# Never commit real credentials to the notebook. Any key that was previously
# pasted into this cell must be treated as leaked and rotated.
# API_KEY: keep whatever the environment already provides; otherwise fall back
# to a throwaway development value (do not use it for a publicly exposed server).
os.environ.setdefault("API_KEY", "testkey123")

# PINECONE_API_KEY: take it from the environment, or ask for it with a hidden
# prompt so the value never lands in the saved notebook or its outputs.
if not os.environ.get("PINECONE_API_KEY"):
    os.environ["PINECONE_API_KEY"] = getpass("PINECONE_API_KEY: ").strip()

print("BASE_MODEL:", os.environ["BASE_MODEL"])
print("ADAPTER_PATH:", os.environ["ADAPTER_PATH"])

BASE_MODEL: meta-llama/Llama-3.1-8B-Instruct
ADAPTER_PATH: lora_adapter


In [4]:
# find and kill uvicorn
import subprocess, signal, os
# naive kill: signals every process whose full command line contains "uvicorn"
# (may require adjustment if unrelated uvicorn servers run on this machine).
# pkill is executed directly rather than through a `!` shell escape, so there
# is no wrapper shell whose own command line contains the pattern.
try:
    kill_result = subprocess.run(["pkill", "-f", "uvicorn"], check=False)
except FileNotFoundError:
    # pkill is not installed / not on PATH (e.g. non-Linux host).
    print("pkill is not available on this system; stop uvicorn manually.")
else:
    # Per the procps pkill man page: 0 = something was signalled, 1 = no match.
    if kill_result.returncode == 0:
        print("Stopped running uvicorn process(es).")
    elif kill_result.returncode == 1:
        print("No running uvicorn process found.")
    else:
        print("pkill failed with exit code", kill_result.returncode)


In [5]:
!ps aux | grep ngrok


pgarg5b+    4683 95.2  0.0   8464  3712 pts/3    Ss+  11:24   0:00 /usr/bin/zsh -c ps aux | grep ngrok
pgarg5b+    4688  0.0  0.0   7084  2176 pts/3    S+   11:24   0:00 grep ngrok


In [6]:
# Cell 6: start ngrok tunnel and keep public_url in a variable
import os

import nest_asyncio
nest_asyncio.apply()
from pyngrok import ngrok

# Optional: authenticate with ngrok. The token is read from the environment
# (export NGROK_AUTHTOKEN=... before starting Jupyter) instead of being pasted
# here, so it never ends up in the saved notebook. Any token that was
# previously hard-coded in this cell should be revoked and regenerated.
ngrok_token = os.environ.get("NGROK_AUTHTOKEN")
if ngrok_token:
    ngrok.set_auth_token(ngrok_token)

# Open a tunnel to local port 8000 (the port uvicorn is started on) and
# remember the public address for the request cells further down.
tunnel = ngrok.connect(8000)
public_url = tunnel.public_url
print("PUBLIC URL:", public_url)


PUBLIC URL: https://humorous-judah-chemotactic.ngrok-free.dev


In [7]:
# Cell 7: start the server in the background (so we can run test requests)
# This will create uvicorn.log that you can inspect.
import subprocess, os, sys, time

# Build the command as an argument list so an interpreter path containing
# spaces is passed through intact; `cmd` stays a printable string as before.
cmd_args = [sys.executable, "-m", "uvicorn", "app:app", "--host", "0.0.0.0", "--port", "8000"]
cmd = " ".join(cmd_args)
print("Starting uvicorn with cmd:", cmd)

# Start in background and redirect output.
# The log is opened exactly once and stderr is merged into stdout: two
# independent "w" handles on the same file each keep their own offset and
# overwrite one another, which scrambles the log. The child process keeps its
# own copy of the descriptor, so the parent's handle can be closed right away.
with open("uvicorn.log", "w") as log_file:
    p = subprocess.Popen(cmd_args, stdout=log_file, stderr=subprocess.STDOUT)

# Give the process a moment, then make sure it did not die immediately
# (e.g. port already in use, import error in app.py).
time.sleep(2)
if p.poll() is None:
    print("uvicorn started (PID {}). Check uvicorn.log for output.".format(p.pid))
else:
    print("uvicorn exited early (exit code {}). Check uvicorn.log for output.".format(p.returncode))


Starting uvicorn with cmd: /home/zeus/miniconda3/envs/cloudspace/bin/python -m uvicorn app:app --host 0.0.0.0 --port 8000


uvicorn started (PID 5423). Check uvicorn.log for output.


In [13]:
# Cell 8: show last 200 lines of uvicorn.log to monitor model loading / errors
import time, os

logfile = "uvicorn.log"

# Short pause before reading the log.
time.sleep(1)

if not os.path.exists(logfile):
    print("uvicorn.log not found yet.")
else:
    print("--- tail uvicorn.log ---")
    with open(logfile, "r") as log_handle:
        all_lines = log_handle.read().splitlines()
    # Keep only the trailing 200 lines of the file.
    tail_lines = all_lines[-200:]
    print("\n".join(tail_lines))


--- tail uvicorn.log ---
INFO:     Started server process [5423]
INFO:     Waiting for application startup.

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]
Loading checkpoint shards:  25%|██▌  Loaded PEFT adapter from lora_adapter
Model loaded.
nt shards:  50%|█████     | 2/4 [00:48<00:40, 20.48s/it]
Loading checkpoint shards:  75%|███████▌  | 3/4 [00:51<00:12, 12.84s/it]
Loading checkpoint shards: 100%|██████████| 4/4 [00:52<00:00,  7.86s/it]
Loading checkpoint shards: 100%|██████████| 4/4 [00:52<00:00, 13.02s/it]
INFO:     Application startup complete.
INFO:     Uvicorn running on http://0.0.0.0:8000 (Press CTRL+C to quit)


In [65]:
# Cell 9: test the endpoints (replace NGROK if needed)
import os, requests, json, time


def _post_and_show(base_url, path, body, request_headers, failure_label):
    """POST `body` as JSON to `base_url + path` and print the outcome.

    Prints the HTTP status followed by the pretty-printed JSON body. If the
    body is not valid JSON (for example an HTML error page), the raw text is
    printed instead, so the real status/body is not hidden behind a generic
    "request failed" message.

    Returns the `requests.Response`, or None when the request itself failed
    (connection error, timeout, ...), in which case
    "<failure_label> request failed: <error>" is printed.
    """
    try:
        resp = requests.post(f"{base_url}{path}", json=body, headers=request_headers, timeout=180)
    except requests.RequestException as e:
        print(f"{failure_label} request failed:", e)
        return None

    print("status:", resp.status_code)
    try:
        print(json.dumps(resp.json(), indent=2))
    except ValueError:
        # requests' JSON decode errors are ValueError subclasses.
        print("Response body is not JSON:", resp.text[:500])
    return resp


# If public_url wasn't set yet, ask for it
try:
    NGROK = public_url
except NameError:
    NGROK = input("Paste NGROK URL (e.g. https://xxxx.ngrok-free.app): ").strip()

# A pasted URL may end with "/"; drop it so the paths below do not become "//health".
NGROK = NGROK.rstrip("/")
print("Using NGROK:", NGROK)

# API key sent in the x-api-key header; falls back to the dev key when
# API_KEY is not set in this kernel's environment.
headers = {"x-api-key": os.environ.get("API_KEY", "testkey123")}

# --- Test /health ---
print("\n--- Checking /health ---")
try:
    health = requests.get(f"{NGROK}/health", headers=headers, timeout=30)
    print("health status:", health.status_code, health.text)
except requests.RequestException as e:
    print("Health request failed:", e)


# --- Test /generate (normal reflection) ---
print("\n--- Calling /generate ---")
payload = {
    "user_id": 1,   # IMPORTANT! Memory works only if user_id is passed
    "journal_text": "aaj mein bohot khush hoon",
    "max_new_tokens": 250,
    "temperature": 0.8,
    "mode": "normal"
}
r = _post_and_show(NGROK, "/generate", payload, headers, "Generate")


# --- Test /weekly_summary ---
print("\n--- Calling /weekly_summary ---")
summary_payload = {
    "user_id": 1,
    "days": 7
}
s = _post_and_show(NGROK, "/weekly_summary", summary_payload, headers, "Weekly summary")


Using NGROK: https://humorous-judah-chemotactic.ngrok-free.dev

--- Checking /health ---
health status: 200 {"status":"ok","device":"cuda"}

--- Calling /generate ---


status: 200
{
  "text": "Wah, bahut achhi baat hai! Aapne jo feelings share kiya hua hai, wo bilkul sahi hai. Khushi aur positive vibes aapki personality ka ek bada hissa hain, aur isse aapko zyada hi strong feel hoga.\n\nMotivation: Aapko apni khushi ko is tarah hi celebrate karna chaahiye, kyunki yeh life ki asli value hai!\n\nImprovement Tip: Akelein mat so, thodi duniya ke saath connect karna bhi zaroori hai, jaise dosti ya kisi activity join karna.\n\nGuided Resource: Aise moments ko enjoy karna seekhein aur khud ko samjhana seekhe, yahi aapke growth ka raasta hai.\n\nClosing Note: Tumhari positivity ka ye time hona ek blessing hai, is par khushi aur pride feel karo!"
}

--- Calling /weekly_summary ---
status: 200
{
  "text": "**Emotion Pattern:** Aap akeelapan mehsonosein mehnsa hai aur apni feelings share kar rahe hai, jo zaroor ki hai.\n\n**Key Win:** Aavashyak se aap ne khud ki strengths par focus kiya aur achievements celebrate kiye. Yahi aapas mein support aur positivity laa