<a href="https://colab.research.google.com/github/diyanigam/CookBook/blob/main/CB_API_hosting.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install the third-party packages used by the cells below (model loading,
# LangChain wrapper, FastAPI/uvicorn server and the pyngrok tunnel).
# NOTE(review): nest_asyncio is imported by the launch cell but is not
# installed here; presumably the runtime already provides it - confirm.
!pip install transformers peft torch langchain langchain-community uvicorn fastapi ngrok pyngrok

In [None]:
import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationChain
from langchain.prompts import PromptTemplate
from langchain_community.llms import HuggingFacePipeline
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from typing import Dict, Any
from fastapi.middleware.cors import CORSMiddleware

In [None]:
import os

# Prefer a token supplied through the HF_TOKEN environment variable so the
# secret does not have to live in the notebook source; the inline placeholder
# is kept as a fallback for backward compatibility.
access_token = os.environ.get("HF_TOKEN") or "**********"

# Single source of truth for the gated base checkpoint used by both the model
# and its tokenizer.
BASE_MODEL_ID = "meta-llama/Llama-2-7b-chat-hf"

# Load the base model, then apply the "diyanigam/CookBook" PEFT adapter on top.
base_model = AutoModelForCausalLM.from_pretrained(BASE_MODEL_ID, token=access_token)
model = PeftModel.from_pretrained(base_model, "diyanigam/CookBook")
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_ID, token=access_token)

# Run on the GPU when one is available, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)
# Switch to evaluation mode; this notebook only performs inference.
model.eval()
print(f"Model loaded successfully on {device}.")

In [None]:
from langchain_community.llms import HuggingFacePipeline

# When the tokenizer defines no padding token, reuse its end-of-sequence
# token so that pad_token_id below is populated.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# Keyword arguments for the text-generation pipeline: sampling at
# temperature 0.8, explicit EOS/pad ids, and the device index matching the
# device the model was moved to (0 for CUDA, -1 for CPU).
_on_gpu = device == "cuda"
_pipeline_kwargs = dict(
    model=model,
    tokenizer=tokenizer,
    do_sample=True,
    temperature=0.8,
    eos_token_id=tokenizer.eos_token_id,
    pad_token_id=tokenizer.pad_token_id,
    device=0 if _on_gpu else -1,
)
hf_pipeline = pipeline("text-generation", **_pipeline_kwargs)

# Expose the Hugging Face pipeline through LangChain's LLM interface.
llm = HuggingFacePipeline(pipeline=hf_pipeline)
print(f"Model loaded successfully on {device} and wrapped for LangChain.")

In [None]:
app = FastAPI()

# Per-user conversation state, keyed by the user_id sent with each request.
# NOTE: entries are never evicted, so this grows with the number of users.
conversation_memories: Dict[str, ConversationBufferMemory] = {}

# Origins allowed to call the API from a browser. Browsers send the Origin
# header as scheme://host[:port] with no trailing slash and the middleware
# compares it verbatim, so entries must not end in "/".
ALLOWED_ORIGINS = [
    "https://cook-book-phi.vercel.app",
    "http://localhost:5174",
]

app.add_middleware(
    CORSMiddleware,
    allow_origins=ALLOWED_ORIGINS,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# JSON body accepted by the POST /chat/ endpoint.
class MessageRequest(BaseModel):
    # Key used to look up (or create) this caller's conversation memory.
    user_id: str
    # Text forwarded to the conversation chain as the human input.
    message: str

# Prompt shared by every request. It is built once instead of on each call;
# the template text (including the leading spaces on continuation lines) is
# kept exactly as before so the model sees the same prompt.
_RECIPE_PROMPT = PromptTemplate(
    input_variables=["chat_history", "input"],
    template="""You are a helpful and detailed recipe generator. You can remember past conversations.
    Generate recipes and modify them based on user requests, making sure to include or exclude specific ingredients.

    **Do NOT generate the next human turn in the conversation. Only provide your AI response.**

    **Do NOT repeat the instructions or the previous conversation. Only provide the recipe.**


    Current conversation:
    {chat_history}
    Human: {input}
    AI:""",
)


@app.post("/chat/")
async def chat_with_model(request: MessageRequest):
    """Generate a recipe reply for one user message, keeping per-user history.

    Args:
        request: Body carrying the caller's ``user_id`` and ``message``.

    Returns:
        A dict with the ``user_id`` and the model's ``response`` text.

    Raises:
        HTTPException: With status 500 if the conversation chain fails.
    """
    user_id = request.user_id

    # Get or create memory for the user.
    current_memory = conversation_memories.get(user_id)
    if current_memory is None:
        # The prompt is a plain string template, so the history must be
        # injected as text ("Human: ...\nAI: ..."). With return_messages=True
        # the template would instead receive a list of message objects and
        # render its Python repr into the prompt.
        current_memory = ConversationBufferMemory(
            memory_key="chat_history",
            return_messages=False,
        )
        conversation_memories[user_id] = current_memory
        print(f"Created new memory for user: {user_id}")

    conversation_chain = ConversationChain(
        llm=llm,
        memory=current_memory,
        prompt=_RECIPE_PROMPT,
        verbose=False,
    )

    # NOTE: predict() is a synchronous call inside an async handler, so the
    # event loop cannot serve other requests while generation is running.
    try:
        response = conversation_chain.predict(input=request.message)
    except Exception as e:
        # Top-level boundary: surface the failure as a 500 and keep the cause.
        raise HTTPException(status_code=500, detail=str(e)) from e

    return {"user_id": user_id, "response": response}


In [None]:
# Terminate any ngrok processes that are still running (for example from an
# earlier run of the launch cell) before a new tunnel is opened.
!killall ngrok

In [None]:
import uvicorn
from pyngrok import ngrok
import nest_asyncio
!ngrok config add-authtoken **************

nest_asyncio.apply()

PORT = 8000

try:
    public_url = ngrok.connect(PORT).public_url
    print(f"ngrok Public URL: {public_url}")
except Exception as e:
    print(f"Error starting ngrok: {e}")
    print("Please ensure ngrok is installed and you have a valid auth token if needed.")

uvicorn.run(app, host="0.0.0.0", port=PORT)