<a href="https://colab.research.google.com/github/gitmystuff/INFO5737/blob/main/Server_I.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install necessary libraries
!pip install transformers fastapi uvicorn python-multipart pyngrok slowapi -q

In [None]:
import logging
from fastapi import FastAPI, Request, HTTPException
import uvicorn
import torch
import nest_asyncio
from google.colab import userdata
from pyngrok import ngrok
import requests
import json

# Ask for DEBUG-level logging on the root logger.
logging.basicConfig(level=logging.DEBUG)

# --- Hugging Face Inference API settings ---
# The access token comes from Colab secrets (add a secret named "HF_TOKEN").
HF_TOKEN = userdata.get("HF_TOKEN")

# Model queried through the hosted Inference API. Other ids that can be
# swapped in here:
#   "gpt2"
#   "distilgpt2"
#   "codellama/CodeLlama-7b-hf"
#   "meta-llama/Llama-2-7b-chat-hf"
HF_MODEL_ID = "meta-llama/Meta-Llama-3-8B-Instruct"
API_URL = "https://api-inference.huggingface.co/models/" + HF_MODEL_ID

# Bearer-token header sent with every Inference API request.
headers = {"Authorization": "Bearer {}".format(HF_TOKEN)}

# --- FastAPI application object the endpoints are registered on ---
app = FastAPI()

# Define an Endpoint for Chatbot Interaction
@app.post("/chat/")
async def chat(request: Request):
    """Handle one chat turn by sending the user's message to the Hugging Face Inference API.

    Request body (JSON object):
        message: the user's text (required).
        history: list of earlier ``{"user": ..., "bot": ...}`` turns
            (optional; missing or ``null`` is treated as empty). The history
            is echoed back with this turn appended; it is not sent to the
            model.

    Returns:
        dict with ``response`` (the generated text, capped at 500 characters)
        and ``history`` (the incoming history plus this turn).

    Raises:
        HTTPException: 400 when the body is not valid JSON, is not an object,
            lacks ``message``, or carries a non-list ``history``; 500 when the
            Hugging Face call fails or any other error occurs.
    """
    # Function-scope import: FastAPI is already a dependency of this file.
    from fastapi.concurrency import run_in_threadpool

    request_timeout_s = 60  # upper bound on how long one upstream call may hang
    max_response_chars = 500

    # Validate the request before entering the broad handler below, so that
    # client mistakes are reported as 400 instead of being turned into a 500.
    try:
        data = await request.json()
    except ValueError:
        # json.JSONDecodeError and UnicodeDecodeError both derive from ValueError.
        raise HTTPException(
            status_code=400, detail="Request body must be valid JSON"
        ) from None
    if not isinstance(data, dict) or "message" not in data:
        raise HTTPException(
            status_code=400,
            detail="Request body must be a JSON object with a 'message' field",
        )
    user_input = data["message"]
    history = data.get("history")
    if history is None:
        history = []
    if not isinstance(history, list):
        raise HTTPException(status_code=400, detail="'history' must be a list")

    try:
        # Prompt engineering (basic)
        prompt = f"You are a concise chatbot. User: {user_input} Chatbot Response: "

        # Simplified payload (send the engineered prompt)
        payload = {"inputs": prompt}

        # `requests` is blocking; run it in a worker thread so the event loop
        # keeps serving other requests, and bound the wait with a timeout.
        response = await run_in_threadpool(
            requests.post,
            API_URL,
            headers=headers,
            json=payload,
            timeout=request_timeout_s,
        )
        response.raise_for_status()  # Raise HTTPError for API errors
        bot_response = response.json()[0]["generated_text"]

        # The API may return the prompt followed by the completion; drop the
        # echoed prompt so only the model's reply is returned.
        if bot_response.startswith(prompt):
            bot_response = bot_response[len(prompt):]

        # Truncate response (to control length)
        bot_response = bot_response[:max_response_chars]

        # Update history (simplified); builds a new list, the input is not mutated.
        new_history = history + [{"user": user_input, "bot": bot_response}]

        logging.debug("User Input: %s", user_input)
        logging.debug("Bot Response: %s", bot_response)
        logging.debug("History: %s", new_history)

        return {"response": bot_response, "history": new_history}

    except requests.exceptions.HTTPError as api_error:
        logging.error("Hugging Face API Error: %s", api_error)
        raise HTTPException(
            status_code=500, detail=f"Hugging Face API Error: {api_error}"
        ) from api_error
    except Exception as exc:
        # Covers timeouts, connection failures and unexpected response shapes.
        logging.exception("Exception in /chat/ endpoint:")
        raise HTTPException(status_code=500, detail="Internal server error") from exc

# Run the FastAPI App and Expose it with Ngrok
if __name__ == "__main__":
    SERVER_PORT = 8000  # single source of truth for the tunnel and the server

    # Allow uvicorn to start an event loop inside the notebook's running loop.
    nest_asyncio.apply()

    # userdata.get raises SecretNotFoundError when the secret does not exist;
    # map that to None so the friendly message below is actually shown.
    try:
        authtoken = userdata.get("NGROK_KEY")
    except userdata.SecretNotFoundError:
        authtoken = None

    if not authtoken:
        print("Error: ngrok authtoken not found. Please add it to Colab user secrets as 'NGROK_KEY'.")
    else:
        ngrok.set_auth_token(authtoken)

        http_tunnel = ngrok.connect(SERVER_PORT)
        print("Public URL:", http_tunnel.public_url)

        try:
            # Blocks until the server is stopped.
            uvicorn.run(app, host="0.0.0.0", port=SERVER_PORT)
        finally:
            # Close the tunnel so re-running the cell does not leak tunnels.
            ngrok.disconnect(http_tunnel.public_url)