In [None]:
# Install required libraries
# Model side: transformers and langchain are imported by the model-loading cells.
# accelerate, einops and bitsandbytes are not imported directly in the visible cells;
# presumably they are runtime dependencies of the Falcon model load - TODO confirm all are needed.
!pip install -q transformers accelerate einops langchain bitsandbytes

# Serving side: fastapi, pyngrok, uvicorn and nest_asyncio are imported by the API cells.
!pip -q install fastapi
!pip -q install pyngrok
!pip -q install uvicorn
!pip -q install nest_asyncio

In [None]:
# Load model
# Builds a Hugging Face text-generation pipeline for Falcon-7B-instruct.
from langchain import HuggingFacePipeline
from transformers import AutoTokenizer, pipeline

import torch

model_name = "tiiuae/falcon-7b-instruct"

tokenizer = AutoTokenizer.from_pretrained(model_name)

# NOTE: this assignment rebinds `pipeline` from the transformers factory function
# to the constructed pipeline object. The name is kept because the LangChain
# wrapper cell passes `pipeline=pipeline`; re-running this cell still works
# because the import above restores the factory first.
pipeline = pipeline(
    'text-generation',
    model=model_name,
    tokenizer=tokenizer,
    trust_remote_code=True,
    device_map="auto",
    torch_dtype=torch.float16,
    # NOTE(review): max_length counts prompt tokens plus generated tokens, so a
    # long riddle leaves little or no room for the answer - consider max_new_tokens.
    max_length=512,
    # Greedy decoding. The former `top_k=10` was dropped: top-k filtering is only
    # applied when sampling, so it had no effect with do_sample=False.
    do_sample=False,
    num_return_sequences=1,
    eos_token_id=tokenizer.eos_token_id,
)

In [None]:
# Craft prompt template and wrap model around langchain llm chain
from langchain import PromptTemplate, LLMChain

# Expose the transformers pipeline as a LangChain LLM so it can be used in a chain.
# NOTE(review): whether `model_kwargs` is honoured when a ready-made pipeline is
# passed depends on the langchain version - confirm.
falcon = HuggingFacePipeline(
  pipeline=pipeline,
  model_kwargs={"temperature": 0.0},
)

# One-shot prompt sent to the model. The text (including its leading spaces) is
# part of the model input, so it is kept exactly as originally written.
_RIDDLE_TEMPLATE = """
      You are a science prodigy currently competing in a National Science competition. You are now in the fifth round, where you must provide a one-word answer to a riddle. Remember, your answer should consist of just the term the riddle is pointing to, and nothing else. Adding additional text will result in point deductions.
      Here's an example to guide you:
      Riddle: you might think i am a rather unstable character because i never stay at one place, however my motion obeys strict rules and i always return to where i started and even if i have to leave that spot again i do it in strict accordance to time, i can be named in electrical and mechanical contexts in all cases i obey the same mathematical rules, in order to fully analyse me you would think about a stiffness or force constant restoring force and angular frequency,
      Answer: oscillator

      Read the riddle below and provide the correct answer.
      Riddle: {riddle}

      Answer:"""

# The prompt and chain do not depend on the riddle, so they are built once when
# the cell runs instead of on every call.
_riddle_prompt = PromptTemplate(template=_RIDDLE_TEMPLATE, input_variables=["riddle"])
_falcon_chain = LLMChain(prompt=_riddle_prompt, llm=falcon)


def answer_with_falcon(riddle):
  """Ask the Falcon chain for the answer to a riddle.

  Args:
    riddle: Riddle text substituted into the `{riddle}` slot of the prompt.

  Returns:
    The chain's text output with leading and trailing whitespace removed,
    since the prompt asks for a bare one-word answer.
  """
  return _falcon_chain.run(riddle).strip()

In [None]:
# Import modules to set up API
import uvicorn  # runs the FastAPI app (uvicorn.run in the last cell)
import fastapi  # provides the FastAPI application class
from pyngrok import ngrok  # opens the public tunnel to the local port
from pydantic import BaseModel  # base class for the request/response schemas
import nest_asyncio

# Patch asyncio to allow nested event loops - presumably so uvicorn.run() can
# start inside the notebook kernel's already-running loop; must run before the server cell.
nest_asyncio.apply()

In [None]:
# Request schema for the /answer endpoint.
class InputText(BaseModel):
  text: str  # riddle text; forwarded to answer_with_falcon()

# Response schema used by both the /answer and /qa-test endpoints.
class OutputText(BaseModel):
  answer: str  # generated answer for /answer, fixed greeting for /qa-test

In [None]:
# Register the ngrok auth token without writing it into the notebook.
# The token is taken from the NGROK_AUTHTOKEN environment variable when set;
# otherwise it is requested interactively (the input is not echoed).
import os
from getpass import getpass

ngrok_auth_token = os.environ.get("NGROK_AUTHTOKEN") or getpass("ngrok auth token: ")
# Saves the token to the ngrok configuration used by pyngrok for later connect() calls.
ngrok.set_auth_token(ngrok_auth_token)

In [None]:
# FastAPI application object; the route decorators below register endpoints on
# it and the final cell hands it to uvicorn.
app = fastapi.FastAPI()

# The endpoint takes a JSON request body. Bodies on GET requests are discouraged
# and may be dropped by proxies or clients, so POST is registered as the
# preferred method; the GET route is kept so existing callers keep working.
@app.post("/answer", response_model=OutputText)
@app.get("/answer", response_model=OutputText)
def answer(input_text: InputText):
  """Answer the riddle supplied in the request body using the Falcon model."""
  gen_output = answer_with_falcon(input_text.text)
  return {"answer": gen_output}

# Smoke-test endpoint: replies with a fixed greeting and never calls the model.
@app.get("/qa-test", response_model=OutputText)
async def qa_test():
  greeting = "Hello from QA"
  return dict(answer=greeting)

In [None]:
# Port shared by the ngrok tunnel and the uvicorn server; the two must match.
PORT = 8000

# Open a public tunnel to the local port and show the URL to call.
ngrok_tunnel = ngrok.connect(PORT)
print("Public URL:", ngrok_tunnel.public_url)
try:
  # Blocks until the server stops (e.g. the cell is interrupted).
  uvicorn.run(app, port=PORT)
finally:
  # Close the tunnel so re-running this cell does not leave stale tunnels open.
  ngrok.disconnect(ngrok_tunnel.public_url)