# **Install Whisper Timestamped**

# **Setup API**

In [1]:
!pip install fastapi uvicorn requests moviepy

Collecting fastapi
  Downloading fastapi-0.110.0-py3-none-any.whl (92 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m92.1/92.1 kB[0m [31m1.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting uvicorn
  Downloading uvicorn-0.29.0-py3-none-any.whl (60 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m60.8/60.8 kB[0m [31m4.2 MB/s[0m eta [36m0:00:00[0m
Collecting starlette<0.37.0,>=0.36.3 (from fastapi)
  Downloading starlette-0.36.3-py3-none-any.whl (71 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m71.5/71.5 kB[0m [31m4.1 MB/s[0m eta [36m0:00:00[0m
Collecting h11>=0.8 (from uvicorn)
  Downloading h11-0.14.0-py3-none-any.whl (58 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m58.3/58.3 kB[0m [31m5.2 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: h11, uvicorn, starlette, fastapi
Successfully installed fastapi-0.110.0 h11-0.14.0 starlette-0.36.3 uvicorn-0.29.0


In [2]:
!pip install streamlit
!pip install chromadb
!pip install tiktoken
!pip install PyPDF2
!pip install langchain


Collecting streamlit
  Downloading streamlit-1.32.2-py2.py3-none-any.whl (8.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.1/8.1 MB[0m [31m22.0 MB/s[0m eta [36m0:00:00[0m
Collecting packaging<24,>=16.8 (from streamlit)
  Downloading packaging-23.2-py3-none-any.whl (53 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m53.0/53.0 kB[0m [31m3.0 MB/s[0m eta [36m0:00:00[0m
Collecting gitpython!=3.1.19,<4,>=3.0.7 (from streamlit)
  Downloading GitPython-3.1.42-py3-none-any.whl (195 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m195.4/195.4 kB[0m [31m18.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.8.1b0-py2.py3-none-any.whl (4.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m4.8/4.8 MB[0m [31m65.9 MB/s[0m eta [36m0:00:00[0m
Collecting watchdog>=2.1.5 (from streamlit)
  Downloading watchdog-4.0.0-py3-none-manylinux2014_x86_64.whl (8

In [3]:
!echo "-----------Installing PyNgrok-----------"
!pip install pyngrok
!echo "-----------Installing Ollama------------"
!curl https://ollama.ai/install.sh | sh

-----------Installing PyNgrok-----------
Collecting pyngrok
  Downloading pyngrok-7.1.5-py3-none-any.whl (22 kB)
Installing collected packages: pyngrok
Successfully installed pyngrok-7.1.5
-----------Installing Ollama------------
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 10044    0 10044    0     0  35048      0 --:--:-- --:--:-- --:--:-- 34996>>> Downloading ollama...
100 10044    0 10044    0     0  30256      0 --:--:-- --:--:-- --:--:-- 30253
############################################################################################# 100.0%
>>> Installing ollama to /usr/local/bin...
>>> Creating ollama user...
>>> Adding ollama user to video group...
>>> Adding current user to ollama group...
>>> Creating ollama systemd service...
>>> The Ollama API is now available at 127.0.0.1:11434.
>>> Install complete. Run "ollama" from the command line.


In [4]:
import subprocess
import threading
import time
import logging.handlers
import sys
import os

def create_logger(name, filename, level, formatter):
    """Return the logger called `name`, writing to a size-rotated log file.

    The function is safe to call repeatedly with the same arguments (for
    example when the notebook cell is re-run): an existing rotating handler
    for the same file is reused instead of a second one being attached.

    Args:
        name: logger name passed to ``logging.getLogger``.
        filename: path of the log file; rotated at 5 MiB, keeping 5 backups.
        level: logging level applied to the logger.
        formatter: ``logging.Formatter`` applied to the file handler.

    Returns:
        The configured ``logging.Logger``.
    """
    logger = logging.getLogger(name)
    target_path = os.path.abspath(filename)

    # logging.getLogger hands back the same object for the same name, so
    # blindly adding a handler on every call would write each record once per
    # call. Look for a handler that already targets this file first.
    handler = next(
        (
            existing
            for existing in logger.handlers
            if isinstance(existing, logging.handlers.RotatingFileHandler)
            and existing.baseFilename == target_path
        ),
        None,
    )
    if handler is None:
        handler = logging.handlers.RotatingFileHandler(
            filename, maxBytes=5 * 1024 * 1024, backupCount=5
        )
        logger.addHandler(handler)

    # Always apply the latest formatter/level so a re-run picks up edits.
    handler.setFormatter(formatter)
    logger.setLevel(level)
    return logger

# Both formatters use the same layout: [time] [LEVEL] [logger name] - message.
status_formatter = logging.Formatter('[%(asctime)s] [%(levelname)s] [%(name)s] - %(message)s')
error_formatter = logging.Formatter('[%(asctime)s] [%(levelname)s] [%(name)s] - %(message)s')

# Registry of the notebook's file loggers, keyed by logger name.
# Each row is (logger name, log file, level, formatter).
loggers = {
    logger_name: create_logger(logger_name, log_file, log_level, log_formatter)
    for logger_name, log_file, log_level, log_formatter in (
        ("Status", "status.log", logging.INFO, status_formatter),
        ("NatsrvStatus", "natsrv.log", logging.INFO, status_formatter),
        ("OllamaStatus", "ollama.log", logging.INFO, status_formatter),
        ("Error", "error.log", logging.ERROR, error_formatter),
        ("OllamaError", "ollama_error.log", logging.ERROR, error_formatter),
    )
}

class ProcessMonitor:
    """Run `ollama serve` as a child process and restart it whenever it exits.

    Attributes:
        processes: maps a process name (currently only 'ollama') to its Popen.
        is_monitoring: loop flag read by monitor_process(); stop() clears it.
    """

    # Seconds between liveness checks of a monitored process.
    POLL_INTERVAL_SECONDS = 5
    # Seconds stop() waits for a thread or child to finish before moving on.
    STOP_TIMEOUT_SECONDS = 10

    def __init__(self):
        self.processes = {}
        self.is_monitoring = True
        # Set by stop() so a monitor thread sleeping between polls wakes at once.
        self._stop_event = threading.Event()
        # Monitor threads by process name, so stop() can join them.
        self._threads = {}

    def handle_output(self, process_name):
        """Forward a process's stdout, line by line, to its '<Name>Status' logger.

        Returns immediately when the process was started without a stdout
        pipe (as run_ollama does), because there is nothing to read.
        """
        process = self.processes[process_name]
        if process.stdout is None:
            return
        logger_status = loggers[f"{process_name.capitalize()}Status"]
        for line in iter(process.stdout.readline, b''):
            # errors='replace': one undecodable byte must not end log forwarding.
            logger_status.info(line.decode(errors='replace').strip())

    def run_ollama(self):
        """Start `ollama serve` with its output discarded and record the Popen.

        Raises:
            OSError: if the `ollama` executable cannot be launched.
        """
        # Argument list without a shell: the Popen object is then the ollama
        # process itself, so terminate() in stop() reaches it directly.
        cmd = ["ollama", "serve"]
        self.processes['ollama'] = subprocess.Popen(
            cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL
        )
        loggers["OllamaStatus"].info(f"Started ollama with command: {' '.join(cmd)}")

    def monitor_process(self, process_name):
        """Poll `process_name` until monitoring is stopped, restarting it on exit."""
        while self.is_monitoring:
            if self.processes[process_name].poll() is not None:
                loggers["Status"].warning(f"{process_name} process has stopped. Restarting...")
                if process_name == 'ollama':
                    try:
                        self.run_ollama()
                    except OSError as exc:
                        # Keep the watchdog alive; the next poll tries again.
                        loggers["Error"].error(f"Failed to restart {process_name}: {exc}")
            # Sleeps like time.sleep(), but returns early once stop() is called.
            self._stop_event.wait(self.POLL_INTERVAL_SECONDS)

    def start(self):
        """Launch ollama and its monitor thread; a no-op if already running."""
        running = self._threads.get('ollama')
        if running is not None and running.is_alive():
            # A second launch would replace the tracked Popen and orphan the
            # server that is already running.
            return
        self.run_ollama()
        thread = threading.Thread(target=self.monitor_process, args=('ollama',))
        self._threads['ollama'] = thread
        thread.start()

    def stop(self):
        """Stop monitoring, then terminate and reap every tracked process."""
        self.is_monitoring = False
        self._stop_event.set()

        # Let the monitor threads finish first so none of them can restart a
        # process after it has been terminated below.
        current = threading.current_thread()
        for thread in self._threads.values():
            if thread is not current and thread.is_alive():
                thread.join(timeout=self.STOP_TIMEOUT_SECONDS)

        for p in self.processes.values():
            p.terminate()
            try:
                # Reap the child so it does not linger as a zombie.
                p.wait(timeout=self.STOP_TIMEOUT_SECONDS)
            except subprocess.TimeoutExpired:
                p.kill()
                p.wait()

# Start Ollama and its watchdog thread as soon as this cell runs.
# `monitor` stays at module level so a later cell can call monitor.stop().
if __name__ == '__main__':
    monitor = ProcessMonitor()
    monitor.start()

INFO:OllamaStatus:Started ollama with command: ollama serve


In [5]:
# Pull the `mistral` model with the Ollama CLI; process_input() later loads it
# by this name.
# NOTE(review): presumably needs the `ollama serve` process from the previous
# cell to be up already - confirm, and add a readiness wait if pulls fail.
!ollama pull mistral
# This banner is a separate command, so it prints whether or not the pull succeeded.
!echo "-----------Model Deployed-----------"

[?25lpulling manifest ⠙ [?25h[?25l[2K[1Gpulling manifest ⠹ [?25h[?25l[2K[1Gpulling manifest ⠹ [?25h[?25l[2K[1Gpulling manifest ⠸ [?25h[?25l[2K[1Gpulling manifest ⠼ [?25h[?25l[2K[1Gpulling manifest ⠦ [?25h[?25l[2K[1Gpulling manifest ⠦ [?25h[?25l[2K[1Gpulling manifest ⠧ [?25h[?25l[2K[1Gpulling manifest ⠇ [?25h[?25l[2K[1Gpulling manifest ⠏ [?25h[?25l[2K[1Gpulling manifest ⠋ [?25h[?25l[2K[1Gpulling manifest ⠙ [?25h[?25l[2K[1Gpulling manifest ⠹ [?25h[?25l[2K[1Gpulling manifest ⠸ [?25h[?25l[2K[1Gpulling manifest ⠼ [?25h[?25l[2K[1Gpulling manifest 
pulling e8a35b5937a5...   0% ▕▏    0 B/4.1 GB                  [?25h[?25l[2K[1G[A[2K[1Gpulling manifest 
pulling e8a35b5937a5...   0% ▕▏    0 B/4.1 GB                  [?25h[?25l[2K[1G[A[2K[1Gpulling manifest 
pulling e8a35b5937a5...   0% ▕▏    0 B/4.1 GB                  [?25h[?25l[2K[1G[A[2K[1Gpulling manifest 
pulling e8a35b5937a5...   0% ▕▏    0 B/4.1 GB          

In [6]:
# Pull the `nomic-embed-text` model with the Ollama CLI; process_input() later
# uses it by this name to embed the document chunks.
!ollama pull nomic-embed-text
# This banner is a separate command, so it prints whether or not the pull succeeded.
!echo "-----------Embeds Pulled-----------"

[?25lpulling manifest ⠙ [?25h[?25l[2K[1Gpulling manifest ⠹ [?25h[?25l[2K[1Gpulling manifest ⠹ [?25h[?25l[2K[1Gpulling manifest ⠼ [?25h[?25l[2K[1Gpulling manifest ⠴ [?25h[?25l[2K[1Gpulling manifest ⠦ [?25h[?25l[2K[1Gpulling manifest ⠦ [?25h[?25l[2K[1Gpulling manifest ⠇ [?25h[?25l[2K[1Gpulling manifest ⠏ [?25h[?25l[2K[1Gpulling manifest ⠏ [?25h[?25l[2K[1Gpulling manifest ⠙ [?25h[?25l[2K[1Gpulling manifest 
pulling 970aa74c0a90...   0% ▕▏    0 B/274 MB                  [?25h[?25l[2K[1G[A[2K[1Gpulling manifest 
pulling 970aa74c0a90...   0% ▕▏    0 B/274 MB                  [?25h[?25l[2K[1G[A[2K[1Gpulling manifest 
pulling 970aa74c0a90...   0% ▕▏    0 B/274 MB                  [?25h[?25l[2K[1G[A[2K[1Gpulling manifest 
pulling 970aa74c0a90...   0% ▕▏    0 B/274 MB                  [?25h[?25l[2K[1G[A[2K[1Gpulling manifest 
pulling 970aa74c0a90...   0% ▕▏    0 B/274 MB                  [?25h[?25l[2K[1G[A[2K[1Gpulli

In [7]:
# Colab-only: mount the user's Google Drive at /content/drive. The /split/
# endpoint defined later reads its PDF from /content/drive/MyDrive/data/.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [8]:
from fastapi import FastAPI, UploadFile
from pydantic import BaseModel
# import whisper_timestamped as whisper
from tempfile import NamedTemporaryFile
import moviepy.editor
import requests
import uvicorn
import asyncio
from fastapi.middleware.cors import CORSMiddleware

# from fastapi.middleware.cors import CORSMiddleware
# class Input(BaseModel):
#   string : str

# Project-specific imports start here
import os
import PyPDF2
from langchain_community.vectorstores import Chroma
from langchain_community import embeddings
from langchain_community.llms import Ollama
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain.text_splitter import CharacterTextSplitter
# Project-specific imports end here

app = FastAPI()

# Open CORS policy so a browser front-end on any origin can call this demo API.
# Credentials stay disabled: combining allow_credentials=True with wildcard
# origins is not a valid CORS configuration and would let any website send
# credentialed requests. The API uses no cookies or auth headers. If that
# changes, list the trusted origins explicitly and only then enable credentials.
app.add_middleware(
    CORSMiddleware,
    allow_origins=['*'],
    allow_credentials=False,
    allow_methods=['*'],
    allow_headers=['*'],
)

def extract_text_from_pdf(pdf_file_path):
    """Open the PDF at `pdf_file_path` and return the text of each page.

    Args:
        pdf_file_path: path of the PDF file to read.

    Returns:
        A list with one entry per page, in page order; each entry is whatever
        ``page.extract_text()`` returned for that page.
    """
    with open(pdf_file_path, 'rb') as pdf_stream:
        pdf_reader = PyPDF2.PdfReader(pdf_stream, strict=False)
        # Extraction happens while the file is still open.
        return [pdf_page.extract_text() for pdf_page in pdf_reader.pages]

def process_input(text, question):
    """Answer `question` with a retrieval-augmented Mistral chain built over `text`.

    Args:
        text: the source material, either one string or a list/tuple of
            strings such as the per-page list from extract_text_from_pdf().
        question: the user's question; it is used both as the retrieval query
            and in the prompt.

    Returns:
        The model's answer as a string. When `text` contains nothing to index,
        the prompt's fallback reply "i do not know the answer" is returned
        without contacting the model.
    """
    import uuid  # local import: only needed to name the throw-away collection

    # Build one real string from the input. Calling str() on a list of pages
    # would index the list's repr instead: brackets, quotes and newlines
    # escaped as backslash-n, so the splitter would never find a paragraph break.
    if isinstance(text, str):
        full_text = text
    elif isinstance(text, (list, tuple)):
        # Blank line between pages so page boundaries are valid split points.
        full_text = "\n\n".join(str(page) for page in text if page)
    else:
        full_text = str(text)

    # split the text into chunks
    text_splitter = CharacterTextSplitter.from_tiktoken_encoder(
        chunk_size=7500, chunk_overlap=100)
    chunks = text_splitter.split_text(full_text)
    doc_splits = text_splitter.create_documents(chunks)
    if not doc_splits:
        # Nothing to retrieve from; skip the embedding and LLM round-trips.
        return "i do not know the answer"

    model_local = Ollama(model="mistral")

    # convert text chunks into embeddings and store in vector database.
    # The collection gets a unique name and is deleted afterwards, so chunks
    # indexed by earlier (or concurrent) calls cannot leak into this retrieval.
    vectorstore = Chroma.from_documents(
        documents=doc_splits,
        collection_name=f"rag-chroma-{uuid.uuid4().hex}",
        embedding=embeddings.OllamaEmbeddings(model='nomic-embed-text'),
    )
    try:
        retriever = vectorstore.as_retriever()

        after_rag_template = """Answer the question based only on the pdfs which are available in "data" folder in my drive else reply with "i do not know the answer":
    {context}
    Question: {question}
    """
        after_rag_prompt = ChatPromptTemplate.from_template(after_rag_template)
        after_rag_chain = (
            {"context": retriever, "question": RunnablePassthrough()}
            | after_rag_prompt
            | model_local
            | StrOutputParser()
        )
        return after_rag_chain.invoke(question)
    finally:
        vectorstore.delete_collection()

@app.get("/split/")
async def split(str):
    """Answer a question about the bundled PDF.

    The parameter keeps the name `str` (shadowing the builtin inside this
    function) because FastAPI uses the function parameter name as the query
    parameter name, so existing clients call `/split/?str=...`.

    Args:
        str: the question text.

    Returns:
        A dict of the form {'string': <answer>}.
    """
    question = str
    pdf_file_path = "/content/drive/MyDrive/data/The_Alchemist.pdf"

    def _answer_question():
        text = extract_text_from_pdf(pdf_file_path)
        return process_input(text, question)

    # PDF parsing, embedding and generation are all blocking calls. Running
    # them on a worker thread keeps the event loop free, which matters here
    # because uvicorn shares the loop with the notebook kernel.
    answer = await asyncio.to_thread(_answer_question)
    return {'string': answer}

# Run uvicorn as a background task on the notebook's already-running event
# loop, so this cell returns immediately and later cells can execute.
# host/port are uvicorn's defaults, spelled out because the ngrok cell below
# tunnels to port 8000.
config = uvicorn.Config(app, host="127.0.0.1", port=8000)
server = uvicorn.Server(config)
loop = asyncio.get_running_loop()
# Keep a named reference: the event loop only holds weak references to tasks,
# so an unreferenced task may be garbage-collected before it finishes.
server_task = loop.create_task(server.serve())
server_task

<Task pending name='Task-1' coro=<Server.serve() running at /usr/local/lib/python3.10/dist-packages/uvicorn/server.py:67>>

INFO:     Started server process [192]
INFO:     Waiting for application startup.


# **Setup Ngrok**

In [9]:
!echo "-----------Installing PyNgrok-----------"
!pip install pyngrok

!ngrok config add-authtoken 2YVyMkNsFFVNXIhAY8Kx0lKXsEw_2DXNH81vafPVB8VkFBkB2

-----------Installing PyNgrok-----------
Authtoken saved to configuration file: /root/.config/ngrok/ngrok.yml


In [10]:
from pyngrok import ngrok

# Public tunnel to local port 8000 (where the uvicorn server is expected to listen).
public_url = ngrok.connect(addr=8000)
# NOTE(review): this second tunnel exposes local port 22 over TCP and `ssh_url`
# is not used in this cell - confirm it is actually needed.
ssh_url = ngrok.connect(addr=22, proto="tcp")

# Blank lines to set the URL apart from earlier cell output.
print("\n" * 3)
print(f"Connect to your API at: {public_url}")





Connect to your API at: NgrokTunnel: "https://1654-34-31-227-28.ngrok-free.app" -> "http://localhost:8000"
