In [2]:
!pip install flask flask-ngrok flask-cors openai-whisper transformers accelerate bitsandbytes sentencepiece dotenv pyngrok
!apt-get install ffmpeg

Collecting flask-ngrok
  Downloading flask_ngrok-0.0.25-py3-none-any.whl.metadata (1.8 kB)
Collecting flask-cors
  Downloading flask_cors-5.0.1-py3-none-any.whl.metadata (961 bytes)
Collecting openai-whisper
  Downloading openai-whisper-20240930.tar.gz (800 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m800.5/800.5 kB[0m [31m13.2 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting bitsandbytes
  Downloading bitsandbytes-0.45.5-py3-none-manylinux_2_24_x86_64.whl.metadata (5.0 kB)
Collecting dotenv
  Downloading dotenv-0.9.9-py2.py3-none-any.whl.metadata (279 bytes)
Collecting pyngrok
  Downloading pyngrok-7.2.4-py3-none-any.whl.metadata (8.7 kB)
Collecting tiktoken (from openai-whisper)
  Downloading tiktoken-0.9.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.7 kB)
Collecting p

In [1]:
# The star import stays first so the explicit imports below win on any name clash.
from utils import *

import re, json, os
import subprocess
import tempfile
from collections import defaultdict

import whisper
from dotenv import load_dotenv
from flask import Flask, request, jsonify
from flask_cors import CORS
from flask_ngrok import run_with_ngrok
from huggingface_hub.hf_api import HfFolder
from pyngrok import ngrok
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

In [2]:
# Load secrets from a local .env file into the process environment.
load_dotenv()
hf_token = os.getenv("HF_TOKEN")
ng_token = os.getenv("NG_TOKEN")

# os.getenv returns None for an unset variable, so check before handing the
# values to the libraries instead of letting them fail on a None token.
if hf_token:
    HfFolder.save_token(hf_token)
else:
    # Hugging Face authentication is only needed for gated/private models.
    print("HF_TOKEN is not set; continuing without Hugging Face authentication.")

if not ng_token:
    raise RuntimeError(
        "NG_TOKEN is not set; add it to the environment or .env file before opening the ngrok tunnel."
    )
ngrok.set_auth_token(ng_token)

In [3]:
# Open an ngrok tunnel to local port 5000, the port the Flask app is served on
# at the end of this notebook.
public_url = ngrok.connect(addr=5000)
# Printing the tunnel object shows both the public and the local address.
print("🔥 Public URL:", public_url)

🔥 Public URL: NgrokTunnel: "https://7580-34-125-16-213.ngrok-free.app" -> "http://localhost:5000"


In [4]:
# Create the Flask application and read the project settings.
# get_config is not imported by name; presumably it comes from the star import of utils.
app = Flask(__name__)
config = get_config()

# The tunnel is opened through pyngrok in the previous cell, so flask_ngrok's
# run_with_ngrok(app) is intentionally not used here.
# Enable CORS on the app with flask_cors defaults.
CORS(app)

<flask_cors.extension.CORS at 0x7cd30b0ad890>

In [5]:
# Name of the language model; the model itself is loaded in the next cell.
llm_model_name = config["llm"]["model_name"]
# Load the Whisper model named in the "audio" section of the config.
audio_model = whisper.load_model(config["audio"]["model_name"])

In [6]:
# Tokenizer matching the configured causal language model.
tokenizer = AutoTokenizer.from_pretrained(llm_model_name)

# 4-bit NF4 quantization with double quantization; compute dtype is float16.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype="float16",
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
)

# Load the quantized model; device placement is left to device_map="auto".
llm_model = AutoModelForCausalLM.from_pretrained(
    llm_model_name,
    quantization_config=bnb_config,
    device_map="auto",
)

# Text-generation pipeline used by the /transcribe route.
llm_pipeline = pipeline(
    task="text-generation",
    model=llm_model,
    tokenizer=tokenizer,
)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Device set to use cuda:0


In [7]:
@app.route("/events", methods=["GET"])
def get_calendar_events():
    """Return the contents of ``calendar.json`` as a JSON response.

    Returns:
        The parsed calendar data, an empty object when the file does not
        exist yet, or a 500 response with an ``error`` field when the file
        cannot be parsed as JSON.
    """
    calendar_path = "calendar.json"

    # Open directly instead of checking os.path.exists first, so the file
    # cannot disappear between the check and the read.
    try:
        with open(calendar_path, "r", encoding="utf-8") as f:
            calendar_data = json.load(f)
    except FileNotFoundError:
        return jsonify({})
    except json.JSONDecodeError:
        # A truncated or hand-edited file should yield a JSON error body,
        # not an unhandled exception.
        app.logger.exception("Could not parse %s", calendar_path)
        return jsonify({"error": "Calendar data is not valid JSON"}), 500

    # Logged at debug level: the calendar holds full transcripts, which should
    # not be written to stdout on every request.
    app.logger.debug("Calendar data: %s", calendar_data)

    return jsonify(calendar_data)

In [8]:
def _extract_first_json_object(text):
    """Return the first JSON object embedded in ``text`` as a dict, or None."""
    decoder = json.JSONDecoder()
    start = text.find("{")
    while start != -1:
        try:
            # raw_decode parses one complete value starting at `start`, so
            # nested braces are handled and trailing text is ignored.
            candidate, _ = decoder.raw_decode(text, start)
        except json.JSONDecodeError:
            candidate = None
        if isinstance(candidate, dict):
            return candidate
        start = text.find("{", start + 1)
    return None


def _append_calendar_entry(calendar_path, date_key, entry):
    """Append ``entry`` to the list stored under ``date_key`` in the calendar file."""
    try:
        with open(calendar_path, "r", encoding="utf-8") as f:
            calendar_data = json.load(f)
    except FileNotFoundError:
        calendar_data = {}

    calendar_data.setdefault(date_key, []).append(entry)

    with open(calendar_path, "w", encoding="utf-8") as f:
        json.dump(calendar_data, f, indent=2)


@app.route("/transcribe", methods=["POST"])
def transcribe_audio_route():
    """Transcribe an uploaded audio file, extract event fields and store them.

    Expects a multipart upload under the form field ``file``. The audio is
    converted with ffmpeg (16 kHz, mono), transcribed with ``audio_model``,
    and the transcript is passed through ``llm_pipeline``. The first JSON
    object in the generated text must contain ``date``, ``person``, ``time``
    and ``purpose``; the entry is appended to ``calendar.json`` under its date.

    Returns:
        200 with the extracted fields plus ``transcript`` on success.
        400 when no file was uploaded or ffmpeg could not convert it.
        422 when the model output has no usable JSON object or lacks fields.
    """
    file = request.files.get("file")
    if not file:
        return jsonify({"error": "No audio file provided"}), 400

    # A per-request scratch directory keeps concurrent requests from
    # overwriting each other's files and removes them afterwards.
    with tempfile.TemporaryDirectory() as workdir:
        filepath = os.path.join(workdir, "temp_audio.webm")
        wav_path = os.path.join(workdir, "converted_audio.wav")
        file.save(filepath)

        # Argument list instead of a shell string; the exit status is checked
        # so a failed conversion is reported rather than silently ignored.
        conversion = subprocess.run(
            ["ffmpeg", "-y", "-i", filepath, "-ar", "16000", "-ac", "1", wav_path],
            capture_output=True,
        )
        if conversion.returncode != 0:
            app.logger.error(
                "ffmpeg failed (%s): %s",
                conversion.returncode,
                conversion.stderr.decode(errors="replace")[-2000:],
            )
            return jsonify({"error": "Could not convert the uploaded audio"}), 400

        transcribed_text = transcribe_audio(audio_model, wav_path)

    # NOTE(review): text-generation pipelines return the prompt together with
    # the completion by default; if build_prompt embeds a JSON example, that
    # example would be the first object found below - confirm against utils.
    response = llm_pipeline(
        build_prompt(transcribed_text),
        max_new_tokens=config["llm"]["max_new_tokens"],
        do_sample=False,
    )[0]["generated_text"]

    json_output = _extract_first_json_object(response)
    if json_output is None:
        return jsonify({
            "error": "No JSON object found in the model output",
            "transcript": transcribed_text,
        }), 422

    required_fields = ("date", "person", "time", "purpose")
    missing = [field for field in required_fields if field not in json_output]
    if missing:
        return jsonify({
            "error": "Model output is missing fields: " + ", ".join(missing),
            "transcript": transcribed_text,
        }), 422

    # The date becomes a JSON object key, so it has to be a string.
    date_key = json_output["date"]
    if not isinstance(date_key, str):
        return jsonify({
            "error": "Model output field 'date' is not a string",
            "transcript": transcribed_text,
        }), 422

    # Date post-processing via update_date_from_message is currently disabled;
    # the model output is used as-is.
    updated_output = json_output
    updated_output["transcript"] = transcribed_text

    _append_calendar_entry("calendar.json", date_key, {
        "person": updated_output["person"],
        "time": updated_output["time"],
        "purpose": updated_output["purpose"],
        "transcript": updated_output["transcript"],
    })

    return jsonify(updated_output)

In [None]:
# Start Flask's built-in server on port 5000 (the port the ngrok tunnel was
# opened for), listening on all interfaces. This call blocks the cell.
app.run(
    port=5000,
    host="0.0.0.0",
)

 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on all addresses (0.0.0.0)
 * Running on http://127.0.0.1:5000
 * Running on http://172.28.0.12:5000
INFO:werkzeug:[33mPress CTRL+C to quit[0m
