In [2]:
import whisper_timestamped as whisper

audio = whisper.load_audio("../data/Stabilizing Large Sparse Mixture-of-Experts Models.wav")

model = whisper.load_model("NbAiLab/whisper-large-v2-nob", device="cuda")

result = whisper.transcribe(model, audio, language="en")

import json
print(json.dumps(result, indent = 2, ensure_ascii = False))


  from .autonotebook import tqdm as notebook_tqdm
100%|██████████| 94676/94676 [04:21<00:00, 362.62frames/s]


{
  "text": " All right, so today we're going to be looking at AI and specifically how to make it a whole lot smarter, but without needing, you know, like a giant supercomputer. You're interested in these sparse expert models, right? And specifically this paper about STMOe, stable and transferable mixture of experts. It sounds kind of intimidating. I think the idea is actually really elegant. It is. Think about it this way. Instead of one massive AI brain, you know, trying to process everything. What if you had a team of specialized experts? What if you had a team of people who were able to make a smart AI? What if you had a team of people who were able to make a smart computer? What if you had a team of people who were able to make a smart computer? What if you had a team of people who were able to make a smart computer? What if you had a Each one incredibly good at their own thing. That's the core concept behind these sparse expert models. OK, so it's less like one giant dictionary. 

In [3]:
import io
with io.open('data.json', 'w', encoding='utf-8') as f:
  f.write(json.dumps(result, ensure_ascii=False))

In [4]:
import json

# Load JSON file
with open("data.json", "r", encoding="utf-8") as file:
    data = json.load(file)

# Ensure the JSON is a list of dictionaries (records)
if isinstance(data, dict):
    data = [data]  # Convert to list format if it's a single object

print("Sample Record:", data[0])  

Sample Record: {'text': " All right, so today we're going to be looking at AI and specifically how to make it a whole lot smarter, but without needing, you know, like a giant supercomputer. You're interested in these sparse expert models, right? And specifically this paper about STMOe, stable and transferable mixture of experts. It sounds kind of intimidating. I think the idea is actually really elegant. It is. Think about it this way. Instead of one massive AI brain, you know, trying to process everything. What if you had a team of specialized experts? What if you had a team of people who were able to make a smart AI? What if you had a team of people who were able to make a smart computer? What if you had a team of people who were able to make a smart computer? What if you had a team of people who were able to make a smart computer? What if you had a Each one incredibly good at their own thing. That's the core concept behind these sparse expert models. OK, so it's less like one giant 

In [5]:
import sqlite3

DB_NAME = "transcriptions.db"

def create_tables():
    """Creates SQLite tables for storing transcription data and segments separately."""
    conn = sqlite3.connect(DB_NAME)
    cursor = conn.cursor()
    
    # Table for transcription metadata
    cursor.execute("""
    CREATE TABLE IF NOT EXISTS transcriptions (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        text TEXT,
        language TEXT
    );
    """)

    # Table for individual segments linked to transcriptions
    cursor.execute("""
    CREATE TABLE IF NOT EXISTS segments (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        transcription_id INTEGER,
        start REAL,
        end REAL,
        text TEXT,
        tokens TEXT,
        temperature REAL,
        avg_logprob REAL,
        compression_ratio REAL,
        no_speech_prob REAL,
        confidence REAL,
        words TEXT,
        FOREIGN KEY (transcription_id) REFERENCES transcriptions(id) ON DELETE CASCADE
    );
    """)

    conn.commit()
    conn.close()
    print("Tables created successfully.")

# Run this first to create the tables
create_tables()


Tables created successfully.


In [10]:
import json

def insert_transcription(data):
    """Inserts transcription metadata and segments into separate tables."""
    conn = sqlite3.connect(DB_NAME)
    cursor = conn.cursor()
    
    print("Text:", data["text"])

    # Insert into transcriptions table
    cursor.execute("""
    INSERT INTO transcriptions (text, language) 
    VALUES (?, ?)""",
    (data.get("text", ""), data.get("language", ""))
    )

    # Get the last inserted transcription ID
    transcription_id = cursor.lastrowid

    # Insert segments
    for segment in data.get("segments", []):
        cursor.execute("""
        INSERT INTO segments (
            transcription_id, start, end, text, tokens, temperature, 
            avg_logprob, compression_ratio, no_speech_prob, confidence, words
        ) 
        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
        """, (
            transcription_id,
            segment.get("start", 0),
            segment.get("end", 0),
            segment.get("text", ""),
            json.dumps(segment.get("tokens", [])),  # Store as JSON string
            segment.get("temperature", 0),
            segment.get("avg_logprob", 0),
            segment.get("compression_ratio", 0),
            segment.get("no_speech_prob", 0),
            segment.get("confidence", 0),
            json.dumps(segment.get("words", []))  # Store words as JSON string
        ))

    conn.commit()
    conn.close()
    print(f"Transcription and {len(data.get('segments', []))} segments inserted successfully.")


# Insert the sample data
insert_transcription(data[0])


Text:  All right, so today we're going to be looking at AI and specifically how to make it a whole lot smarter, but without needing, you know, like a giant supercomputer. You're interested in these sparse expert models, right? And specifically this paper about STMOe, stable and transferable mixture of experts. It sounds kind of intimidating. I think the idea is actually really elegant. It is. Think about it this way. Instead of one massive AI brain, you know, trying to process everything. What if you had a team of specialized experts? What if you had a team of people who were able to make a smart AI? What if you had a team of people who were able to make a smart computer? What if you had a team of people who were able to make a smart computer? What if you had a team of people who were able to make a smart computer? What if you had a Each one incredibly good at their own thing. That's the core concept behind these sparse expert models. OK, so it's less like one giant dictionary. More li

In [19]:
import pandas as pd
# first few segments
conn = sqlite3.connect(DB_NAME)
query = "SELECT transcription_id , start, end, substr(text, 0, 30) as text FROM segments LIMIT 5"
df = pd.read_sql(query, conn)
print(df.head(5))



   transcription_id  start    end                           text
0                 1   0.00  31.36   All right, so today we're go
1                 1  31.36  36.08   Each one incredibly good at 
2                 1  36.16  47.08   OK, so it's less like one gi
3                 1  47.08  53.11   Earlier attempts at this kin
4                 1  53.11  57.81   reliable and also adaptable,
