# Audio Transcripts Summarizer

In [1]:
from vosk import Model, KaldiRecognizer
from pydub import AudioSegment
import json
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, pipeline

In [2]:
FRAME_RATE = 16000
CHANNELS = 1

In [3]:
MODEL_PATH = "vosk-model-en-us-0.22"
model = Model(MODEL_PATH)

In [4]:
# model = Model(model_name="vosk-model-en-us-0.22")
# For a smaller download size, use model = Model(model_name="vosk-model-small-en-us-0.15")
rec = KaldiRecognizer(model, FRAME_RATE)
rec.SetWords(True)

In [5]:
mp3 = AudioSegment.from_mp3("marketplace.mp3")
mp3 = mp3.set_channels(CHANNELS)
mp3 = mp3.set_frame_rate(FRAME_RATE)

In [6]:
rec.AcceptWaveform(mp3.raw_data)
result = rec.Result()

In [7]:
result

'{\n  "result" : [{\n      "conf" : 1.000000,\n      "end" : 0.180000,\n      "start" : 0.090000,\n      "word" : "the"\n    }, {\n      "conf" : 1.000000,\n      "end" : 0.570000,\n      "start" : 0.180000,\n      "word" : "funny"\n    }, {\n      "conf" : 1.000000,\n      "end" : 0.960000,\n      "start" : 0.600000,\n      "word" : "thing"\n    }, {\n      "conf" : 1.000000,\n      "end" : 1.230000,\n      "start" : 0.960000,\n      "word" : "about"\n    }, {\n      "conf" : 1.000000,\n      "end" : 1.320000,\n      "start" : 1.230000,\n      "word" : "the"\n    }, {\n      "conf" : 1.000000,\n      "end" : 1.680000,\n      "start" : 1.320000,\n      "word" : "big"\n    }, {\n      "conf" : 1.000000,\n      "end" : 2.220000,\n      "start" : 1.710000,\n      "word" : "economic"\n    }, {\n      "conf" : 1.000000,\n      "end" : 2.490000,\n      "start" : 2.220000,\n      "word" : "news"\n    }, {\n      "conf" : 1.000000,\n      "end" : 2.580000,\n      "start" : 2.490000,\n      "wo

In [8]:
text = json.loads(result)["text"]

In [9]:
text

"the funny thing about the big economic news of the day the fed raising interest rates half a percentage point was that there was only really one tidbit of actual news in the news and the interest rate increase wasn't it you knew it was coming i knew it was common wall street news come and businesses knew it was common so on this fed day on this program something a little bit different jay powell in his own words five of 'em his most used economic words from today's press conference were number one of course it's the biggie two percent inflation inflation inflation inflation inflation inflation lh dealing with inflation pals big worry the thing keeping him up at night price stability is the fed's whole ballgame right now pau basically said as much today we're"

In [10]:
import torch
import torch.serialization

# Override PyTorch's default behavior
torch.serialization.load = lambda *args, **kwargs: torch.load(*args, weights_only=False, **kwargs)

import os
import subprocess

python_path = "C:/Users/avira/miniconda3/envs/my_env/python.exe"
script_path = r"C:\Users\avira\Downloads\AudioSummarizerV2-main\AudioSummarizerV2-main\vosk-recasepunc-en-0.22\recasepunc.py"
checkpoint_path = r"C:\Users\avira\Downloads\AudioSummarizerV2-main\AudioSummarizerV2-main\vosk-recasepunc-en-0.22\checkpoint"

command = f'"{python_path}" "{script_path}" predict "{checkpoint_path}"'

try:
    cased = subprocess.run(command, shell=True, text=True, input=text, capture_output=True, check=True)
    print("Output:\n", cased.stdout)
except subprocess.CalledProcessError as e:
    print("Error Output:\n", e.stderr)

Output:
 The funny thing about the big economic news of the day, the Fed raising interest rates half a percentage point, was that there was only really one tidbit of actual news in the news. And the interest rate increase, wasn ' t it. You knew it was coming. I knew it was common Wall Street news come. And businesses knew it was common. So on this Fed day, on this program, something a little bit different. Jay Powell, in his own words, five of ' em. His most used economic words from today ' s press conference were number one, Of course, it ' s the biggie Two percent inflation, inflation, inflation, inflation, inflation, inflation. Lh Dealing with inflation pals Big worry. The thing keeping him up at night, price stability is the Fed ' s whole ballgame right now. Pau basically said as much today. We ' re.



In [19]:
def voice_recognition(filename):
    model = Model(model_name="vosk-model-en-us-0.22")
    rec = KaldiRecognizer(model, FRAME_RATE)
    rec.SetWords(True)
    
    mp3 = AudioSegment.from_mp3(filename)
    mp3 = mp3.set_channels(CHANNELS)
    mp3 = mp3.set_frame_rate(FRAME_RATE)
    
    step = 45000
    transcript = ""
    for i in range(0, len(mp3), step):
        print(f"Progress: {i/len(mp3)}")
        segment = mp3[i:i+step]
        rec.AcceptWaveform(segment.raw_data)
        result = rec.Result()
        text = json.loads(result)["text"]
        transcript += text
    
    # Use Recasepunc model to add punctuation
    python_path = "C:/Users/avira/miniconda3/envs/my_env/python.exe"
    script_path = r"C:\Users\avira\Downloads\AudioSummarizerV2-main\AudioSummarizerV2-main\vosk-recasepunc-en-0.22\recasepunc.py"
    checkpoint_path = r"C:\Users\avira\Downloads\AudioSummarizerV2-main\AudioSummarizerV2-main\vosk-recasepunc-en-0.22\checkpoint"

    command = f'"{python_path}" "{script_path}" predict "{checkpoint_path}"'
    
    # Run subprocess and capture output
    cased = subprocess.run(command, shell=True, text=True, input=transcript, capture_output=True, check=True)

    # Extract the output from stdout and use it
    cased_text = cased.stdout.strip()

    # Now split the text into tokens
    split_tokens = cased_text.split(" ")
    
    docs = []
    for i in range(0, len(split_tokens), 850):
        selection = " ".join(split_tokens[i:(i+850)])
        docs.append(selection)
    
    return docs

In [None]:
"""def voice_recognition(filename):
    model = Model(model_name="vosk-model-en-us-0.22")
    rec = KaldiRecognizer(model, FRAME_RATE)
    rec.SetWords(True)
    
    mp3 = AudioSegment.from_mp3(filename)
    mp3 = mp3.set_channels(CHANNELS)
    mp3 = mp3.set_frame_rate(FRAME_RATE)
    
    step = 45000
    transcript = ""
    for i in range(0, len(mp3), step):
        print(f"Progress: {i/len(mp3)}")
        segment = mp3[i:i+step]
        rec.AcceptWaveform(segment.raw_data)
        result = rec.Result()
        text = json.loads(result)["text"]
        transcript += text
    
    return transcript"""

In [20]:
docs = voice_recognition("marketplace_full.mp3")

Progress: 0.0
Progress: 0.02666815218151411
Progress: 0.05333630436302822
Progress: 0.08000445654454233
Progress: 0.10667260872605644
Progress: 0.13334076090757055
Progress: 0.16000891308908466
Progress: 0.18667706527059877
Progress: 0.21334521745211288
Progress: 0.240013369633627
Progress: 0.2666815218151411
Progress: 0.29334967399665524
Progress: 0.3200178261781693
Progress: 0.34668597835968346
Progress: 0.37335413054119754
Progress: 0.4000222827227117
Progress: 0.42669043490422576
Progress: 0.4533585870857399
Progress: 0.480026739267254
Progress: 0.5066948914487681
Progress: 0.5333630436302822
Progress: 0.5600311958117963
Progress: 0.5866993479933105
Progress: 0.6133675001748246
Progress: 0.6400356523563386
Progress: 0.6667038045378528
Progress: 0.6933719567193669
Progress: 0.720040108900881
Progress: 0.7467082610823951
Progress: 0.7733764132639093
Progress: 0.8000445654454234
Progress: 0.8267127176269374
Progress: 0.8533808698084515
Progress: 0.8800490219899657
Progress: 0.90671717

In [21]:
docs

["Turns out fifty four dollars and twenty cents was not a joke from American public media. This is Marketplace, The in Los Angeles, CA, raised on Monday today. I do believe the twenty fifth of April. Good as always to have you along, everybody. Alright ? Just for fun, I am going to see if I can do this in two hundred and eighty characters. Which is of course Twitter ' s limit starting right now. After making a not very veiled marijuana reference in offering fifty four dollarstwenty cents a share to buy Twitter, Elon Musk has sealed the deal. As of today, Lauren Hirsch has been covering the story for the New York Times. Thanks for coming on. Thanks for having me setting aside all marijuana jokes that many people made with this price that Musk offered. And clearly he was serious. And now this has happened in an unbelievably fast timeline, right ? And we will be fast. I tell you, I was at a shower yesterday communicating like us to where it ' s kind of casually tracking and then my source

In [15]:
MODEL_NAME = "sshleifer/distilbart-cnn-12-6"
SAVE_PATH = "./saved_model"

# Download & save
model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME)
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

model.save_pretrained(SAVE_PATH)
tokenizer.save_pretrained(SAVE_PATH)



('./saved_model\\tokenizer_config.json',
 './saved_model\\special_tokens_map.json',
 './saved_model\\vocab.json',
 './saved_model\\merges.txt',
 './saved_model\\added_tokens.json',
 './saved_model\\tokenizer.json')

In [16]:
summarizer2 = pipeline("summarization", model="./saved_model", tokenizer="./saved_model")

Device set to use cpu


In [22]:
summaries = summarizer2(docs)
summary = "\n\n".join([d["summary_text"] for d in summaries])
print(summary)

 This is Marketplace, The in Los Angeles, CA, raised on Monday today . After making a not very veiled marijuana reference in offering fifty four dollarstwenty cents a share to buy Twitter, Elon Musk has sealed the deal . Marketplace's Lauren Hirsch has been covering the story for the New York Times .

 Twitter shares on this Monday up almost six percent still, though a couple of bucks shy of must offer of one hundred and fifty four dollars, twenty cents a piece elsewhere, the major indices were up . Jacob Orchard, a phd candidate at U.C. San Diego, tells how the price index forlow income and high income households can systematically differ over the course of the business cycle .

 The National Association for Business Economics is out with a new survey of economists to work at big companies and those economists . A record number of economists say their businesses have been book and pay increases of late . But for the most part, employers aren't boosting wages enough to keep up with soa