In [4]:
from google.colab import drive
from google.colab import files # Import the 'files' object from google.colab

uploaded = files.upload()

Saving webinar3.mp3 to webinar3.mp3


In [8]:
import whisper
from pydub import AudioSegment
from transformers import pipeline
import spacy
from keybert import KeyBERT

def convert_mp3_to_wav(mp3_path, wav_path):
    audio = AudioSegment.from_mp3(mp3_path)
    audio = audio.set_channels(1).set_frame_rate(16000)
    audio.export(wav_path, format="wav")
    print(f"Converted {mp3_path} to {wav_path}")

def transcribe_audio_with_whisper(wav_path):
    model = whisper.load_model("small")
    result = model.transcribe(wav_path)
    return result["text"]

def summarize_text(text):
  summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")
  summary = summarizer(text, max_length=100, min_length=50, do_sample=False)
  return summary[0]['summary_text']

def extract_key_points(text):
    kw_model = KeyBERT()
    keywords = kw_model.extract_keywords(
        text,
        keyphrase_ngram_range=(1, 2),
        stop_words='english',
        top_n=8,
        use_maxsum=True,
        nr_candidates=20
    )
    return [kw[0] for kw in keywords]

def extract_named_entities(text):
    nlp = spacy.load("en_core_web_sm")
    doc = nlp(text)

    entities = {
        "People": [ent.text for ent in doc.ents if ent.label_ == "PERSON"],
        "Organizations": [ent.text for ent in doc.ents if ent.label_ == "ORG"],
        "Locations": [ent.text for ent in doc.ents if ent.label_ in ["GPE", "LOC"]]
    }

    return entities

def assign_tasks(key_points, entities):
    tasks = []
    for i, task in enumerate(key_points):
        assignee = entities["People"][i % len(entities["People"])] if entities["People"] else "Unassigned"
        tasks.append(f"{assignee} is assigned to {task}.")
    return tasks

mp3_file = "/content/webinar3.mp3"
wav_file = "/content/converted_audio.wav"

convert_mp3_to_wav(mp3_file, wav_file)
transcription = transcribe_audio_with_whisper(wav_file)
print("Full Transcription:\n", transcription)

summary = summarize_text(transcription)
print("\nSummarized Text:\n", summary)

key_points = extract_key_points(transcription)
print("\nKey Points for Tasks:\n", key_points)

entities = extract_named_entities(transcription)
print("\nIdentified People, Organizations, and Locations:\n", entities)

task_assignments = assign_tasks(key_points, entities)
print("\nAssigned Tasks:\n")
for task in task_assignments:
    print(task)

Converted /content/webinar3.mp3 to /content/converted_audio.wav


100%|███████████████████████████████████████| 461M/461M [00:09<00:00, 52.6MiB/s]


Full Transcription:
  So, I'm happy to report we have succeeded in rebooting our flagships. Wait ahead of schedule. Yet again. This has allowed us to improve our USPs by 90%. That's nine out of every ten. Excellent, thank you Samuel. Timothy, what's the status with the linear solutions? Could you give us the latest on the square project? The linear solutions department has, yet again, functioned at full capacity, and we have fulfilled 114% of this month's objectives. You can read the handouts for a detailed account, but in short, as I reported last week, we've decided to use the right angle for the square project. Now, there's still some debate as to how big a right angle is, so we're testing currently at 90, 97, 100 and 101 degrees. As agreed, I've ensured Anderson cross-checks the design, removing any left angles and doesn't use them in future. Overall, we have very good progress with the design. Are we on track with the schedule? When are we required to start delivering? I'm happy t

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/1.80k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/1.22G [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.22G [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

Device set to use cpu



Summarized Text:
  Samuel: We have succeeded in rebooting our flagships ahead of schedule . This has allowed us to improve our USPs by 90%. That's nine out of every ten . Samuel: The linear solutions department has functioned at full capacity, and we have fulfilled 114% of this month's objectives .


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.5k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]


Key Points for Tasks:
 ['flagships wait', 'revised delivery', 'angle testing', 'square', 'status linear', 'start deliveries', 'schedule required', 'project linear']

Identified People, Organizations, and Locations:
 {'People': ['Samuel', 'Timothy', 'Anderson', 'Timothy', 'Anderson'], 'Organizations': [], 'Locations': []}

Assigned Tasks:

Samuel is assigned to flagships wait.
Timothy is assigned to revised delivery.
Anderson is assigned to angle testing.
Timothy is assigned to square.
Anderson is assigned to status linear.
Samuel is assigned to start deliveries.
Timothy is assigned to schedule required.
Anderson is assigned to project linear.


In [5]:
!pip install uvicorn pydub openai-whisper transformers

Collecting uvicorn
  Downloading uvicorn-0.34.0-py3-none-any.whl.metadata (6.5 kB)
Collecting pydub
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting openai-whisper
  Downloading openai-whisper-20240930.tar.gz (800 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m800.5/800.5 kB[0m [31m16.9 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting tiktoken (from openai-whisper)
  Downloading tiktoken-0.9.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.7 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch->openai-whisper)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch->openai-whisper)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-man

In [7]:
!pip install keybert

Collecting keybert
  Downloading keybert-0.9.0-py3-none-any.whl.metadata (15 kB)
Downloading keybert-0.9.0-py3-none-any.whl (41 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m41.4/41.4 kB[0m [31m2.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: keybert
Successfully installed keybert-0.9.0
