# Voice recording summarizer


## 1. Setup

In [1]:
!pip install git+https://github.com/openai/whisper.git  -q
!pip install langchain moviepy "openai<1.0.0" tiktoken pytube -q

  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m8.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m21.3/21.3 MB[0m [31m50.9 MB/s[0m eta [36m0:00:00[0m
[?25h  Building wheel for openai-whisper (pyproject.toml) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m974.6/974.6 kB[0m [31m15.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m77.0/77.0 kB[0m [31m10.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m57.6/57.6 kB[0m [31m7.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m315.6/315.6 kB[0m [31m21.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━

In [2]:
# Mount Google Drive at /content/drive so that files stored there
# (such as the configured file_path) can be read from this runtime.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


## 2. Configuration

In [3]:
# Whisper checkpoint name: used as the key into whisper._MODELS for the
# download and as the "<name>.pt" file name under /content/whisper/.
# whisper_model_size = 'medium'
whisper_model_size = 'large-v2'

# Language code passed as `language=` to Whisper's transcribe() call.
language = 'en' # English

# NOTE(review): not read by any cell visible here — presumably selects the
# input source in a later cell; confirm it is still used.
content_source = "google_drive"

# Recording to process. A path ending in '.txt' is read as a ready-made
# transcript; any other path is transcribed with Whisper.
file_path = "/content/drive/MyDrive/QA-01.mp3"

# NOTE(review): not read by any cell visible here — presumably describes the
# recording for the summarization prompt; confirm against the later cells.
recording_type = "short conversation"

# Markdown skeleton of the desired summary (Summary / Jobs / Pains / Gains).
# NOTE(review): presumably injected into the summarization prompt; it is not
# used by any cell visible here.
summary_structure = """
  ## Summary
  Main highlights here. 3-7 paragraphs.

  ## Jobs
  - one
  - two
  - ..

  ## Pains
  - one
  - two
  - ..

  ## Gains
  - one
  - two
  - ..
"""

In [4]:
import os
from getpass import getpass

import openai

# Never store the API key in the notebook itself: a saved or shared copy
# would leak it. Use the OPENAI_API_KEY environment variable when it is set,
# otherwise prompt for the key without echoing it to the cell output.
openai.api_key = os.environ.get("OPENAI_API_KEY") or getpass("Enter the OpenAI API Key in the cell: ")

Enter the OpenAI API Key in the cell: ··········


In [5]:
# Sanity check that the API key works: fetch the models it can access and
# print, for each entry, its field names followed by the entry itself.
models = openai.Model.list()
for model in models["data"]:
  print(model.keys(), model, sep="\n")

dict_keys(['id', 'object', 'created', 'owned_by'])
{
  "id": "gpt-3.5-turbo-1106",
  "object": "model",
  "created": 1698959748,
  "owned_by": "system"
}
dict_keys(['id', 'object', 'created', 'owned_by'])
{
  "id": "whisper-1",
  "object": "model",
  "created": 1677532384,
  "owned_by": "openai-internal"
}
dict_keys(['id', 'object', 'created', 'owned_by'])
{
  "id": "babbage-002",
  "object": "model",
  "created": 1692634615,
  "owned_by": "system"
}
dict_keys(['id', 'object', 'created', 'owned_by'])
{
  "id": "dall-e-2",
  "object": "model",
  "created": 1698798177,
  "owned_by": "system"
}
dict_keys(['id', 'object', 'created', 'owned_by'])
{
  "id": "gpt-3.5-turbo-16k",
  "object": "model",
  "created": 1683758102,
  "owned_by": "openai-internal"
}
dict_keys(['id', 'object', 'created', 'owned_by'])
{
  "id": "tts-1-hd-1106",
  "object": "model",
  "created": 1699053533,
  "owned_by": "system"
}
dict_keys(['id', 'object', 'created', 'owned_by'])
{
  "id": "tts-1-hd",
  "object": "mode

In [6]:
%%time

import pathlib
import whisper

model_path = pathlib.Path("/content/whisper/"+whisper_model_size+".pt")
if model_path.exists():
  print ("Model has been downloaded, no re-download necessary")
else:
  print ("Starting download of Whisper Model")
  whisper._download(whisper._MODELS[whisper_model_size], '/content/whisper/', False)

Starting download of Whisper Model


100%|█████████████████████████████████████| 2.87G/2.87G [03:34<00:00, 14.4MiB/s]


CPU times: user 19.1 s, sys: 9.91 s, total: 29 s
Wall time: 3min 49s


## 3. Transcribe the recording
- A video file is automatically converted to audio before transcription.
- A text file (`.txt`) is used directly as the transcript, so no transcription is run.

In [7]:
# The star import supplies VideoFileClip, which transcribe_recording() uses
# to extract the audio track from a video file.
from moviepy.editor import *
import os
# Path of the transcript text file; stays False until it is assigned further
# down in this cell (either the input .txt file or the newly saved transcript).
text_trascript_file_path = False

def transcribe_recording(whisper_model_size, file_path):
  """Transcribe a media file with Whisper and return the recognized text.

  Audio files are handed to Whisper as they are. Any other file is treated
  as a video: its audio track is first exported to /content/<name>.mp3 and
  that file is transcribed instead.

  Args:
    whisper_model_size: Name of the Whisper model to load, e.g. 'large-v2'.
    file_path: Path to the audio or video file to transcribe.

  Returns:
    The transcript text (the 'text' entry of Whisper's result).

  Raises:
    ValueError: If a video file contains no audio track.

  Note:
    Reads the notebook-level `language` variable, and loads the model on
    'cuda' from /content/whisper/, so a GPU runtime is required.
  """
  # Whisper decodes its input through ffmpeg, so common audio formats need no
  # conversion. The check is case-insensitive so that e.g. 'talk.MP3' is not
  # mistaken for a video.
  audio_extensions = ('.mp3', '.wav', '.m4a', '.flac', '.ogg')
  if not file_path.lower().endswith(audio_extensions):
    print ("File is not an mp3. Converting to audio...")
    video = VideoFileClip(file_path)
    try:
      if video.audio is None:
        raise ValueError("No audio track found in " + file_path)
      base_name = os.path.basename(file_path)
      output_name = os.path.splitext(base_name)[0] + ".mp3"
      output_path = os.path.join("/content/", output_name)
      video.audio.write_audiofile(output_path)
    finally:
      # Always release the reader resources held by the clip.
      video.close()
    file_path = output_path
  print ("Starting the audio transcription...")
  whisper_model = whisper.load_model(whisper_model_size, device='cuda', download_root='/content/whisper/')
  print ("Loaded the '"+whisper_model_size+"' Whisper model...")
  result = whisper_model.transcribe(file_path, language=language)
  return result['text']

# A .txt input (any letter case) is already a transcript and is read as-is;
# every other input is transcribed with Whisper and the text is saved.
if file_path.lower().endswith('.txt'):
  print ("The submitted file is a text file.")
  text_trascript_file_path = file_path
  with open(text_trascript_file_path, "r") as file:
    short_conversation = file.read()
else:
  print ("File is not a text. Parsing the media...")
  short_conversation = transcribe_recording(whisper_model_size, file_path)
  base_name = os.path.basename(file_path)
  text_output_name = "transcript__"+os.path.splitext(base_name)[0] + ".txt"
  print ("The transcript is ready. Saving as a "+text_output_name+" ...")
  # Write to the exact path that is recorded in text_trascript_file_path, so
  # the two cannot disagree when the working directory is not /content.
  text_trascript_file_path = "/content/" + text_output_name
  with open(text_trascript_file_path, "w") as file:
      file.write(short_conversation)

# Report the transcript size in tokens of the gpt-3.5-turbo-16k tokenizer.
import tiktoken
enc = tiktoken.encoding_for_model("gpt-3.5-turbo-16k")
print ("Number of tokens:", len(enc.encode(short_conversation)))

File is not a text. Parsing the media...
Starting the audio transcription...
Loaded the 'large-v2' Whisper model...
The transcript is ready. Saving as a transcript__QA-01.txt ...
Number of tokens: 210


In [8]:
# Sanity-check the transcription by displaying the first 1000 characters of the transcript.
short_conversation[:1000]

" How, many people are there in your family there are five people in my family, my father mother brother sister and me Does your family live in a house or an apartment? We live in a house in the countryside What does your father do? My father is a doctor he works at the local hospital How old is your mother? She is 40 years old one year younger than my father Do you have any siblings? What's his or her name? Yes, I do. I have one elder brother David and one younger sister Mary Are you the oldest among your brothers and sisters? No, I'm not I'm the second child in my family What does your mother father like? My father likes playing football and my mother likes cooking Do your parents let you stay out late? Of course not they always ask me to get home before 10 p.m.. Each night. Do you stay with your parents? Right now no, but I used to Does your family usually have dinner together? Yes, we do my mom always prepares delicious meals for us"