In [13]:
import torch
import yt_dlp
import pandas as pd
import librosa
import time

from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline


In [4]:
import os
# List every entry in the audio directory, printed as a path relative to the
# notebook's working directory.
folder_path = 'audio_files/'
for entry_name in os.listdir(folder_path):
    entry_path = os.path.join(folder_path, entry_name)
    print(entry_path)

audio_files/May 16, 2023 Commission on Water Resource Management Meeting.wav


In [5]:
# YouTube video ids of the meeting recordings, keyed by meeting date
# (month day/year). Insertion order is the download order.
MEETING_VIDEO_IDS = {
    'January 24/23': '4tOiX5j3_ek',
    'March 21/23': 'HvSKz5oZ95I',
    'April 18/23': 'JCRCJOhJ3EY',
    'May 16/23': 'BRMCqJpyfnE',
    'June 20/23': 'owf7KgXaN7E',
    'July 26/23': 'K94fdr_Abpk',
}

# Full watch URLs, in the same order as the mapping above.
URLS = [
    'https://www.youtube.com/watch?v=' + video_id
    for video_id in MEETING_VIDEO_IDS.values()
]


In [8]:
# Leave as True when the audio files are already on disk; set to False to
# download every entry in URLS again.
skip = True

if not skip:

    # Post-processing step: have ffmpeg extract the audio track and store it
    # as WAV.
    extract_wav_audio = {
        'key': 'FFmpegExtractAudio',
        'preferredcodec': 'wav',
    }

    # yt_dlp options
    ydl_opts = dict(
        # Best available audio-only stream, else the best combined stream.
        format='bestaudio/best',
        postprocessors=[extract_wav_audio],
        # Extra ffmpeg arguments: 16,000 Hz sample rate for use with whisper.
        postprocessor_args=['-ar', '16000'],
        # Name each file after the video title, in the working directory.
        outtmpl='%(title)s.%(ext)s',
    )

    with yt_dlp.YoutubeDL(ydl_opts) as downloader:
        downloader.download(URLS)



[youtube] Extracting URL: https://www.youtube.com/watch?v=4tOiX5j3_ek
[youtube] 4tOiX5j3_ek: Downloading webpage
[youtube] 4tOiX5j3_ek: Downloading ios player API JSON
[youtube] 4tOiX5j3_ek: Downloading web creator player API JSON
[youtube] 4tOiX5j3_ek: Downloading m3u8 information
[info] 4tOiX5j3_ek: Downloading 1 format(s): 251
[download] Destination: January 24, 2023 Commission on Water Resource Management Meeting.webm
[download] 100% of  146.50MiB in 00:00:10 at 14.24MiB/s    
[ExtractAudio] Destination: January 24, 2023 Commission on Water Resource Management Meeting.wav
Deleting original file January 24, 2023 Commission on Water Resource Management Meeting.webm (pass -k to keep)
[youtube] Extracting URL: https://www.youtube.com/watch?v=HvSKz5oZ95I
[youtube] HvSKz5oZ95I: Downloading webpage
[youtube] HvSKz5oZ95I: Downloading ios player API JSON
[youtube] HvSKz5oZ95I: Downloading web creator player API JSON
[youtube] HvSKz5oZ95I: Downloading m3u8 information
[info] HvSKz5oZ95I: Dow

In [9]:
s = time.time()

# Pick the best available backend instead of assuming Apple-silicon MPS:
# MPS first (unchanged behaviour on the original machine), then CUDA, then CPU.
mps_backend = getattr(torch.backends, "mps", None)  # absent on older torch builds
if mps_backend is not None and mps_backend.is_available():
    device = torch.device("mps")
elif torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Half precision on an accelerator; full precision on CPU, where float16
# inference is poorly supported.
torch_dtype = torch.float32 if device.type == "cpu" else torch.float16

model_id = "openai/whisper-large-v3"

# Load the speech-to-text model weights in the chosen precision and move them
# to the selected device.
model = AutoModelForSpeechSeq2Seq.from_pretrained(
    model_id, torch_dtype=torch_dtype, low_cpu_mem_usage=True, use_safetensors=True
)
model.to(device)

# The processor bundles the tokenizer and the audio feature extractor that
# belong to this model id.
processor = AutoProcessor.from_pretrained(model_id)

# Speech-recognition pipeline: audio is processed in 30-second chunks,
# six chunks per batch.
pipe = pipeline(
    "automatic-speech-recognition",
    model=model,
    tokenizer=processor.tokenizer,
    feature_extractor=processor.feature_extractor,
    torch_dtype=torch_dtype,
    device=device,
    chunk_length_s=30,
    batch_size=6,
)

e = time.time()
runtime = e - s
print(f"Runtime: {runtime:.2f} seconds")

Runtime: 7.63 seconds


In [10]:
started_at = time.time()

# The pipeline input is normalised to 16,000 Hz before transcription.
target_sample_rate = 16000
audio_file_path = "January 24, 2023 Commission on Water Resource Management Meeting.wav"

# sr=None keeps the file's native sample rate so it can be compared with the
# target below.
audio_data, sample_rate = librosa.load(audio_file_path, sr=None)

# Resample only when the file is not already at the target rate.
needs_resampling = sample_rate != target_sample_rate
if needs_resampling:
    audio_data = librosa.resample(audio_data, orig_sr=sample_rate, target_sr=target_sample_rate)
    sample_rate = target_sample_rate

# Transcribe the whole recording, asking the pipeline for timestamps as well
# as text.
result = pipe(audio_data, return_timestamps=True)

runtime = time.time() - started_at
print(f"Runtime: {runtime:.2f} seconds")

Due to a bug fix in https://github.com/huggingface/transformers/pull/28687 transcription using a multilingual Whisper will default to language detection followed by transcription instead of translation to English.This might be a breaking change for your use case. If you want to instead always translate your audio to English, make sure to pass `language='en'`.
Passing a tuple of `past_key_values` is deprecated and will be removed in Transformers v4.43.0. You should pass an instance of `EncoderDecoderCache` instead, e.g. `past_key_values=EncoderDecoderCache.from_legacy_cache(past_key_values)`.
The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.


Runtime: 1524.31 seconds


In [11]:
# Dump the raw pipeline output and report how long printing it took.
started_at = time.time()

print(result)

runtime = time.time() - started_at
print(f"Runtime: {runtime:.2f} seconds")

Runtime: 0.00 seconds


In [14]:
# One row per transcribed chunk, taken from the "chunks" entry of the
# pipeline result.
chunk_records = result["chunks"]
df = pd.DataFrame(chunk_records)
print(df.head())

        timestamp                                               text
0      (0.0, 3.0)                                        One minute.
1      (3.0, 4.0)                                              Okay.
2   (22.0, 26.72)                Yeah, we'll wait till to on the dot
3  (27.68, 40.64)                                              julie
4    (48.0, 50.0)   keep the running tabs while you're doing it. ...


In [15]:
# Split each timestamp pair into separate start_time / end_time columns,
# keeping the rows aligned through the existing index.
timestamp_pairs = df['timestamp'].tolist()
timestamp_columns = pd.DataFrame(timestamp_pairs, index=df.index)
df[['start_time', 'end_time']] = timestamp_columns
print(df.head())

        timestamp                                               text  \
0      (0.0, 3.0)                                        One minute.   
1      (3.0, 4.0)                                              Okay.   
2   (22.0, 26.72)                Yeah, we'll wait till to on the dot   
3  (27.68, 40.64)                                              julie   
4    (48.0, 50.0)   keep the running tabs while you're doing it. ...   

   start_time  end_time  
0        0.00      3.00  
1        3.00      4.00  
2       22.00     26.72  
3       27.68     40.64  
4       48.00     50.00  


In [15]:
# NOTE(review): this cell repeats the previous cell's transformation. It
# rebuilds start_time / end_time from the unchanged 'timestamp' column, so
# running it again leaves the frame as it was.
start_end_columns = ['start_time', 'end_time']
df[start_end_columns] = pd.DataFrame(list(df['timestamp']), index=df.index)
print(df.head())

        timestamp                                               text  \
0      (0.0, 3.0)                                        One minute.   
1      (3.0, 4.0)                                              Okay.   
2   (22.0, 26.72)                Yeah, we'll wait till to on the dot   
3  (27.68, 40.64)                                              julie   
4    (48.0, 50.0)   keep the running tabs while you're doing it. ...   

   start_time  end_time  
0        0.00      3.00  
1        3.00      4.00  
2       22.00     26.72  
3       27.68     40.64  
4       48.00     50.00  


In [16]:
def time_to_seconds(hrs, mins, secs=0):
    """
    Convert a time given in hours, minutes and (optionally) seconds to
    total seconds.

    Parameters
    ----------
    hrs : int or float
        Number of hours.
    mins : int or float
        Number of minutes.
    secs : int or float, optional
        Additional seconds; defaults to 0, so existing two-argument calls
        give the same result as before.

    Returns
    -------
    int or float
        hrs * 3600 + mins * 60 + secs.
    """
    total_seconds = (hrs * 3600) + (mins * 60) + secs
    return total_seconds

In [17]:
def combine_text_by_time(df, start_hr, start_min, end_hr, end_min):
    """
    Join the text of every row that lies entirely inside a time window.

    The window runs from start_hr:start_min to end_hr:end_min. A row is kept
    only when its 'start_time' is at or after the window start AND its
    'end_time' is at or before the window end (both in seconds). The 'text'
    values of the kept rows are joined with single spaces, in row order.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide 'start_time', 'end_time' and 'text' columns.
    start_hr, start_min : int or float
        Window start, in hours and minutes.
    end_hr, end_min : int or float
        Window end, in hours and minutes.

    Returns
    -------
    str
        The combined text; empty when no row falls inside the window.
    """
    window_start = time_to_seconds(start_hr, start_min)
    window_end = time_to_seconds(end_hr, end_min)

    # Both conditions must hold, so rows straddling either edge are left out.
    starts_in_window = df['start_time'] >= window_start
    ends_in_window = df['end_time'] <= window_end
    selected_text = df.loc[starts_in_window & ends_in_window, 'text']

    return ' '.join(selected_text)


In [19]:
result_text = combine_text_by_time(df, 0, 12, 1, 17)
print(result_text)

 staff miss roth are you gonna are you gonna present the submit all right uh aloha chair  um item b1  is everyone in Okay. You just set us on your for caffeine. Give me a.  We do have a couple people that I have not been able to verify so. For the person identified as Lopez panelists, which item are you here for?  See one. and roger okay got it i think we should be good thank you  very good thank you very much go ahead aloha chair commissioners katie roth planning program manager  with the commission item b1 is approval of chairperson's appointment of marvin kaleo  manuel as first deputy to the chairperson of the commission on water resource management Mr. Manuel has served as deputy since January 28 2019 and has been  instrumental in moving forward significant decisions by the commission a list of those  major accomplishments are in the middle along with background and his resume and I'm happy to answer  any questions you may have otherwise staff stands on the middle thank you so very

In [20]:
import os

from openai import OpenAI

# Read the API key from the environment so the secret never lives in the
# notebook source or its saved outputs.
key = os.environ.get("OPENAI_API_KEY")
if not key:
    raise RuntimeError(
        "OPENAI_API_KEY is not set; export it in the environment before running this cell."
    )

client = OpenAI(
  api_key=key,
)


In [21]:

# Chat messages: a generic system role plus one user message that carries the
# summarisation instructions followed by the transcript excerpt.
prompt =[
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": f"Please summarize the following text in a linear flow, include sentiment, use speakers names, and include the action item number, seperate speakers for and against, list reasons for each. Can you track the meeting in a linear flow.:\n\n{result_text}"}
]

# Upper bound on the length of the generated summary, in tokens.
max_summary_tokens = 1000

response = client.chat.completions.create(
    model="gpt-4o-mini",  # Any chat-completions model id can be used here
    messages=prompt,
    max_tokens=max_summary_tokens,
    temperature=0.2,  # Low temperature for a more repeatable summary
)

# A finish_reason of "length" means generation stopped at max_tokens, so the
# summary is incomplete. Surface that instead of letting it pass unnoticed.
if response.choices[0].finish_reason == "length":
    print(
        f"Warning: the summary was cut off at max_tokens={max_summary_tokens}; "
        "raise the limit or summarize a shorter time window."
    )


In [22]:
# Show the text of the first completion choice returned by the model.
summary_text = response.choices[0].message.content
print(summary_text)

### Summary of Meeting on Item B1: Appointment of Marvin Kaleo Manuel

**Action Item**: Approval of Chairperson's appointment of Marvin Kaleo Manuel as First Deputy to the Chairperson of the Commission on Water Resource Management.

---

**Supportive Speakers**:

1. **Katie Roth (Planning Program Manager)**:
   - Sentiment: Supportive
   - Reason: Mr. Manuel has been instrumental since January 2019, with significant accomplishments detailed in his resume.

2. **Kevin Chang**:
   - Sentiment: Supportive
   - Reason: Praised Mr. Manuel's community engagement and understanding of water's cultural significance.

3. **Leimana Damate**:
   - Sentiment: Supportive
   - Reason: Highlighted unanimous support from Hawaiian civic clubs and Mr. Manuel's responsiveness to community needs.

4. **Wayne Chung Tanaka (Sierra Club of Hawaii)**:
   - Sentiment: Strongly supportive
   - Reason: Noted Mr. Manuel's extensive experience and successful initiatives during his tenure.

5. **Karen Kanekoa**:
   

In [77]:
# Save the chunk table to CSV in the working directory, leaving out the
# DataFrame's row index.
output_csv_path = 'output.csv'
df.to_csv(output_csv_path, index=False)