In [1]:
# %pip install -U stable-ts  # uncomment on first run; %pip installs into this kernel's environment

In [1]:
import stable_whisper
import os
from datetime import datetime
import json
import time

In [2]:
# Directory (under the current working directory) where transcription exports are written.
output_path = os.path.join(os.getcwd(), 'output')

# exist_ok=True keeps this cell safe to re-run and avoids the check-then-create
# race of a separate os.path.exists() test.
os.makedirs(output_path, exist_ok=True)

# Load the 'large' checkpoint through stable-ts.
# NOTE(review): download_root presumably sets where the weights are stored; "/data/whisper"
# is a machine-specific absolute path — adjust per environment.
model = stable_whisper.load_model('large', download_root="/data/whisper")

In [6]:
# Decoding options passed to the transcribe() calls in this notebook.
options = dict(language='english')

# Transcribe the first VHF sample; the segments echoed below the cell come from verbose=True.
result = model.transcribe(
    'samples/vhf1.mp3',
    word_timestamps=False,
    verbose=True,
    **options
)

[00:00.000 --> 00:05.000]  Port Service, this is Korea. How do you read me? Over.
[00:05.000 --> 00:10.000]  Korea, this is Port Service. I read you good. Change to channel 1-2. Over.
[00:10.000 --> 00:18.000]  Port Service, this is Korea. Change to channel 1-2. Out.
[00:18.000 --> 00:25.000]  Korea, this is Port Service. What is your ETA at pilot station? Over.
[00:28.000 --> 00:35.000]  Port Service, my ETA at pilot station is 1-0-0-0 hours local time. Over.
[00:35.300 --> 00:43.000]  Korea, what is your call sign? Over.
[00:43.000 --> 00:48.520]  Port Service, my call sign is Delta Sierra Echo Victor 9. Over.
[00:48.520 --> 00:53.520]  Korea, what was your last port of call and gross tonnage? Over.
[00:53.520 --> 01:06.000]  Port Service, my last port of call was Japan and my gross tonnage is 1-5-7-8-9 tonnage. Over.
[01:06.520 --> 01:10.520]  Korea, do you carry any dangerous goods? Over.
[01:10.520 --> 01:18.160]  Port Service, I do not carry any dangerous goods. My cargo is fuel 

In [11]:
# List every attribute name exposed by the result object (dir() returns data
# attributes and dunder names as well as methods).
print(dir(result))

['__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__len__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', '_merge_segments', '_split_segments', 'add_segments', 'all_words', 'get_gap_indices', 'get_gaps', 'get_locked_indices', 'get_punctuation_indices', 'language', 'merge_by_gap', 'merge_by_punctuation', 'ori_dict', 'reassign_ids', 'regroup', 'remove_no_word_segments', 'rescale_time', 'reset', 'save_as_json', 'segments', 'segments_to_dicts', 'split_by_gap', 'split_by_punctuation', 'suppress_silence', 'text', 'to_ass', 'to_dict', 'to_srt_vtt', 'unlock_all_segments']


In [17]:
# Show the full transcript as one string, read from the 'text' entry of result.ori_dict.
# NOTE(review): ori_dict is presumably the raw Whisper output dict — confirm in the stable-ts docs.
result.ori_dict['text']

' Port Service, this is Korea. How do you read me? Over. Korea, this is Port Service. I read you good. Change to channel 1-2. Over. Port Service, this is Korea. Change to channel 1-2. Out. Korea, this is Port Service. What is your ETA at pilot station? Over. Port Service, my ETA at pilot station is 1-0-0-0 hours local time. Over. Korea, what is your call sign? Over. Port Service, my call sign is Delta Sierra Echo Victor 9. Over. Korea, what was your last port of call and gross tonnage? Over. Port Service, my last port of call was Japan and my gross tonnage is 1-5-7-8-9 tonnage. Over. Korea, do you carry any dangerous goods? Over. Port Service, I do not carry any dangerous goods. My cargo is fuel products. Over. Korea, your message understood. You must call when you pass Alpha Island on channel 1-2. Over. Port Service, we call when we pass Alpha Island on channel 1-2. Over. Korea, stand by channel 1-6 and 1-2. Out.'

In [46]:
# Base name of the audio file without directory or extension
# (shown as the cell's output value; nothing is printed).
os.path.splitext(os.path.basename('whisper_audio/vhf2_edited.mp3'))[0]

'vhf2_edited'

In [16]:
# The stable-ts model is called like the regular Whisper model but accepts additional parameters.
# Transcribe the second recording; verbose=True echoes each segment while decoding.
result = model.transcribe('whisper_audio/vhf2_edited.mp3', verbose=True, word_timestamps=False, **options)

# Export the same result in each format. Paths are built with os.path.join,
# the same way output_path itself is built, instead of hand-concatenating '/'.
# srt/vtt
result.to_srt_vtt(os.path.join(output_path, 'audio.srt'))
# ass
result.to_ass(os.path.join(output_path, 'audio.ass'))
# json
result.save_as_json(os.path.join(output_path, 'audio.json'))

[00:00.000 --> 00:09.000]  German Bay, traffic, German Bay, traffic, motor vessel Marfam, Marfam, Bava, Charlie, November, hotel, I'm ready, over.
[00:10.000 --> 00:14.000]  Yeah, Marfam, German Bay, traffic, good afternoon.
[00:15.000 --> 00:23.000]  Good afternoon, sir, motor vessel Marfam, entering the monitoring area, Bowne 4, Bremen, Germany, over.
[00:24.000 --> 00:30.000]  Yeah, coming from Archangels, how many persons on board and your security level?
[00:31.000 --> 00:36.000]  Eight persons on board and security level number one, over.
[00:37.000 --> 00:39.000]  Yeah, what is the draft today?
[00:40.000 --> 00:43.000]  My maximum draft is 5.95, over.
[00:44.000 --> 00:48.000]  Okay, and your present ETA to wizard pilot station?
[00:49.000 --> 00:55.000]  My present ETA to wizard pilot station, 1-7-0-0, local time, over.
[00:58.000 --> 01:04.000]  You will run on slow speed to be at 1700 at the outer pilot station, yeah?
[01:05.000 --> 01:12.000]  Yes, sir, that's correct. As p

In [21]:
# Reload the exported JSON so the replay below works from the saved file.
# The encoding is explicit because open() otherwise falls back to the locale
# default, which can mis-decode non-ASCII transcript text on some platforms.
with open(os.path.join(output_path, 'audio.json'), encoding='utf-8') as f:
    data = json.load(f)

In [43]:
def delay_segment(text, start, end):
    """Print one transcript segment with its timestamps, then pause for its duration.

    Args:
        text: Segment text to print.
        start: Segment start, in seconds from the beginning of the audio.
        end: Segment end, in seconds from the beginning of the audio.

    The line is printed as ``[HH:MM:SS] --> [HH:MM:SS] text``; fractional
    seconds are dropped from the labels. The call then blocks for
    ``end - start`` seconds (never less than zero).
    """
    # time.gmtime/time.strftime replace datetime.utcfromtimestamp, which is
    # deprecated since Python 3.12; the offsets are formatted as a UTC clock time.
    start_timestamp = time.strftime('%H:%M:%S', time.gmtime(start))
    end_timestamp = time.strftime('%H:%M:%S', time.gmtime(end))
    print(f"[{start_timestamp}] --> [{end_timestamp}]", end=' ')
    print(text)
    # time.sleep() raises ValueError for a negative argument, so a segment whose
    # end precedes its start is treated as having zero duration.
    time.sleep(max(0.0, end - start))

# Replay the saved transcript segment by segment, pausing for each segment's duration.
# NOTE: the silence between one segment's end and the next one's start is not
# waited on, so the replay finishes sooner than the original recording.
for segment in data['ori_dict']['segments']:
    delay_segment(segment['text'], segment['start'], segment['end'])


[0.0] --> [9.0]  German Bay, traffic, German Bay, traffic, motor vessel Marfam, Marfam, Bava, Charlie, November, hotel, I'm ready, over.
[10.0] --> [14.0]  Yeah, Marfam, German Bay, traffic, good afternoon.
[15.0] --> [23.0]  Good afternoon, sir, motor vessel Marfam, entering the monitoring area, Bowne 4, Bremen, Germany, over.
[24.0] --> [30.0]  Yeah, coming from Archangels, how many persons on board and your security level?
[31.0] --> [36.0]  Eight persons on board and security level number one, over.
[37.0] --> [39.0]  Yeah, what is the draft today?
[40.0] --> [43.0]  My maximum draft is 5.95, over.
[44.0] --> [48.0]  Okay, and your present ETA to wizard pilot station?
[49.0] --> [55.0]  My present ETA to wizard pilot station, 1-7-0-0, local time, over.
[58.0] --> [64.0]  You will run on slow speed to be at 1700 at the outer pilot station, yeah?
[65.0] --> [72.0]  Yes, sir, that's correct. As per agent, our pilot will be 1-7-0-0, local time, over.
[73.0] --> [84.0]  Okay, it's order