In [24]:
import os
import json
import time
from pathlib import Path 
import whisper 
import logging
from watchdog.events import FileSystemEventHandler
from watchdog.observers import Observer

In [25]:
class Monitoring(FileSystemEventHandler):
    def __init__(self, folder_path):
        self.folder_path = folder_path
        self._ensure_tracker_file_exists()
        
    def _ensure_tracker_file_exists(self):
        json_file_path = os.path.join(self.folder_path, "transcript_tracker.json")
        if not os.path.exists(json_file_path):
            print(f"{json_file_path} not found. Creating a new one...")
            with open(json_file_path, "w") as j:
                json.dump({}, j)
            time.sleep(1)

    def on_created(self, event):
        if not event.is_directory and os.path.isfile(event.src_path):
            json_file_path = os.path.join(self.folder_path, "transcript_tracker.json")
            try:
                self._ensure_tracker_file_exists()
                
                with open(json_file_path, "r") as j:
                    json_file = json.load(j)
                
                if event.src_path not in json_file.keys():
                    print(f"New file detected: {event.src_path}")
                    system = TranscriberSystem(self.folder_path)
                    system.process_files()
            except Exception as e:
                print(f"Error processing new file: {e}")
        
    def observer(self):
        print("Observer has started, monitoring folder:", self.folder_path)

        try:
            observer = Observer()
            observer.schedule(self, self.folder_path, recursive=True)
            observer.start()
            try:
                while True:
                    time.sleep(1)
            except KeyboardInterrupt:
                print("Keyboard interrupt received, stopping observer...")
                observer.stop()
                observer.join()
                print("Observer Stopped.")
        except Exception as e:
            print(f"Observer error: {e}")

In [26]:
class FileScanner:
    def __init__(self,folder_path,TranscriptTracker):
        self.tracker = TranscriptTracker
        self.folderpath=folder_path
        self.scan_folder=self.scan(self.is_supported(folder_path))

    def is_supported(self,folder_path):
        supp=[".mp4",".wav",".mp3",".ogg",".mkv",".mov", ".flv",".aac",".m4a"]
        supported_files=[]
        for i in Path(folder_path).rglob("*"):
            if i.suffix.lower() in supp:
                supported_files.append(i)
            else:
                continue
        return supported_files
    
    def scan(self,supported_files):
        a=[]
        for f in supported_files:
            if not self.tracker.is_processed(f):
                a.append(f)
            else:
                continue
        return a

In [27]:
class TransciptTracker():
    def __init__(self, transcript_file):
        self.tracker_file = transcript_file
        self.processed_files=self.load_tracker()
    
    def load_tracker(self):
        if os.path.exists(self.tracker_file):
            with open(self.tracker_file, 'r') as f:
                return json.load(f)
        return {}
        
    def is_processed(self,file_path):
        file_path_str=str(file_path)
        if file_path_str in self.processed_files:
            last_mtime=self.processed_files[file_path_str]["mtime"]
            return last_mtime==os.path.getmtime(file_path)
        return False

    def mark_processed(self,file_path):
        file_path_str=str(file_path)
        self.processed_files[file_path_str]={
            "mtime":os.path.getmtime(file_path),
            "processed_at":time.time()
        }
        self.save_tracker()
    
    def save_tracker(self):
        with open(self.tracker_file, 'w') as f:
            json.dump(self.processed_files, f, indent=2)
    pass

In [28]:
class Transcriber():
    def __init__(self,transcript_tracker):
        self.model = whisper.load_model("base")
        self.tracker = TransciptTracker

    def file_str_path(self,file_path):
        file_path_strr=str(file_path)
        return file_path_strr
    
    def transcribe(self,file_path_strr):
        try:
            transcribed_text=self.model.transcribe(file_path_strr)['text']
            return transcribed_text
        except Exception as e:
            return f"Error: {e}"
    pass

In [29]:
class TranscriberSystem:
    def __init__(self,folder_path):
        self.tracker = TransciptTracker(os.path.join(folder_path, "transcript_tracker.json")) 
        self.scanner = FileScanner(folder_path, self.tracker) 
        self.transcriber = Transcriber(self.tracker) 
    
    def process_files(self):
        for file_path in self.scanner.scan_folder:
            print(f"Processing: {file_path}")
            trans=self.transcriber.transcribe(str(file_path))
            output_file=file_path.with_suffix(".txt")
            with open(output_file,"w",encoding='utf-8') as f:
                f.write(trans)
            print(f"saved_transciption: {output_file}")
            self.tracker.mark_processed(file_path)

In [30]:
def main():
    folder_path = "C:\\projects\\live_transcipt\\files"
    system = TranscriberSystem(folder_path)
    system.process_files()
    monitor = Monitoring(folder_path)
    monitor.observer()

In [None]:
if __name__ == "__main__":
    main()

Observer has started, monitoring folder: C:\projects\live_transcipt\files
New file detected: C:\projects\live_transcipt\files\IMG_6717.MOV
Processing: C:\projects\live_transcipt\files\IMG_6717.MOV
saved_transciption: C:\projects\live_transcipt\files\IMG_6717.txt
New file detected: C:\projects\live_transcipt\files\IMG_6717.txt
