In [60]:
import os
import io
from google.cloud import speech_v1p1beta1 as speech
from moviepy.editor import *
from pydub import AudioSegment

os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "C:/Users/user/Desktop/graduate-388812-57af2b042054.json"

def extract_audio_from_video(video_path, audio_path):
    video = VideoFileClip(video_path)
    audio = video.audio
    temp_audio_path = "D:/video-summarization-master/videos/new/temp.wav"  
    audio.write_audiofile(temp_audio_path, codec='pcm_s16le')  

    audio_segment = AudioSegment.from_wav(temp_audio_path)
    mono_audio = audio_segment.set_channels(1)
    mono_audio.export(audio_path, format="wav")

    os.remove(temp_audio_path)

def split_audio_into_chunks(audio_path, chunk_length=5000):
    audio = AudioSegment.from_wav(audio_path)
    audio_chunks = []

    if len(audio) <= chunk_length:
        return [audio_path]

    for i, chunk in enumerate(audio[::chunk_length]):
        chunk_path = f"{audio_path[:-4]}_chunk{i}.wav"
        chunk.export(chunk_path, format="wav")
        audio_chunks.append(chunk_path)

    return audio_chunks

def transcribe_video(audio_path):
    client = speech.SpeechClient()

    audio_chunks = split_audio_into_chunks(audio_path)

    final_response = []

    for chunk_path in audio_chunks:
        with io.open(chunk_path, "rb") as audio_file:
            content = audio_file.read()

        audio = speech.RecognitionAudio(content=content)
        config = speech.RecognitionConfig(
            encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
            sample_rate_hertz=44100,
            language_code="en-US",
            enable_automatic_punctuation=True,
            enable_word_time_offsets=True,
        )

        operation = client.long_running_recognize(config=config, audio=audio)
        response = operation.result(timeout=500)

        final_response.extend(response.results)

        os.remove(chunk_path)

    return final_response

if __name__ == "__main__":
    video_dir = "D:/video-summarization-master/videos/new/data2"
    audio_dir = "D:/video-summarization-master/videos/new/wav"
    text_dir = "D:/video-summarization-master/videos/new/text"

    video_filenames = [f for f in os.listdir(video_dir) if f.endswith(".mp4")]

    for video_filename in video_filenames:
        video_path = os.path.join(video_dir, video_filename)

        audio_filename = f"{os.path.splitext(video_filename)[0]}.wav"
        audio_path = os.path.join(audio_dir, audio_filename)

        extract_audio_from_video(video_path, audio_path)

        response = transcribe_video(audio_path)

        text_filename = f"{os.path.splitext(video_filename)[0]}.txt"
        text_path = os.path.join(text_dir, text_filename)

        with open(text_path, "w") as text_file:
            for result in response:
                text_file.write(result.alternatives[0].transcript)
                text_file.write("\n")
                text_file.write("Confidence: {}\n".format(result.alternatives[0].confidence))
                text_file.write("\n")

                for word in result.alternatives[0].words:
                    text_file.write("Word: {}\n".format(word.word))
                    text_file.write("Start time: {} seconds\n".format(word.start_time.total_seconds()))
                    text_file.write("End time: {} seconds\n".format(word.end_time.total_seconds()))

                    text_file.write("\n")


                                                                     
[A                                                                  


[A[A[A                                                           

chunk:   1%|▏         | 27/1879 [19:09<21:54:17, 42.58s/it, now=None]

[A[A
[A

MoviePy - Writing audio in D:/video-summarization-master/videos/new/temp.wav





[A[A[A


[A[A[A


[A[A[A


[A[A[A


                                                                     
[A                                                                  


[A[A[A                                                            

chunk:   1%|▏         | 27/1879 [19:10<21:54:46, 42.60s/it, now=None]

[A[A
[A

MoviePy - Done.


FileNotFoundError: [Errno 2] No such file or directory: 'D:/video-summarization-master/videos/new/text\\1234.txt'

In [8]:
import os
import kss
import re

input_folder_path = "D:\\video-summarization-master\\videos\\time_text"
output_folder_path = "D:\\video-summarization-master\\videos\\ssk_text"

# 입력 폴더 내 모든 파일 확인
for filename in os.listdir(input_folder_path):
    # .txt 파일인 경우만 처리
    if filename.endswith(".txt"):
        input_file_path = os.path.join(input_folder_path, filename)
        output_file_path = os.path.join(output_folder_path, filename)

        # 워드별 구분된 입력 파일을 문장별 구분된 출력 파일로 변환
        with open(input_file_path, "r", encoding="utf-8") as input_file, open(output_file_path, "w", encoding="utf-8") as output_file:
            content_lines = input_file.readlines()
            word_intervals = []

            # 각 단어의 시작 시간 기록
            for line in content_lines:
                word_start_time = float(line.split(' - 시작: ')[1].split('초')[0])
                word_intervals.append((line.strip(), word_start_time))

            words = ' '.join([interval[0] for interval in word_intervals]).replace(" - 시작:", '').replace("초, 끝:", '').split("초")[1:]
            text = ' '.join(words)
            sentences = kss.split_sentences(text)

            if len(sentences) == 0:
                continue

            # 문장 구성 시작점 찾기
            sentence_start_times = [word_intervals[0][1]]
            words_index = 0

            for sentence in sentences[:-1]:
                words_index += len(sentence.split()) - 1
                if words_index < len(word_intervals) - 1:
                    sentence_start_times.append(word_intervals[words_index+1][1])
                else:
                    break

            # 문장 생성 및 출력
            for i, sentence in enumerate(sentences):
                if i < len(sentence_start_times):
                    start_time = sentence_start_times[i]
                else:
                    start_time = word_intervals[-1][1]

                if i+1 < len(sentence_start_times):
                    end_time = sentence_start_times[i+1]
                else:
                    end_time = word_intervals[-1][1]

                output_line = f"{sentence} - 시작:{start_time:.1f}초, 끝:{end_time:.1f}초"
                output_file.write(output_line + '\n')


KeyboardInterrupt: 

In [19]:
import os
import kss
import re

input_folder_path = "D:\\video-summarization-master\\videos\\ssk_text_1"
output_folder_path = "D:\\video-summarization-master\\videos\\ssk_text"

# 입력 폴더 내 모든 파일 확인
for filename in os.listdir(input_folder_path):
    # .txt 파일인 경우만 처리
    if filename.endswith(".txt"):
        input_file_path = os.path.join(input_folder_path, filename)
        output_file_path = os.path.join(output_folder_path, filename)

        # 워드별 구분된 입력 파일을 문장별 구분된 출력 파일로 변환
        with open(input_file_path, "r", encoding="utf-8") as input_file, open(output_file_path, "w", encoding="utf-8") as output_file:
            content_lines = input_file.readlines()
            word_intervals = []

            # 각 단어의 시작 시간 기록
            for line in content_lines:
                word_start_time = float(line.split(' - 시작: ')[1].split('초')[0])
                word = line.split(' - 시작: ')[0]
                word_intervals.append((word, word_start_time))

            words = ' '.join([interval[0] for interval in word_intervals])
            text = words
            sentences = kss.split_sentences(text)

            if len(sentences) == 0:
                continue

            # 문장의 endIndex 찾기
            start_index = 0
            for sentence in sentences[:-1]:
                sentence_start_time = word_intervals[start_index][1]
                endIndex = start_index + len(sentence.split()) - 1
                if endIndex >= len(word_intervals) - 1:
                    break

                # 문장 시작 타임스탬프를 설정하고 끝 인덱스를 업데이트합니다.
                sentence_end_time = word_intervals[endIndex][1]
                start_index = endIndex + 1

                output_line = f"{sentence}, 시작 시간:{sentence_start_time:.1f}초, 종료 시간:{sentence_end_time:.1f}초"
                output_file.write(output_line + '\n')


KeyboardInterrupt: 