In [1]:
import kss
import os
import re

In [5]:
def _parse_word_timestamps(data):
    """Extract ``(word, start, end)`` string triples from word-level timestamp text.

    Each entry is expected to look like ``<word> - 시작: <start>초, 끝: <end>초``.
    """
    # Both numbers accept any count of integer/fraction digits and the decimal
    # point is escaped, so values such as "12.3" or "100.25" are matched.
    return re.findall(
        r"(\S+)\s+-\s+시작:\s+(\d+(?:\.\d+)?)초,\s+끝:\s+(\d+(?:\.\d+)?)초",
        data,
    )


def _sentence_time_spans(sentences, words_with_timestamps):
    """Pair each sentence with the start/end time of the words it covers.

    Sentences are located by character position inside the space-joined word
    text, so a word that occurs several times keeps its own timestamps.

    Returns a list of ``(sentence, start_seconds, end_seconds)`` tuples.
    """
    if not words_with_timestamps:
        return []

    joined = " ".join(word for word, _, _ in words_with_timestamps)

    # owner[i] is the index of the word covering character i of `joined`;
    # the space after a word is attributed to that word.
    owner = []
    for index, (word, _, _) in enumerate(words_with_timestamps):
        owner.extend([index] * (len(word) + 1))

    spans = []
    search_from = 0
    last_char = len(joined) - 1
    for sentence in sentences:
        text = sentence.strip()
        if not text:
            # Nothing to align; the original code would crash on words[0] here.
            continue

        begin = joined.find(text, search_from)
        if begin == -1:
            # The splitter altered the text: assume the sentence starts right
            # after the previous one and spans as many characters as it has.
            begin = min(search_from, last_char)
            while begin < last_char and joined[begin] == " ":
                begin += 1
            end = min(begin + len(text), len(joined)) - 1
        else:
            end = begin + len(text) - 1

        first_word = words_with_timestamps[owner[begin]]
        last_word = words_with_timestamps[owner[end]]
        spans.append((sentence, float(first_word[1]), float(last_word[2])))
        search_from = end + 1

    return spans


def process_file(input_file, output_file):
    """Merge word-level timestamps into sentence-level timestamps.

    Reads ``input_file`` (UTF-8), extracts every
    ``<word> - 시작: <start>초, 끝: <end>초`` entry, splits the joined words into
    sentences with ``kss.split_sentences`` and writes one
    ``<sentence> - 시작: <start>초, 끝: <end>초`` line per sentence (each followed
    by a blank line) to ``output_file``.

    A sentence's start is the start time of its first word and its end is the
    end time of its last word. If no entries are found, an empty output file
    is written.

    Args:
        input_file: Path of the word-level timestamp file.
        output_file: Path of the sentence-level file to create or overwrite.
    """
    with open(input_file, 'r', encoding='utf-8') as file:
        data = file.read()

    words_with_timestamps = _parse_word_timestamps(data)

    if words_with_timestamps:
        sentences = kss.split_sentences(
            " ".join(word for word, _, _ in words_with_timestamps)
        )
    else:
        # Do not hand an empty string to the sentence splitter.
        sentences = []

    with open(output_file, 'w', encoding='utf-8') as file:
        for sentence, start_time, end_time in _sentence_time_spans(sentences, words_with_timestamps):
            file.write(f"{sentence} - 시작: {start_time:.1f}초, 끝: {end_time:.1f}초\n\n")


In [6]:
# Word-level timestamp files to read, and where the sentence-level results go.
input_folder = "D:\\video-summarization-master\\videos\\time_text"
output_folder = "D:\\video-summarization-master\\videos\\kss_text"

os.makedirs(output_folder, exist_ok=True)

# Sorted for a reproducible processing order. Sub-directories are skipped
# because process_file opens every path as a regular file.
for file_name in sorted(os.listdir(input_folder)):
    input_file = os.path.join(input_folder, file_name)
    if not os.path.isfile(input_file):
        continue
    output_file = os.path.join(output_folder, file_name)
    process_file(input_file, output_file)

In [20]:
import os

def _parse_timed_line(line):
    """Split ``<text> - 시작: <start>초, 끝: <end>초`` into ``(text, start, end)`` strings.

    Raises:
        ValueError: If the line does not follow that layout.
    """
    # The timing is whatever follows the LAST " - ", so a " - " inside the
    # text itself does not break parsing.
    text, separator, timing = line.rpartition(" - ")
    start_part, comma, end_part = timing.partition("초, ")
    if not separator or not comma or "시작: " not in start_part or "끝: " not in end_part:
        raise ValueError(f"unrecognised timestamp line: {line!r}")

    start = start_part.split("시작: ")[1]
    end = end_part.split("끝: ")[1]
    # The end value still carries its own "초" unit; drop it so the writer
    # does not emit "초초".
    if end.endswith("초"):
        end = end[:-1]
    return text.strip(), start, end


def process_file_improved(input_file, output_file):
    """Collapse a timestamp file into a single line spanning the whole file.

    Reads ``input_file`` (UTF-8) line by line, ignoring blank lines, and writes
    one ``<text> - 시작: <start>초, 끝: <end>초`` line (followed by a blank line)
    to ``output_file``. ``<start>`` is the start time of the first non-blank
    line and ``<end>`` is the end time of the last non-blank line. If the input
    has no non-blank lines, an empty output file is written.

    NOTE(review): ``<text>`` is the text of the LAST non-blank line only, which
    is what the previous implementation produced. If the intent was to join the
    text of every line, that still needs to be changed - confirm with the author.

    Args:
        input_file: Path of the timestamp file to read.
        output_file: Path of the file to create or overwrite.

    Raises:
        ValueError: If a non-blank line is not in the expected layout.
    """
    with open(input_file, 'r', encoding='utf-8') as file:
        stripped_lines = [line.strip() for line in file]

    entries = [_parse_timed_line(line) for line in stripped_lines if line]

    with open(output_file, 'w', encoding='utf-8') as file:
        if not entries:
            # Nothing to summarise; leave the output file empty.
            return
        _, first_time, _ = entries[0]
        current_sentence, _, last_time = entries[-1]
        file.write(f"{current_sentence} - 시작: {first_time}초, 끝: {last_time}초\n\n")

# Timestamp files to read, and where the one-line summaries go.
input_folder = "D:\\video-summarization-master\\videos\\time_text"
output_folder = "D:\\video-summarization-master\\videos\\kss_text_2"

os.makedirs(output_folder, exist_ok=True)

# Sorted for a reproducible processing order. Sub-directories are skipped
# because process_file_improved opens every path as a regular file.
for file_name in sorted(os.listdir(input_folder)):
    input_file = os.path.join(input_folder, file_name)
    if not os.path.isfile(input_file):
        continue
    output_file = os.path.join(output_folder, file_name)
    process_file_improved(input_file, output_file)



In [2]:
import subprocess
import os

# Folder scanned for .mp4 inputs, and folder that receives the .wav outputs.
input_directory = "D:\\video-summarization-master\\videos\\ffmpeg_data"
output_directory = "D:\\video-summarization-master\\videos\\ffmpeg_wav"

def convert_mp4_to_wav(input_directory, output_directory):
    """Extract the audio track of every .mp4 file in a folder as a .wav file.

    Each ``<name>.mp4`` directly inside ``input_directory`` is passed to the
    ``ffmpeg`` executable, which writes ``<name>.wav`` into
    ``output_directory``. The extension check is case-insensitive and the
    output folder is created if it does not exist. A message is printed for
    every file ffmpeg reports a non-zero exit code for.

    Args:
        input_directory: Folder containing the .mp4 files.
        output_directory: Folder that receives the .wav files.

    Raises:
        FileNotFoundError: If ``input_directory`` does not exist, or if the
            ``ffmpeg`` executable cannot be found when a file is converted.
    """
    # ffmpeg does not create missing output folders itself.
    os.makedirs(output_directory, exist_ok=True)

    for filename in os.listdir(input_directory):
        if not filename.lower().endswith(".mp4"):
            continue

        input_file_path = os.path.join(input_directory, filename)
        output_file_path = os.path.join(
            output_directory, os.path.splitext(filename)[0] + ".wav"
        )

        # Passing an argument list (no shell) keeps file names with quotes,
        # spaces or shell metacharacters from being misparsed or executed.
        command = [
            "ffmpeg",
            "-i", input_file_path,
            "-vn",
            "-ac", "2",
            "-ar", "44100",
            "-ab", "320k",
            "-f", "wav",
            output_file_path,
        ]
        return_code = subprocess.call(command)
        if return_code != 0:
            print(f"ffmpeg failed (exit code {return_code}) for {input_file_path}")

# Run the conversion for the two folders configured above.
convert_mp4_to_wav(input_directory, output_directory)
