In [None]:
# OpenAI API key handed to ChatOpenAI in VideoConversationSummarizer.use_langchain.
# The value below is only a placeholder ("enter your API key"); replace it
# before running, and do not share the notebook with a real key filled in.
OPENAI_API_KEY = "APIキーを入力"

In [None]:
import whisper
from google.colab import files
from langchain import PromptTemplate
from langchain.chains.summarize import load_summarize_chain
from langchain.chat_models import ChatOpenAI
from langchain.docstore.document import Document
from langchain.prompts import PromptTemplate


class VideoConversationSummarizer:
    """Upload a recording in Colab, transcribe it with Whisper, and summarize it.

    The pipeline has three steps, each storing its result on the instance:

    * ``upload_file``   -> ``filename``
    * ``use_whisper``   -> ``whisper_result``
    * ``use_langchain`` -> ``langchain_result``

    Every step reports problems with ``print`` instead of raising, and a step
    skips itself when the step it depends on produced nothing.
    """

    def __init__(self):
        # Name of the uploaded file (a key of the dict returned by files.upload()).
        self.filename = None
        # Transcript text taken from the "text" entry of Whisper's result.
        self.whisper_result = None
        # Summary text taken from the "output_text" entry of the chain's result.
        self.langchain_result = None

    def run(self):
        """Run upload, transcription and summarization, then print the summary."""
        self.upload_file()
        self.use_whisper()
        self.use_langchain()
        # Each step already printed its own failure message; only announce a
        # summary when one was actually produced (avoids printing "None").
        if self.langchain_result:
            print(f"要約結果: {self.langchain_result}")

    def upload_file(self):
        """Ask the user to upload a file via Colab and remember its name.

        Only the first uploaded file is used.  On failure, or when nothing was
        uploaded, a message is printed and ``self.filename`` is left unchanged.
        """
        try:
            uploaded = files.upload()
        except Exception as e:
            print(f"アップロード失敗: {e}")
            return

        # An empty mapping would otherwise surface as a cryptic IndexError.
        if not uploaded:
            print("アップロード失敗: ファイルが選択されていません")
            return

        self.filename = next(iter(uploaded))
        print(
            f"アップロード完了: {self.filename}, サイズ: {len(uploaded[self.filename])} bytes"
        )

    def use_whisper(self):
        """Transcribe the uploaded file with Whisper's "large" model (Japanese).

        The transcript is stored in ``self.whisper_result`` and, when non-empty,
        also written to ``whisper_result.txt``.  Does nothing but print a
        message when no file has been uploaded.  Errors are printed, not raised.
        """
        if self.filename is None:
            print("use_whisper: ファイルがアップロードされていません")
            return

        try:
            model = whisper.load_model("large")
            # NOTE(review): assumes the upload was saved under /content
            # (the usual Colab working directory) - confirm if cwd differs.
            self.whisper_result = model.transcribe(
                f"/content/{self.filename}", verbose=True, language="ja"
            ).get("text")
            if self.whisper_result:
                # Explicit UTF-8 so Japanese text is written the same way
                # regardless of the platform's default encoding.
                with open("whisper_result.txt", "w", encoding="utf-8") as f:
                    f.write(self.whisper_result)
                print("文字起こし完了")
            else:
                print("use_whisper: 文字起こしに失敗しました")
        except Exception as e:
            # Prefix with the step name, like the other messages of this class.
            print(f"use_whisper: {e}")

    def use_langchain(self):
        """Summarize the transcript with a map_reduce summarize chain.

        The transcript is cut into 3,000-character documents, each is
        summarized with the prompt below, and the partial summaries are
        combined with the same prompt.  The summary is stored in
        ``self.langchain_result`` and, when non-empty, written to
        ``result.txt``.  Does nothing but print a message when there is no
        transcript (``None`` or empty).  Errors are printed, not raised.
        """
        # Treat an empty transcript like a missing one: there is nothing to
        # summarize, and the chain would be invoked with zero documents.
        if not self.whisper_result:
            print("use_langchain: 文章が入力されていません")
            return

        try:
            prompt = PromptTemplate(
                template=(
                    "あなたは優秀なライターで, 文章の要約を得意としています"
                    "\n####\n"
                    "{text}"
                    "\n####\n"
                    "日本語としておかしい部分は修正しながら, この会議の会話を500文字程度で要約してください"
                ),
                input_variables=["text"],
            )
            llm = ChatOpenAI(
                openai_api_key=OPENAI_API_KEY,
                temperature=0,
            )

            # Split the transcript into 3,000-character chunks, one Document each.
            docs = [
                Document(page_content=s)
                for s in self._split_string(self.whisper_result, 3000)
            ]

            chain = load_summarize_chain(
                llm,
                chain_type="map_reduce",
                map_prompt=prompt,
                combine_prompt=prompt,
                verbose=True,
            )
            self.langchain_result = chain(
                {"input_documents": docs}, return_only_outputs=True
            ).get("output_text")

            if self.langchain_result:
                # Explicit UTF-8 for the same reason as in use_whisper.
                with open("result.txt", "w", encoding="utf-8") as f:
                    f.write(self.langchain_result)
                print("要約完了")
            else:
                print("use_langchain: 要約に失敗しました")
        except Exception as e:
            # Prefix with the step name, like the other messages of this class.
            print(f"use_langchain: {e}")

    @staticmethod
    def _split_string(s, length):
        """Return consecutive slices of ``s`` that are at most ``length`` long.

        The last slice may be shorter; an empty ``s`` yields an empty list.

        Raises:
            ValueError: if ``length`` is not positive (a negative value would
                otherwise silently produce no chunks at all).
        """
        if length <= 0:
            raise ValueError("length must be a positive integer")
        return [s[i : i + length] for i in range(0, len(s), length)]

In [None]:
# Run the whole pipeline: upload -> Whisper transcription -> LangChain summary.
# The instance is kept in `summarizer` so its filename / whisper_result /
# langchain_result attributes can be inspected afterwards.
summarizer = VideoConversationSummarizer()
summarizer.run()