In [None]:
import pgl_usage
import saiga_description
import subtitles_generation

In [None]:
from os import getenv
from pathlib import Path
from tempfile import TemporaryDirectory
from time import sleep
from json import dumps
from io import BytesIO

from dotenv import load_dotenv

from ..database import create_database_session, Upload, create_short, Short
from ..s3 import FileStorage

In [None]:
# Populate the process environment (python-dotenv) before the settings below
# are read with getenv().
load_dotenv()

# Object-storage settings; passed to FileStorage unchanged (None when unset).
S3_ENDPOINT_URL = getenv("S3_ENDPOINT_URL")
S3_REGION = getenv("S3_REGION")
S3_ACCESS_KEY = getenv("S3_ACCESS_KEY")
S3_PRIVATE_ACCESS_KEY = getenv("S3_PRIVATE_ACCESS_KEY")
S3_BUCKET_NAME = getenv("S3_BUCKET_NAME")

# Database connection settings.
DATABASE_HOST = getenv("DATABASE_HOST")
# The port is the only setting converted here. Check it explicitly so a missing
# variable fails with a readable message instead of the TypeError that
# int(None) would raise.
_database_port_raw = getenv("DATABASE_PORT")
if _database_port_raw is None:
    raise RuntimeError("DATABASE_PORT environment variable is not set")
DATABASE_PORT = int(_database_port_raw)
DATABASE_USER = getenv("DATABASE_USER")
DATABASE_PASSWORD = getenv("DATABASE_PASSWORD")
DATABASE_DBNAME = getenv("DATABASE_DBNAME")
DATABASE_DRIVER = getenv("DATABASE_DRIVER")

# Shared handles used by the processing cell below.
# NOTE: the name `file_storge` (sic) is kept as-is because other cells refer to it.
file_storge = FileStorage(S3_ENDPOINT_URL, S3_REGION, S3_ACCESS_KEY, S3_PRIVATE_ACCESS_KEY, S3_BUCKET_NAME)
database_session = create_database_session(DATABASE_DRIVER, DATABASE_HOST, DATABASE_PORT, DATABASE_USER, DATABASE_PASSWORD, DATABASE_DBNAME)

In [None]:
# Worker loop: repeatedly pick one upload whose state is "uploaded" and run it
# through the pipeline (cut shorts -> subtitles -> title/keywords/description ->
# interpretation), committing the upload's state after every stage.
#
# NOTE(review): `title_generation`, `keyword_extraction`, `description_generation`
# and `interpretation` are not imported in any cell visible here; they must be
# brought into scope before this cell runs, otherwise it raises NameError.
while True:
    # `.first()` yields None when nothing matches. The previous `.all()[0]`
    # raised IndexError on an empty result, so the idle branch was never reached.
    unprocessed_upload = database_session.query(Upload).where(Upload.upload_state == "uploaded").first()
    if unprocessed_upload is None:
        # Idle branch: this used to be an `else` bound to `while True`, which
        # never executes because the loop has no normal exit.
        print("No unprocessed uploads")
        sleep(15)
        continue

    with TemporaryDirectory() as upload_processing_directory_path_str:
        upload_processing_directory_path = Path(upload_processing_directory_path_str)
        upload_processing_source_video_file_path = upload_processing_directory_path.joinpath("source")

        # Stage 0: copy the source video from object storage to a local file.
        with open(upload_processing_source_video_file_path, "wb") as upload_processing_source_video_file:
            upload_source_video = file_storge.download_file(unprocessed_upload.source_video_file_key)
            upload_processing_source_video_file.write(upload_source_video.read())

        saiga_outputs = []
        # Stage 1: cut the source video into shorts; the result is iterated as
        # a collection of file paths, one per short.
        paths_for_whisper = pgl_usage.main(str(upload_processing_source_video_file_path))

        shorts_ids = list()
        for path in paths_for_whisper:
            with open(path, mode="rb") as short_video_file:
                short_video_file_key = file_storge.upload_file(short_video_file)
                short_id = create_short(database_session, unprocessed_upload.id, short_video_file_key, None, state="video")
                shorts_ids.append(short_id)
        unprocessed_upload.upload_state = "shorts"
        database_session.commit()

        # Stage 2: generate subtitles per short and store them as JSON.
        subtitles_info = {}

        for filepath, short_id in zip(paths_for_whisper, shorts_ids):
            subtitles_info[filepath] = subtitles_generation.inference(filepath)

            short_subtitles_file = BytesIO(bytes(dumps(subtitles_info[filepath]), encoding="utf-8"))
            short = database_session.get(Short, short_id)
            short_subtitles_file_key = file_storge.upload_file(short_subtitles_file, ".json")
            short.subtitles_file_key = short_subtitles_file_key
            database_session.commit()
        unprocessed_upload.upload_state = "subtitles"
        database_session.commit()

        # Stage 3: title, keywords and description from the subtitle text.
        info = []
        # Pair ids with the original path list (not the dict keys) so the
        # pairing stays aligned even if a path appears more than once.
        for file_path, short_id in zip(paths_for_whisper, shorts_ids):
            text = subtitles_info[file_path]["text"]
            title = title_generation.generate_title(text)
            keywords = keyword_extraction.generate(text)
            # Keep at most the first five keywords, de-duplicated while
            # preserving their order (a set would reorder them arbitrarily).
            keywords = list(dict.fromkeys(keywords[:5]))
            description = description_generation.generate_description(text)
            info.append({
                "description": description,
                "title": title,
                "keywords": keywords,
            })

            short = database_session.get(Short, short_id)
            short.description = description
            short.title = title
            keyword_file = BytesIO(bytes(dumps(keywords), encoding="utf-8"))
            keyword_file_key = file_storge.upload_file(keyword_file, ".json")
            short.key_words_file_key = keyword_file_key
            short.state = "described"
            database_session.commit()
        unprocessed_upload.upload_state = "described"
        database_session.commit()

        # Stage 4: interpretation of every short.
        common_interpretation = []

        # The loop variable was previously named `shorts_id` while the body read
        # `short_id`, so every result was written to the last short of stage 3.
        for file_path, short_id in zip(paths_for_whisper, shorts_ids):
            cur_interpretation = interpretation.get_interpretation(subtitles_info[file_path]["text"], file_path)
            common_interpretation.append(cur_interpretation)

            short = database_session.get(Short, short_id)
            short.interpretation = cur_interpretation
            short.state = "interpreted"
            database_session.commit()
        unprocessed_upload.upload_state = "interpreted"
        database_session.commit()
