From cd5946f8556168d2a17f68ec94805e2172d5d523 Mon Sep 17 00:00:00 2001 From: Josh Moody Date: Fri, 9 Feb 2024 19:24:56 -0700 Subject: [PATCH 01/40] Added Sphynx documentation and GitHub action to auto-deploy it to GitHub pages. Also added docstrings to everything. --- .github/workflows/docs.yml | 49 ++++++++++ .gitignore | 6 ++ .vscode/settings.json | 3 + README.md | 90 +++--------------- docs/Makefile | 20 ++++ docs/api.rst | 8 ++ docs/conf.py | 70 ++++++++++++++ docs/index.rst | 35 +++++++ docs/installation.rst | 51 ++++++++++ docs/make.bat | 35 +++++++ docs/overview.rst | 30 ++++++ docs/requirements.txt | 2 + docs/sitcom-simulator-logo.png | Bin 0 -> 18680 bytes docs/usage.rst | 63 ++++++++++++ pyproject.toml | 15 +-- requirements.txt | 2 - sitcom_simulator/__init__.py | 12 +-- sitcom_simulator/auto.py | 77 +++++++++++++++ sitcom_simulator/cli.py | 9 +- .../{image_generator => image}/__init__.py | 0 .../image_generator.py | 26 +++-- .../integrations}/__init__.py | 0 .../integrations/pillow.py | 7 ++ .../integrations/stability.py | 8 +- sitcom_simulator/models.py | 86 ++++++++++++++++- .../{music_generator => music}/__init__.py | 0 .../integrations}/__init__.py | 0 .../integrations/freepd.py | 18 +++- .../music_generator.py | 18 ++++ .../{script_generator => script}/__init__.py | 0 .../script/integrations/__init__.py | 0 .../script/integrations/chatgpt/__init__.py | 0 .../integrations/chatgpt/chatgpt.py | 7 ++ .../integrations/chatgpt/instructions.py | 0 .../script/integrations/fakeyou/__init__.py | 0 .../fakeyou/character_extractor.py | 31 ++++-- .../fakeyou/character_selector.py | 7 +- .../integrations/fakeyou/characters.toml | 0 .../integrations/fakeyou/narrators.py | 0 .../{script_generator => script}/llm.py | 0 .../script_generator.py | 16 ++-- sitcom_simulator/sitcom_creator.py | 63 ------------ sitcom_simulator/speech/__init__.py | 1 + .../speech/integrations/__init__.py | 0 .../integrations/fakeyou.py | 36 ++++++- .../integrations/gtts.py | 9 +- .../speech_generator.py | 19 ++++ sitcom_simulator/speech_generator/__init__.py | 1 - sitcom_simulator/user_input.py | 23 ++++- .../{video_generator => video}/__init__.py | 0 .../video/integrations/__init__.py | 0 .../integrations/ffmpeg.py | 49 ++++++++-- .../video/integrations/moviepy.py | 68 +++++++++++++ sitcom_simulator/video/video_generator.py | 33 +++++++ .../video_generator/integrations/moviepy.py | 68 ------------- .../video_generator/video_generator.py | 22 ----- 56 files changed, 900 insertions(+), 293 deletions(-) create mode 100644 .github/workflows/docs.yml create mode 100644 .vscode/settings.json create mode 100644 docs/Makefile create mode 100644 docs/api.rst create mode 100644 docs/conf.py create mode 100644 docs/index.rst create mode 100644 docs/installation.rst create mode 100644 docs/make.bat create mode 100644 docs/overview.rst create mode 100644 docs/requirements.txt create mode 100644 docs/sitcom-simulator-logo.png create mode 100644 docs/usage.rst create mode 100644 sitcom_simulator/auto.py rename sitcom_simulator/{image_generator => image}/__init__.py (100%) rename sitcom_simulator/{image_generator => image}/image_generator.py (66%) rename sitcom_simulator/{script_generator/integrations/chatgpt => image/integrations}/__init__.py (100%) rename sitcom_simulator/{image_generator => image}/integrations/pillow.py (65%) rename sitcom_simulator/{image_generator => image}/integrations/stability.py (83%) rename sitcom_simulator/{music_generator => music}/__init__.py (100%) rename 
sitcom_simulator/{script_generator/integrations/fakeyou => music/integrations}/__init__.py (100%) rename sitcom_simulator/{music_generator => music}/integrations/freepd.py (77%) rename sitcom_simulator/{music_generator => music}/music_generator.py (62%) rename sitcom_simulator/{script_generator => script}/__init__.py (100%) create mode 100644 sitcom_simulator/script/integrations/__init__.py create mode 100644 sitcom_simulator/script/integrations/chatgpt/__init__.py rename sitcom_simulator/{script_generator => script}/integrations/chatgpt/chatgpt.py (54%) rename sitcom_simulator/{script_generator => script}/integrations/chatgpt/instructions.py (100%) create mode 100644 sitcom_simulator/script/integrations/fakeyou/__init__.py rename sitcom_simulator/{script_generator => script}/integrations/fakeyou/character_extractor.py (73%) rename sitcom_simulator/{script_generator => script}/integrations/fakeyou/character_selector.py (89%) rename sitcom_simulator/{script_generator => script}/integrations/fakeyou/characters.toml (100%) rename sitcom_simulator/{script_generator => script}/integrations/fakeyou/narrators.py (100%) rename sitcom_simulator/{script_generator => script}/llm.py (100%) rename sitcom_simulator/{script_generator => script}/script_generator.py (86%) delete mode 100644 sitcom_simulator/sitcom_creator.py create mode 100644 sitcom_simulator/speech/__init__.py create mode 100644 sitcom_simulator/speech/integrations/__init__.py rename sitcom_simulator/{speech_generator => speech}/integrations/fakeyou.py (83%) rename sitcom_simulator/{speech_generator => speech}/integrations/gtts.py (64%) rename sitcom_simulator/{speech_generator => speech}/speech_generator.py (56%) delete mode 100644 sitcom_simulator/speech_generator/__init__.py rename sitcom_simulator/{video_generator => video}/__init__.py (100%) create mode 100644 sitcom_simulator/video/integrations/__init__.py rename sitcom_simulator/{video_generator => video}/integrations/ffmpeg.py (75%) create mode 100644 sitcom_simulator/video/integrations/moviepy.py create mode 100644 sitcom_simulator/video/video_generator.py delete mode 100644 sitcom_simulator/video_generator/integrations/moviepy.py delete mode 100644 sitcom_simulator/video_generator/video_generator.py diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml new file mode 100644 index 0000000..017cc24 --- /dev/null +++ b/.github/workflows/docs.yml @@ -0,0 +1,49 @@ +name: Build and Deploy Documentation + +on: + push: + branches: ["master"] + +# Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages +permissions: + contents: read + pages: write + id-token: write + +# Allow only one concurrent deployment, skipping runs queued between the run in-progress and latest queued. +# However, do NOT cancel in-progress runs as we want to allow these production deployments to complete. 
+concurrency: + group: "pages" + cancel-in-progress: false + +jobs: + + build: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v1 + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.11' + - name: Install Python Packages + run: pip install sphinx && pip install -r docs/requirements.txt && pip install -r requirements.txt + - name: Build Docs + run: sphinx-build -M html ./docs ./docs/_build/ + - name: Upload artifact + uses: actions/upload-pages-artifact@v3 + with: + path: ./docs/_build/html/ + + + deploy: + environment: + name: github-pages + url: ${{ steps.deployment.outputs.page_url }} + runs-on: ubuntu-latest + needs: build + steps: + - name: Deploy to GitHub Pages + id: deployment + uses: actions/deploy-pages@v4 diff --git a/.gitignore b/.gitignore index 4697ddb..ad6eb22 100644 --- a/.gitignore +++ b/.gitignore @@ -12,12 +12,18 @@ ffmpeg.exe ffprobe.exe *.ttf *.mp4 +# Sphinx +docs/_build/ +docs/_autosummary/ +docs/_static/ +docs/doctrees/ # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] *$py.class + # C extensions *.so diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..a7d0fc7 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,3 @@ +{ + "esbonio.sphinx.confDir": "" +} \ No newline at end of file diff --git a/README.md b/README.md index 8c2c3e2..733dd09 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,11 @@ A highly-customizable tool that automatically creates AI-generated meme videos `pip install sitcom-simulator` -## Examples +## Documentation + +[View the documentation](https://joshmoody24.github.io/sitcom-simulator/) for setup instructions and code samples. + +## Example Videos

@@ -67,81 +71,6 @@ create_sitcom( ) ``` -Power users can completely customize the video creation process: - -```python -from sitcom_simulator import ( - script_from_file, - add_voices, - add_images, - add_music, - render_video, -) - -def upload_to_s3(index, file_path): - ... # arbitrary code - -initial_script = script_from_file("custom_script.toml") - -script_with_voices = add_voices( - initial_script, - engine="fakeyou", - on_voice_generated=upload_to_s3) - -script_with_images = add_images( - script_with_voices, - engine="stability", - on_image_generated=upload_to_s3) - -script_with_music = add_music(script_with_images) - -render_video( - script=final_script, - font="Papyrus", - output_path=f"./{final_script.metadata.title}.mp4") -``` - -More documentation on the advanced features will be coming soon. - -## Getting Started - -Several things must be completed before running Sitcom Simulator for the first time. - -### Prerequisites -- Python 3 -- [ffmpeg](https://ffmpeg.org/download.html) (see below for more details) -- Stability API key (get one [here](https://beta.dreamstudio.ai/membership?tab=apiKeys)) -- OpenAI API key (get one [here](https://openai.com/api/)) - -#### FFmpeg - -The ffmpeg command must be accessible on your machine. This will vary depending on your system, but you can install it from the [official download page](https://ffmpeg.org/download.html) or various package managers, e.g., `apt install ffmpeg` on Debian/Ubuntu, `brew install ffmpeg` on Mac, etc. - -Alternatively, instead of installing ffmpeg on your system, you can place the `ffmpeg` and `ffprobe` binaries in your project's root directory, which will work equally well. - -### Environment Variables - -This package requires API keys from OpenAI and Stability AI to be stored in environment variables. - -First, acquire API keys for OpenAI and Stability AI (see [prerequisites](#prerequisites)) - -How you set the environment variables will depend on your use case: - -#### Comamnd Line - -Set the environments in the terminal, i.e., `export OPENAI_API_KEY=` (Linux) `set OPENAI_API_KEY=` (Windows) - -#### Python Projects - -Create a `.env` file in your project's root directory, with the following structure: - -```bash -STABILITY_API_KEY='your_key_here' -OPENAI_API_KEY='your_key_here' -``` - -The `.env` file will be automatically detected by the program. - ## How it Works Sitcom Simulator is essentially duct tape that combines multiple different AI tools into one unholy abomination. @@ -153,6 +82,11 @@ Sitcom Simulator is essentially duct tape that combines multiple different AI to ## Contributions -Want to help work on this project? I'm down! Feel free to reach out to me if you want to contribute or have any questions :) +Want to help work on this project? I'm down! [Contact me](https://joshmoody.org/contact/) if you want to contribute or have any questions :) + +Have fun!!! + +## Links -Have fun!!! \ No newline at end of file +- [Documentation](https://joshmoody24.github.io/sitcom-simulator/) +- Web App (coming soon) \ No newline at end of file diff --git a/docs/Makefile b/docs/Makefile new file mode 100644 index 0000000..d4bb2cb --- /dev/null +++ b/docs/Makefile @@ -0,0 +1,20 @@ +# Minimal makefile for Sphinx documentation +# + +# You can set these variables from the command line, and also +# from the environment for the first two. +SPHINXOPTS ?= +SPHINXBUILD ?= sphinx-build +SOURCEDIR = . +BUILDDIR = _build + +# Put it first so that "make" without argument is like "make help". 
+help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +.PHONY: help Makefile + +# Catch-all target: route all unknown targets to Sphinx using the new +# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). +%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/docs/api.rst b/docs/api.rst new file mode 100644 index 0000000..4ecc569 --- /dev/null +++ b/docs/api.rst @@ -0,0 +1,8 @@ +API +=== + +.. autosummary:: + :toctree: _autosummary + :recursive: + + sitcom_simulator \ No newline at end of file diff --git a/docs/conf.py b/docs/conf.py new file mode 100644 index 0000000..6dcfe2d --- /dev/null +++ b/docs/conf.py @@ -0,0 +1,70 @@ +# Configuration file for the Sphinx documentation builder. +# +# For the full list of built-in configuration values, see the documentation: +# https://www.sphinx-doc.org/en/master/usage/configuration.html + +# -- Project information ----------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information + +import tomllib +import os +import sys +sys.path.insert(0, os.path.abspath('..')) # enable importing sitcom_simulator + +with open('../pyproject.toml', 'rb') as pyproject: + pyproject = tomllib.load(pyproject) + +project = pyproject['project']['name'] +author = pyproject['project']['authors'][0]['name'] +copyright = f'2024, {author}' +version = pyproject['project']['version'] +release = version + +# -- General configuration --------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration + +extensions = [ + 'sphinx.ext.duration', + 'sphinx.ext.doctest', + 'sphinx.ext.autodoc', + 'sphinx.ext.autosummary', + 'sphinx_autodoc_typehints', + ] + +autosummary_generate = True +autodoc_typehints = "description" # description, signature, none +autodoc_typehints_format = "short" +autodoc_default_options = { + 'members': True, + 'member-order': 'bysource', + 'special-members': '__init__', + 'undoc-members': True, + 'exclude-members': '__weakref__', + # make function params alphabetical +} + + +templates_path = ['_templates'] +exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] + + + +# -- Options for HTML output ------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output + +html_theme = 'furo' # 'alabaster' +html_static_path = ['_static'] + +html_logo = "sitcom-simulator-logo.png" +html_theme_options = { + "light_css_variables": { + "color-brand-primary": "green", + "color-brand-content": "green", + "color-admonition-background": "green", + }, + "dark_css_variables": { + "color-brand-primary": "springgreen", + "color-brand-content": "springgreen", + "color-admonition-background": "green", + }, +} \ No newline at end of file diff --git a/docs/index.rst b/docs/index.rst new file mode 100644 index 0000000..35e99bb --- /dev/null +++ b/docs/index.rst @@ -0,0 +1,35 @@ +.. Sitcom Simulator documentation master file, created by + sphinx-quickstart on Sat Feb 10 23:00:30 2024. + You can adapt this file completely to your liking, but it should at least + contain the root `toctree` directive. + +Sitcom Simulator +============================================ + +**Sitcom Simulator** is a highly-customizable tool for automatically creating AI-generated meme videos. 
It combines numerous generative AI tools like `ChatGPT `_, `Stable Diffusion `_, and `FakeYou `_ to create short, funny videos in many styles. + +.. toctree:: + :maxdepth: 2 + :caption: Getting Started + + overview + installation + usage + +.. toctree:: + :maxdepth: 2 + :caption: Reference + + api + +Index +---------------- + +* :ref:`genindex` +* :ref:`modindex` + +External Links +---------------- + +* `GitHub `_ +* `PyPi `_ \ No newline at end of file diff --git a/docs/installation.rst b/docs/installation.rst new file mode 100644 index 0000000..d8a8814 --- /dev/null +++ b/docs/installation.rst @@ -0,0 +1,51 @@ +Installation +============= + +``pip install sitcom-simulator`` + +Dependencies +------------ + +You will need the following dependencies before running Sitcom Simulator for the first time: + +* `Python 3.11 `_ or later +* `FFmpeg `_ +* `Stability API key `_ +* `OpenAI API key `_ + +FFmpeg +^^^^^^ + +The ``ffmpeg`` command must be accessible on your machine. This will vary depending on your system, but you can install it from the `official FFmpeg download page `_ or various package managers, e.g., ``apt install ffmpeg`` on Debian/Ubuntu, ``brew install ffmpeg`` on Mac, etc. + +Alternatively, instead of installing ffmpeg on your system, you can place the ``ffmpeg`` and ``ffprobe`` binaries in your project's root directory, which will work equally well. + +Environment Variables +--------------------- + +This package requires API keys from OpenAI and Stability AI to be stored in environment variables. + +How you set the environment variables will depend on your use case, as explained below. + +Command Line +^^^^^^^^^^^^ + +Set the environments in the terminal: + +Linux: ``export OPENAI_API_KEY=`` + +Windows: ``set OPENAI_API_KEY=`` + +Python Projects +^^^^^^^^^^^^^^^ + +Create a ``.env`` file in your project's root directory, with the following structure: + +.. code-block:: bash + + STABILITY_API_KEY='your_key_here + OPENAI_API_KEY='your_key_here + +The ``.env`` file will be automatically detected by the program. + +You're ready to make your first meme video! \ No newline at end of file diff --git a/docs/make.bat b/docs/make.bat new file mode 100644 index 0000000..954237b --- /dev/null +++ b/docs/make.bat @@ -0,0 +1,35 @@ +@ECHO OFF + +pushd %~dp0 + +REM Command file for Sphinx documentation + +if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=sphinx-build +) +set SOURCEDIR=. +set BUILDDIR=_build + +%SPHINXBUILD% >NUL 2>NUL +if errorlevel 9009 ( + echo. + echo.The 'sphinx-build' command was not found. Make sure you have Sphinx + echo.installed, then set the SPHINXBUILD environment variable to point + echo.to the full path of the 'sphinx-build' executable. Alternatively you + echo.may add the Sphinx directory to PATH. + echo. + echo.If you don't have Sphinx installed, grab it from + echo.https://www.sphinx-doc.org/ + exit /b 1 +) + +if "%1" == "" goto help + +%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% +goto end + +:help +%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% + +:end +popd diff --git a/docs/overview.rst b/docs/overview.rst new file mode 100644 index 0000000..a7ed996 --- /dev/null +++ b/docs/overview.rst @@ -0,0 +1,30 @@ +Overview +================ + +What is Sitcom Simulator? +----------------------------- + +Sitcom Simulator is a tool for auto-generating meme videos from text prompts. +The user enters a prompt, say, ``Mario and Luigi summon a demon``, +and the program generates a short video on that topic. 
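+
+For example, a minimal Python sketch (assuming the package is installed, the required API keys are configured, and ``create_sitcom`` is exported from the package root) looks like this:
+
+.. code-block:: python
+
+   from sitcom_simulator import create_sitcom
+
+   # generates a script, voices, images, and music, then renders the final video
+   create_sitcom(prompt="Mario and Luigi summon a demon")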
+
+Sitcom Simulator's design focuses on the following goals:
+
+* **Ease of use**: The user should be able to generate a video with minimal effort.
+* **Customization**: The user should be able to customize the video extensively.
+* **Quality**: The user should be able to generate a video that is at least somewhat entertaining.
+* **Speed**: The user should be able to generate a video within a few minutes.
+* **Cost-effectiveness**: The user should be able to generate a video for pennies at most.
+
+How does it work?
+-----------------------------
+
+Sitcom Simulator is essentially duct tape that combines multiple different AI tools into one unholy abomination.
+
+#. `ChatGPT `_ generates the video script
+#. `FakeYou `_ generates voices for the characters
+#. `Stable Diffusion `_ generates images for the characters
+#. `Freepd `_ provides the background music
+#. `FFmpeg `_ connects the images and voices into a movie
+
+Sitcom Simulator is available as a command-line tool or as a Python module. Continue following the documentation to learn how to install and use it.
\ No newline at end of file
diff --git a/docs/requirements.txt b/docs/requirements.txt
new file mode 100644
index 0000000..8e8fd36
--- /dev/null
+++ b/docs/requirements.txt
@@ -0,0 +1,2 @@
+furo
+sphinx-autodoc-typehints
\ No newline at end of file
diff --git a/docs/sitcom-simulator-logo.png b/docs/sitcom-simulator-logo.png
new file mode 100644
index 0000000000000000000000000000000000000000..f1bfc3914a18fc2179bcfec1b6b9f1f6cf070870
GIT binary patch
literal 18680
[binary image data omitted; several subsequent hunks are truncated]
zD|-E&^vrb;WknR)c+RA_u1%S&(hf}cZTUkTNcY(IM@1b>D2$)HT^#Sn&>xySzxGPxE$O-x}IGuG#jn z@S5NYzCck<;gN?SF=|i&iY{ww5Z(ho!6Pw}l;_ZWGq};sf>%(ZiCy#V9H?l>Mi2mX z`g0j_6kL}$2Fo7^WAPR6%!YOF*wWi!?XMQY+GTePs^kRAp<&#vp%;Zmm;=ev&w()* zgE82eU^$GU>91ap6>RcK@=VVLy3zZDg3Az{4p4VF&GLg498KB zobC$-_yzI@AkRpQ9b?DXG4$iAa2yI-86ZN4*w#$Av19D`uXJ1mLsIl!r{#KqIgq^m zTcLQ(b_~W~3`T{8ARAZowcM@=wxq&qz%l+SAt^p3`R2gbeEt}W!E#6ny*M9uz6ge> b 3] if prompt else ["sitcom", "funny", "comedy", "ai", "deepfake"] + # upload_to_yt(result.path, result.title, result.description, keywords, "24", "public") + + return result \ No newline at end of file diff --git a/sitcom_simulator/cli.py b/sitcom_simulator/cli.py index c5b99bd..3e4fdeb 100644 --- a/sitcom_simulator/cli.py +++ b/sitcom_simulator/cli.py @@ -1,8 +1,8 @@ -from .sitcom_creator import create_sitcom +from .auto import create_sitcom import argparse import tomllib -def parse_args(): +def _parse_args(): parser = argparse.ArgumentParser( prog = "Sitcom Simulator", description = "A tool that creates bad sitcoms using AI tools", @@ -22,6 +22,9 @@ def parse_args(): return args def main(): + """ + The main entry point for the CLI, invoked when the module is run as a script. + """ print("\nSitcom Simulator\nBy Josh Moody\n") try: @@ -30,7 +33,7 @@ def main(): except FileNotFoundError: # no big deal config = {} - args = parse_args() + args = _parse_args() # do the magic create_sitcom( diff --git a/sitcom_simulator/image_generator/__init__.py b/sitcom_simulator/image/__init__.py similarity index 100% rename from sitcom_simulator/image_generator/__init__.py rename to sitcom_simulator/image/__init__.py diff --git a/sitcom_simulator/image_generator/image_generator.py b/sitcom_simulator/image/image_generator.py similarity index 66% rename from sitcom_simulator/image_generator/image_generator.py rename to sitcom_simulator/image/image_generator.py index b1b129c..280dae5 100644 --- a/sitcom_simulator/image_generator/image_generator.py +++ b/sitcom_simulator/image/image_generator.py @@ -15,12 +15,15 @@ def generate_images( engine:Engine="stability", ): """ - Generates and returns a list of image paths for the given script - @param script: The script to generate images for - @param width: The width of the images to generate - @param height: The height of the images to generate - @param on_image_generated: A callback to call after each image is generated - @param engine: The engine to use for generating images + Generates and returns a list of image paths for the given script. + + More procedural in nature than add_images. + + :param script: The script to generate images for + :param width: The width of the images to generate + :param height: The height of the images to generate + :param on_image_generated: A callback to call after each image is generated which takes the clip index and path to the generated image + :param engine: The engine to use for generating images """ image_paths: List[str | None] = [] image_prompts = [clip.image_prompt for clip in script.clips] @@ -47,6 +50,17 @@ def add_images( on_image_generated: Optional[Callable[[int, str], None]] = None, engine:Engine="stability", ) -> Script: + """ + Given a script, returns the same script but with the image paths filled in. + + More functional in nature than generate_images. 
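+
+    A minimal usage sketch (assuming ``script`` is a ``Script`` produced by ``write_script`` and a Stability API key is configured)::
+
+        # returns a new Script whose clips have their image paths filled in
+        script = add_images(script, engine="stability")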
+ + :param script: The script to add images to + :param width: The width of the images to generate + :param height: The height of the images to generate + :param on_image_generated: A callback to call after each image is generated which takes the clip index and path to the generated image + :param engine: The engine to use for generating images + """ image_paths = generate_images( script=script, width=width, diff --git a/sitcom_simulator/script_generator/integrations/chatgpt/__init__.py b/sitcom_simulator/image/integrations/__init__.py similarity index 100% rename from sitcom_simulator/script_generator/integrations/chatgpt/__init__.py rename to sitcom_simulator/image/integrations/__init__.py diff --git a/sitcom_simulator/image_generator/integrations/pillow.py b/sitcom_simulator/image/integrations/pillow.py similarity index 65% rename from sitcom_simulator/image_generator/integrations/pillow.py rename to sitcom_simulator/image/integrations/pillow.py index aa94015..05ff8ff 100644 --- a/sitcom_simulator/image_generator/integrations/pillow.py +++ b/sitcom_simulator/image/integrations/pillow.py @@ -3,6 +3,13 @@ import tempfile def generate_image(width:int=720, height:int=1280): + """ + Generates a random solid-color image and returns the path to the image file. + Intended for use in debugging and testing. + + :param width: The width of the image to generate + :param height: The height of the image to generate + """ # Generate a random color color = (random.randint(0, 255), random.randint(0, 255), random.randint(0, 255)) diff --git a/sitcom_simulator/image_generator/integrations/stability.py b/sitcom_simulator/image/integrations/stability.py similarity index 83% rename from sitcom_simulator/image_generator/integrations/stability.py rename to sitcom_simulator/image/integrations/stability.py index f4e3ab8..fd901ee 100644 --- a/sitcom_simulator/image_generator/integrations/stability.py +++ b/sitcom_simulator/image/integrations/stability.py @@ -7,10 +7,14 @@ STABILITY_HOST = "grpc.stability.ai:443" -def generate_image(prompt: str, width=1024, height=1024): +def generate_image(prompt:str, width:int=1024, height:int=1024): """ - generates an image for each prompt using stable diffusion, + Generates an image for each prompt using stable diffusion, returning a list of file paths for those images + + :param prompt: The prompt to generate the image for + :param width: The width of the image to generate + :param height: The height of the image to generate """ # customize engine here if desired (default is newest) diff --git a/sitcom_simulator/models.py b/sitcom_simulator/models.py index d1b373f..b5a7ec8 100644 --- a/sitcom_simulator/models.py +++ b/sitcom_simulator/models.py @@ -2,21 +2,45 @@ @dataclass(frozen=True) class Character: + """ + A character in a script and information about their voice. + + :param name: The name of the character + :param voice_token: The token for the character's voice + """ name: str voice_token: str @staticmethod - def from_dict(data: dict): + def from_dict(data: dict[str, str]): + """ + Creates a Character from a dictionary with the same shape. + """ return Character( name=data['name'], voice_token=data['voice_token'] ) def replace(self, **kwargs): + """ + Returns a new Character with the specified fields replaced. + """ return replace(self, **kwargs) @dataclass(frozen=True) class Clip: + """ + A clip in a script, including the speaker, speech, and audio. 
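+
+    Example (a hypothetical clip built from a plain dictionary; all values are illustrative)::
+
+        clip = Clip.from_dict({
+            "speaker": "Mario",
+            "speech": "Mamma mia!",
+            "image_prompt": "Mario in a kitchen",
+        })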
+ + :param speaker: The name of the speaker + :param speech: The speech for the clip + :param image_prompt: The prompt for the image + :param image_path: The path to the image + :param audio_url: The URL for the audio + :param audio_path: The path to the audio + :param title: The title of the clip + :param duration: The duration of the clip + """ speaker: str | None speech: str | None image_prompt: str | None @@ -28,14 +52,24 @@ class Clip: @property def needs_audio(self): - return self.speech and not (self.audio_path or self.audio_url) + """ + Returns True if the clip needs audio, and False if it doesn't. + """ + return bool(self.speech and not (self.audio_path or self.audio_url)) @property def needs_image(self): - return self.image_prompt and not self.image_path + """ + Returns True if the clip needs an image, and False if it doesn't. + """ + return bool(self.image_prompt and not self.image_path) @staticmethod def from_dict(data: dict): + """ + Creates a Clip from a dictionary with the same shape. + All fields are optional. + """ return Clip( speaker=data.get('speaker'), speech=data.get('speech'), @@ -48,10 +82,20 @@ def from_dict(data: dict): ) def replace(self, **kwargs): + """ + Returns a new Clip with the specified fields replaced. + """ return replace(self, **kwargs) @dataclass(frozen=True) class ScriptMetadata: + """ + Metadata for a script. + + :param title: The title of the script + :param bgm_style: The style of the background music + :param art_style: The style of the art + """ title: str bgm_style: str art_style: str @@ -59,6 +103,10 @@ class ScriptMetadata: @staticmethod def from_dict(data: dict): + """ + Creates a ScriptMetadata from a dictionary with the same shape. + All fields are required except for bgm_path. + """ return ScriptMetadata( title=data['title'], bgm_style=data['bgm_style'], @@ -67,16 +115,38 @@ def from_dict(data: dict): ) def replace(self, **kwargs): + """ + Returns a new ScriptMetadata with the specified fields replaced. + """ return replace(self, **kwargs) @dataclass(frozen=True) class Script: + """ + Contains all the data for a script, including characters, clips, and metadata. + + The clips are ordered in the order they should be played. + + In general, the fields should be populated in the following order: + 1. characters + 2. clips + 3. metadata + + Metadata is last to give the language model more context before summarizing the script. + + :param characters: A list of characters in the script + :param clips: A list of clips in the script + :param metadata: The metadata for the script + """ characters: list[Character] clips: list[Clip] metadata: ScriptMetadata @staticmethod def from_dict(data: dict): + """ + Returns a Script from a dictionary with the same shape. + """ return Script( characters=[Character.from_dict(character) for character in data['characters']], clips=[Clip.from_dict(clip) for clip in data['clips']], @@ -84,10 +154,20 @@ def from_dict(data: dict): ) def replace(self, **kwargs): + """ + Returns a new Script with the specified fields replaced. + """ return replace(self, **kwargs) @dataclass(frozen=True) class VideoResult: + """ + The result of rendering a video. 
+ + :param path: The path to the rendered video + :param title: The title of the video + :param description: The description of the video + """ path: str title: str description: str \ No newline at end of file diff --git a/sitcom_simulator/music_generator/__init__.py b/sitcom_simulator/music/__init__.py similarity index 100% rename from sitcom_simulator/music_generator/__init__.py rename to sitcom_simulator/music/__init__.py diff --git a/sitcom_simulator/script_generator/integrations/fakeyou/__init__.py b/sitcom_simulator/music/integrations/__init__.py similarity index 100% rename from sitcom_simulator/script_generator/integrations/fakeyou/__init__.py rename to sitcom_simulator/music/integrations/__init__.py diff --git a/sitcom_simulator/music_generator/integrations/freepd.py b/sitcom_simulator/music/integrations/freepd.py similarity index 77% rename from sitcom_simulator/music_generator/integrations/freepd.py rename to sitcom_simulator/music/integrations/freepd.py index 24e73fc..5cefe3f 100644 --- a/sitcom_simulator/music_generator/integrations/freepd.py +++ b/sitcom_simulator/music/integrations/freepd.py @@ -7,6 +7,9 @@ import tempfile class MusicCategory(Enum): + """ + The different categories of music available on FreePD. + """ UPBEAT='upbeat' EPIC='epic' HORROR='horror' @@ -19,9 +22,17 @@ class MusicCategory(Enum): @classmethod def values(cls): + """ + Returns a list of the values of the enum members. + """ return [str(member.value) for name, member in cls.__members__.items()] def download_random_music(category: MusicCategory) -> str | None: + """ + Given a category, downloads a random song from FreePD in that category and returns the path to the downloaded file. + + :param category: The category of music to download + """ # Send a GET request to the website url = f"https://freepd.com/{category.value}.php" response = requests.get(url) @@ -41,7 +52,12 @@ def download_random_music(category: MusicCategory) -> str | None: return download_file(download_link) -def download_file(url): +def download_file(url: str): + """ + Given a URL, downloads the file and returns the path to the downloaded file. + + :param url: The URL of the file to download + """ response = requests.get(url) if response.status_code == 200: # Get the file name from the URL diff --git a/sitcom_simulator/music_generator/music_generator.py b/sitcom_simulator/music/music_generator.py similarity index 62% rename from sitcom_simulator/music_generator/music_generator.py rename to sitcom_simulator/music/music_generator.py index 24c7241..ac682dd 100644 --- a/sitcom_simulator/music_generator/music_generator.py +++ b/sitcom_simulator/music/music_generator.py @@ -10,6 +10,14 @@ def generate_music( category: str | None, engine:Engine="freepd", ): + """ + Generates and returns a path to a music file using the given engine. + + More procedural in nature than add_music. + + :param category: The category of music to generate + :param engine: The engine to use for generating music + """ if engine == "freepd": logging.debug(f"Generating music: {category}") try: @@ -28,6 +36,16 @@ def add_music( category: str | None = None, on_music_generated: Optional[Callable[[str], None]] = None ): + """ + Given a script, returns the same script but with the music path filled in. + + More functional in nature than generate_music. 
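+
+    A minimal usage sketch (assuming ``script`` is a ``Script``; ``"horror"`` is one of the FreePD categories)::
+
+        # returns the same script with the background music path filled in
+        script = add_music(script, category="horror")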
+ + :param script: The script to add music to + :param engine: The engine to use for generating music + :param category: The category of music to generate + :param on_music_generated: A callback to call after the music is generated which takes the path to the generated music + """ music_path = generate_music(category) if on_music_generated: on_music_generated(music_path) diff --git a/sitcom_simulator/script_generator/__init__.py b/sitcom_simulator/script/__init__.py similarity index 100% rename from sitcom_simulator/script_generator/__init__.py rename to sitcom_simulator/script/__init__.py diff --git a/sitcom_simulator/script/integrations/__init__.py b/sitcom_simulator/script/integrations/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/sitcom_simulator/script/integrations/chatgpt/__init__.py b/sitcom_simulator/script/integrations/chatgpt/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/sitcom_simulator/script_generator/integrations/chatgpt/chatgpt.py b/sitcom_simulator/script/integrations/chatgpt/chatgpt.py similarity index 54% rename from sitcom_simulator/script_generator/integrations/chatgpt/chatgpt.py rename to sitcom_simulator/script/integrations/chatgpt/chatgpt.py index c6d22b0..bf5c9e3 100644 --- a/sitcom_simulator/script_generator/integrations/chatgpt/chatgpt.py +++ b/sitcom_simulator/script/integrations/chatgpt/chatgpt.py @@ -1,6 +1,13 @@ import openai def chat(prompt: str, max_tokens:int=2048, temperature:float=1): + """ + Given a prompt, returns a response from ChatGPT. + + :param prompt: The prompt for the chat + :param max_tokens: The maximum number of tokens to generate + :param temperature: The temperature to use when generating the response, which controls randomness. Higher values make the response more random, while lower values make the response more deterministic. + """ completion = openai.ChatCompletion.create( model="gpt-3.5-turbo", temperature=temperature, diff --git a/sitcom_simulator/script_generator/integrations/chatgpt/instructions.py b/sitcom_simulator/script/integrations/chatgpt/instructions.py similarity index 100% rename from sitcom_simulator/script_generator/integrations/chatgpt/instructions.py rename to sitcom_simulator/script/integrations/chatgpt/instructions.py diff --git a/sitcom_simulator/script/integrations/fakeyou/__init__.py b/sitcom_simulator/script/integrations/fakeyou/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/sitcom_simulator/script_generator/integrations/fakeyou/character_extractor.py b/sitcom_simulator/script/integrations/fakeyou/character_extractor.py similarity index 73% rename from sitcom_simulator/script_generator/integrations/fakeyou/character_extractor.py rename to sitcom_simulator/script/integrations/fakeyou/character_extractor.py index 8ca89c9..dcf3c02 100644 --- a/sitcom_simulator/script_generator/integrations/fakeyou/character_extractor.py +++ b/sitcom_simulator/script/integrations/fakeyou/character_extractor.py @@ -1,4 +1,4 @@ -from sitcom_simulator.script_generator.llm import chat +from sitcom_simulator.script.llm import chat import json import requests import re @@ -9,7 +9,11 @@ from typing import List def generate_character_list(prompt: str) -> List[Character]: - "Given a user-submitted prompt, return a list of characters (names + voice_tokens) from FakeYou for the characters in the script." + """ + Uses a large language model to generate a list of possible famous characters related to the prompt. 
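+
+    A usage sketch (the prompt is illustrative; assumes an OpenAI API key and access to FakeYou)::
+
+        # returns a list of Character objects with names and FakeYou voice tokens
+        characters = generate_character_list("Mario and Luigi summon a demon")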
+ + :param prompt: The user-submitted prompt + """ instructions = f"""Generate a list of potential characters to use in a short video of this prompt: @@ -46,7 +50,14 @@ def generate_character_list(prompt: str) -> List[Character]: return chosen_characters -def pure_name_to_model(models_list): +def pure_name_to_model(models_list: list[dict]): + """ + Given a list of models from FakeYou, returns a dictionary mapping the pure name of the character to the list of models matching that name. + + A pure name is the name of the character without any parenthetical information, e.g., "Velma (Scooby Doo)" -> "Velma" + + :param models_list: A list of models from FakeYou + """ names_to_model = {} for model in models_list: pure_name = pure_character_name(model['title']) @@ -59,8 +70,14 @@ def pure_name_to_model(models_list): return names_to_model NAME_PATTERN = re.compile(r"^\s*([^\(\n]*[^\s\(])\s*(?:\([^\n]*)?$") -def pure_character_name(raw_name): - "Returns just the character's true name from a FakeYou listing. FakeYou names are typically formatted like \"True Name (source)\" e.g., Velma (Scooby Doo)" +def pure_character_name(raw_name: str): + """ + Returns just the character's true name from a FakeYou listing. + + FakeYou names are typically formatted like \"True Name (source)\" e.g., Velma (Scooby Doo) + + :param raw_name: The raw name of the character from FakeYou + """ match = NAME_PATTERN.search(raw_name) if match: return match.group(1) @@ -68,7 +85,9 @@ def pure_character_name(raw_name): DEFAULT_RATING = 2 # not the worst possible, but pretty bad def calculate_star_rating(model): - "Estimates the true ratio of positive to negative reviews. Intuition: 5 stars from 10 reviews is worse than 4.8 stars from 1000 reviews." + """ + Estimates the true ratio of positive to negative reviews. Intuition: 5 stars from 10 reviews is worse than 4.8 stars from 1000 reviews. + """ if 'user_ratings' not in model: return DEFAULT_RATING positive_count = model['user_ratings']['positive_count'] diff --git a/sitcom_simulator/script_generator/integrations/fakeyou/character_selector.py b/sitcom_simulator/script/integrations/fakeyou/character_selector.py similarity index 89% rename from sitcom_simulator/script_generator/integrations/fakeyou/character_selector.py rename to sitcom_simulator/script/integrations/fakeyou/character_selector.py index 28f95e8..b8dc68e 100644 --- a/sitcom_simulator/script_generator/integrations/fakeyou/character_selector.py +++ b/sitcom_simulator/script/integrations/fakeyou/character_selector.py @@ -10,7 +10,12 @@ # user selects which auto-detected characters to include in the script # (including their voices if generating high-quality audio) -def select_characters(possible_characters): +def select_characters(possible_characters: dict[str, list[str]]): + """ + A procedure to prompt the user to select which auto-detected characters to include in the script. 
+ + :param possible_characters: A dictionary of character names to a list of voice tokens + """ print("--- Character Voice Selection ---") selected_characters = dict() for name, voices in possible_characters.items(): diff --git a/sitcom_simulator/script_generator/integrations/fakeyou/characters.toml b/sitcom_simulator/script/integrations/fakeyou/characters.toml similarity index 100% rename from sitcom_simulator/script_generator/integrations/fakeyou/characters.toml rename to sitcom_simulator/script/integrations/fakeyou/characters.toml diff --git a/sitcom_simulator/script_generator/integrations/fakeyou/narrators.py b/sitcom_simulator/script/integrations/fakeyou/narrators.py similarity index 100% rename from sitcom_simulator/script_generator/integrations/fakeyou/narrators.py rename to sitcom_simulator/script/integrations/fakeyou/narrators.py diff --git a/sitcom_simulator/script_generator/llm.py b/sitcom_simulator/script/llm.py similarity index 100% rename from sitcom_simulator/script_generator/llm.py rename to sitcom_simulator/script/llm.py diff --git a/sitcom_simulator/script_generator/script_generator.py b/sitcom_simulator/script/script_generator.py similarity index 86% rename from sitcom_simulator/script_generator/script_generator.py rename to sitcom_simulator/script/script_generator.py index db2bf60..c3bc5b8 100644 --- a/sitcom_simulator/script_generator/script_generator.py +++ b/sitcom_simulator/script/script_generator.py @@ -1,10 +1,10 @@ from .integrations.chatgpt import chatgpt, instructions from .integrations.fakeyou.character_extractor import generate_character_list from typing import Callable -from ..speech_generator.integrations.fakeyou import get_possible_characters_from_prompt +from ..speech.integrations.fakeyou import get_possible_characters_from_prompt from ..user_input import select_characters as debug_select_characters from .integrations.fakeyou.character_selector import select_characters as fakeyou_select_characters -from ..music_generator import MusicCategory +from ..music import MusicCategory from ..models import Script import tomllib from dataclasses import asdict @@ -24,12 +24,12 @@ def write_script( If characters are passed in, the resulting dialog is constrained to those characters. Otherwise, it prompts the user to select the appropriate characters. - @param prompt: The prompt for the script - @param manual_character_selection: Whether to prompt the user to select the characters. If manual_character_selection == False and characters == None, an LLM will extract characters. - @param max_tokens: The maximum number of tokens to generate - @param require_approval: Whether to prompt the user to approve the generated script - @param temperature: The temperature to use when generating the script - @param fakeyou_characters: Whether to restrict character selection to only voices from fakeyou.com + :param prompt: The prompt for the script + :param manual_character_selection: Whether to prompt the user to select the characters. If manual_character_selection == False and characters == None, an LLM will extract characters. 
+ :param max_tokens: The maximum number of tokens to generate + :param require_approval: Whether to prompt the user to approve the generated script + :param temperature: The temperature to use when generating the script + :param fakeyou_characters: Whether to restrict character selection to only voices from fakeyou.com """ if manual_character_selection: possible_characters = get_possible_characters_from_prompt(prompt) diff --git a/sitcom_simulator/sitcom_creator.py b/sitcom_simulator/sitcom_creator.py deleted file mode 100644 index 38d8746..0000000 --- a/sitcom_simulator/sitcom_creator.py +++ /dev/null @@ -1,63 +0,0 @@ -from .models import Script, VideoResult -from .script_generator import write_script -from .speech_generator import add_voices -from .image_generator import add_images -from .music_generator import add_music -from .video_generator import render_video -from .script_generator import script_from_file -from .social.yt_uploader import upload_to_yt - -def create_sitcom( - prompt: str | None = None, - art_style: str | None = None, - script_path: str | None = None, - debug: bool=False, - font: str = '', - max_tokens:int=2048, - approve_script:bool=False, - manual_select_characters:bool=True, - upload_to_yt=False, -): - if(prompt == None and script_path == None): - prompt = input("Enter a prompt to generate the video script: ") - - assert prompt or script_path, "You must provide a prompt or a script path" - - if prompt and not script_path: - initial_script = write_script( - prompt=prompt, - manual_character_selection=manual_select_characters, - max_tokens=max_tokens, - require_approval=approve_script, - fakeyou_characters=not debug, - ) - elif script_path and not prompt: - initial_script = script_from_file(script_path) - else: - raise ValueError("You must provide a prompt or a script path, not both") - - if art_style: - initial_script = initial_script.replace(metadata=initial_script.metadata.replace(art_style=art_style)) - - script_with_voices = add_voices(initial_script, engine="fakeyou" if not debug else "gtts") - script_with_images = add_images(script_with_voices, engine="stability" if not debug else "pillow") # could theoretically be done in parallel with the audio - script_with_music = add_music(script_with_images) - - final_script = script_with_music - - filename = final_script.metadata.title[:50].strip() or 'render' - output_path = f"./{filename}.mp4" - final_video_path = render_video(script=final_script, font=font, output_path=output_path) - - result = VideoResult( - path=final_video_path, - title=final_script.metadata.title, - description=prompt or 'an AI-generated meme video created with Sitcom Simulator' - ) - - if upload_to_yt: - title = prompt - keywords = [word for word in prompt.split(' ') if len(word) > 3] if prompt else ["sitcom", "funny", "comedy", "ai", "deepfake"] - upload_to_yt(result.path, result.title, result.description, keywords, "24", "public") - - return result \ No newline at end of file diff --git a/sitcom_simulator/speech/__init__.py b/sitcom_simulator/speech/__init__.py new file mode 100644 index 0000000..028c663 --- /dev/null +++ b/sitcom_simulator/speech/__init__.py @@ -0,0 +1 @@ +from sitcom_simulator.speech.speech_generator import generate_voices, add_voices \ No newline at end of file diff --git a/sitcom_simulator/speech/integrations/__init__.py b/sitcom_simulator/speech/integrations/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/sitcom_simulator/speech_generator/integrations/fakeyou.py 
b/sitcom_simulator/speech/integrations/fakeyou.py similarity index 83% rename from sitcom_simulator/speech_generator/integrations/fakeyou.py rename to sitcom_simulator/speech/integrations/fakeyou.py index ffec6d9..390f3ee 100644 --- a/sitcom_simulator/speech_generator/integrations/fakeyou.py +++ b/sitcom_simulator/speech/integrations/fakeyou.py @@ -9,7 +9,7 @@ from ...models import Script import logging import random -from sitcom_simulator.script_generator.integrations.fakeyou.narrators import BACKUP_NARRATORS +from sitcom_simulator.script.integrations.fakeyou.narrators import BACKUP_NARRATORS import urllib import tempfile import atexit @@ -20,7 +20,11 @@ POLL_RANDOMNESS = 1 def download_voice(url: str): - """Downloads audio from a given URL and saves it to a temporary file.""" + """ + Downloads audio from a given URL and saves it to a temporary file. + + :param url: The URL of the audio to download + """ logging.info("downloading audio:", url) temp_audio_file = tempfile.NamedTemporaryFile(suffix='.wav', delete=False) atexit.register(os.remove, temp_audio_file.name) @@ -37,6 +41,9 @@ def download_voice(url: str): raise Exception(f"Failed to download audio from URL: {url}. Error: {e.reason}") def fetch_voicelist(): + """ + Fetches the list of available voices from the FakeYou API. + """ response = requests.get('https://api.fakeyou.com/tts/list') json = response.json() if(json['success'] != True): @@ -50,14 +57,28 @@ def string_to_keywords(string: str, stop_at_first_paren=False) -> Set[str]: return {keyword.lower() for keyword in func(string).split(' ') if len(keyword) > 3 and keyword.lower() not in ['test', 'model']} def alphanumeric_to_first_paren(string: str) -> str: - string = string.split('(')[0].strip().replace('-', ' ') + """ + Returns the input string up to the first parenthesis with all non-alphanumeric characters removed. + + :param string: The input string + """ + string = string.split('(')[0].strip().replace('-', ' ') # TODO: fix this for names like Reggie Fils-Aime return alphanumeric(string) def alphanumeric(string: str): + """ + Strips all non-alphanumeric characters from the input string. + + :param string: The input string + """ return re.sub(r'[^a-zA-Z0-9 ]', '', string) -# scan the prompt for character names def get_possible_characters_from_prompt(prompt: str) -> dict: + """ + Scans the prompt for character names and returns a dictionary of character names to a list of voice tokens. + + :param prompt: The prompt for the script + """ possible_characters: Dict[str, List[str]] = dict() voices = fetch_voicelist() prompt_keywords = string_to_keywords(prompt, False) @@ -81,6 +102,13 @@ def get_possible_characters_from_prompt(prompt: str) -> dict: # takes in array of line models def generate_voices(script: Script, on_voice_generated: Optional[Callable[[int, str], None]] = None) -> List[str | None]: + """ + Sequentially generates voices for each line in the script using the FakeYou API. + It is intentionally slow to avoid getting rate limited. 
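The throttling mentioned here boils down to sleeping between job submissions and status polls, with a little jitter. A minimal sketch of that pattern (the base delay value is an assumption; only POLL_RANDOMNESS is visible in this hunk):

    import random
    import time

    POLL_DELAY = 10      # assumed base delay between FakeYou status checks, in seconds
    POLL_RANDOMNESS = 1  # jitter so repeated polls are not perfectly periodic

    def wait_before_next_poll():
        # sleep slightly more or less than the base delay to avoid hammering the API
        time.sleep(POLL_DELAY + random.uniform(-POLL_RANDOMNESS, POLL_RANDOMNESS))
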
+ + :param script: The script to generate voices for + :param on_voice_generated: A callback function to call when a voice is generated which takes the clip index and the URL of the generated audio + """ audio_urls: List[str | None] = [] for i, clip in tqdm(enumerate(script.clips), desc="Generating voices", total=len(script.clips)): # skip if doesn't need audio, or if audio already exists (audio should never already exist, but just in case) diff --git a/sitcom_simulator/speech_generator/integrations/gtts.py b/sitcom_simulator/speech/integrations/gtts.py similarity index 64% rename from sitcom_simulator/speech_generator/integrations/gtts.py rename to sitcom_simulator/speech/integrations/gtts.py index 4802003..0f0721f 100644 --- a/sitcom_simulator/speech_generator/integrations/gtts.py +++ b/sitcom_simulator/speech/integrations/gtts.py @@ -7,7 +7,14 @@ import atexit import os -def generate_voices(script: Script, on_voice_generated: Optional[Callable[[int, str], None]] = None) -> List[str | None]: +def generate_voices(script: Script, on_voice_generated: Optional[Callable[[int, str], None]] = None) -> List[str | None]: + """ + Generates and returns a list of voice clip paths for the given script using the Google Text-to-Speech API. + Intended for debugging purposes and ironic memes only. + + :param script: The script to generate voice clips for + :param on_voice_generated: A callback to call after each voice clip is generated which takes the clip index and path to the generated audio + """ filepaths: List[str | None] = [] for i, line in tqdm(enumerate(script.clips), "Generating voice clips", total=len(script.clips)): if not line.speech: diff --git a/sitcom_simulator/speech_generator/speech_generator.py b/sitcom_simulator/speech/speech_generator.py similarity index 56% rename from sitcom_simulator/speech_generator/speech_generator.py rename to sitcom_simulator/speech/speech_generator.py index 3fd1f88..1e12762 100644 --- a/sitcom_simulator/speech_generator/speech_generator.py +++ b/sitcom_simulator/speech/speech_generator.py @@ -11,6 +11,16 @@ def generate_voices( engine:Engine="fakeyou", on_voice_generated: Optional[Callable[[int, str], None]] = None ): + """ + Generates and returns a list of voice clip paths for the given script using the given engine. + + More procedural in nature than add_voices. + This function is typically not used directly, since add_voices is more pleasant to work with. + + :param script: The script to generate voice clips for + :param engine: The engine to use for generating voice clips + :param on_voice_generated: A callback to call after each voice clip is generated which takes the clip index and path to the generated audio + """ # generating voice clips can take a LONG time if args.high_quality_audio == True # because of long delays to avoid API timeouts on FakeYou.com if engine == "fakeyou": @@ -26,5 +36,14 @@ def add_voices( engine:Engine="fakeyou", on_voice_generated: Optional[Callable[[int, str], None]] = None ): + """ + Given a script, returns the same script but with the audio paths filled in. + + More functional in nature than generate_voices. 
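A rough usage sketch of add_voices as documented here (the script value and the callback are hypothetical; the engine names come from the Engine literal used in this module):

    from sitcom_simulator.speech import add_voices

    # 'script' is assumed to be a Script produced earlier in the pipeline
    script_with_voices = add_voices(
        script,
        engine="gtts",  # or "fakeyou" for character voices
        on_voice_generated=lambda i, path: print(f"clip {i}: audio at {path}"),
    )
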
+ + :param script: The script to add voices to + :param engine: The engine to use for generating voice clips + :param on_voice_generated: A callback to call after each voice clip is generated which takes the clip index and path to the generated audio + """ audio_paths = generate_voices(script, engine=engine, on_voice_generated=on_voice_generated) return script.replace(clips=[clip.replace(audio_path=audio_path) for clip, audio_path in zip(script.clips, audio_paths)]) \ No newline at end of file diff --git a/sitcom_simulator/speech_generator/__init__.py b/sitcom_simulator/speech_generator/__init__.py deleted file mode 100644 index 6a6f1b7..0000000 --- a/sitcom_simulator/speech_generator/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from sitcom_simulator.speech_generator.speech_generator import generate_voices, add_voices \ No newline at end of file diff --git a/sitcom_simulator/user_input.py b/sitcom_simulator/user_input.py index e0e294c..3062add 100644 --- a/sitcom_simulator/user_input.py +++ b/sitcom_simulator/user_input.py @@ -2,9 +2,16 @@ import random from sitcom_simulator.models import Character -# user selects which auto-detected characters to include in the script -# debug only, fakeyou has another method for selecting characters -def select_characters(possible_characters): + +def select_characters(possible_characters: dict[str, list[str]]): + """ + Generic character selection procedure in which the user + selects which auto-detected characters to include in the script. + + This function is currently unused since FakeYou has its own character selection procedure. + + :param possible_characters: A dictionary of character names to a list of voice tokens + """ print("--- Character Voice Selection ---") selected_characters = dict() for name, voices in possible_characters.items(): @@ -18,8 +25,14 @@ def select_characters(possible_characters): assert len(selected_characters) > 0, "No voices selected. Exiting." return [Character(name, voice) for name, voice in selected_characters.items()] -def describe_characters(characters): - " get visual descriptions for each character from the user " +def describe_characters(characters: dict[str, str]): + """ + A procedure to prompt the user to visually describe the characters in the script. + + This function is currently unused since the language model descriptions are used instead. 
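The helpers in this module consume plain dictionaries keyed by character name. A sketch of the input select_characters expects (the names and voice tokens below are made up):

    from sitcom_simulator.user_input import select_characters

    possible_characters = {
        "Mario": ["TM:abc123", "TM:def456"],  # hypothetical FakeYou voice tokens
        "Luigi": ["TM:xyz789"],
    }
    # interactive: asks the user which detected characters to keep,
    # then returns a list of Character objects
    characters = select_characters(possible_characters)
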
+ + :param characters: A dictionary of character names to voice tokens (although this should change to a list of Character objects in the future) + """ print("\n--- Image Prompt Descriptions ---\n") character_descriptions = {} diff --git a/sitcom_simulator/video_generator/__init__.py b/sitcom_simulator/video/__init__.py similarity index 100% rename from sitcom_simulator/video_generator/__init__.py rename to sitcom_simulator/video/__init__.py diff --git a/sitcom_simulator/video/integrations/__init__.py b/sitcom_simulator/video/integrations/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/sitcom_simulator/video_generator/integrations/ffmpeg.py b/sitcom_simulator/video/integrations/ffmpeg.py similarity index 75% rename from sitcom_simulator/video_generator/integrations/ffmpeg.py rename to sitcom_simulator/video/integrations/ffmpeg.py index c16d302..0f86820 100644 --- a/sitcom_simulator/video_generator/integrations/ffmpeg.py +++ b/sitcom_simulator/video/integrations/ffmpeg.py @@ -12,11 +12,23 @@ def render_clip( font: str, width:int=720, height:int=1280, - clip_buffer_seconds=0.15, # how much time to wait after characters finish talking - min_clip_seconds=1.5, # minimum time to hold on a clip - speaking_delay_seconds=0.12, # how long after the clip the audio kicks in - caption_max_width=30, + clip_buffer_seconds:float=0.15, + min_clip_seconds:float=1.5, + speaking_delay_seconds:float=0.12, + caption_max_width:int=30, ): + """ + Renders a video clip from the given clip object and returns the path to the rendered video file. + + :param clip: The clip to render + :param font: The path to the font file to use for the captions + :param width: The width of the video + :param height: The height of the video + :param clip_buffer_seconds: How much time to wait after characters finish talking + :param min_clip_seconds: The minimum time to hold on a clip + :param speaking_delay_seconds: Delay before the audio kicks in + :param caption_max_width: The maximum width of the captions, in characters + """ caption = clip.speech if caption: caption = textwrap.fill(caption, width=caption_max_width) @@ -104,6 +116,14 @@ def concatenate_clips( background_music:str|None=None, bgm_volume:float=0.25, ): + """ + Combines the given video clips into a single video file and returns the path to the concatenated video file. + + :param filenames: The list of video file paths to combine + :param output_filename: The name of the output file + :param background_music: The path to the background music file + :param bgm_volume: The volume of the background music, between 0 and 1 + """ # Create input sets for each file in the list input_clips = [ffmpeg.input(f) for f in filenames] @@ -156,11 +176,26 @@ def render_video( output_path: str = 'output.mp4', width:int=720, height:int=1280, - clip_buffer_seconds=0.15, # how much time to wait after characters finish talking - min_clip_length=1.5, # minimum time to hold on a clip - speaking_delay_seconds=0.12, # how long after the clip the audio kicks in + clip_buffer_seconds=0.15, + min_clip_length=1.5, + speaking_delay_seconds=0.12, caption_max_width=30, ): + """ + Renders a video from the given script and returns the path to the rendered video file. + + At present, only 9:16 aspect ratio is supported, but 16:9 and 1:1 will be supported in the future. 
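A usage sketch of the renderer documented here (values are illustrative; final_script is assumed to be a Script whose clips already have audio_path and image_path filled in):

    from sitcom_simulator.video import render_video

    output = render_video(
        script=final_script,
        font="Arial",                  # font for the burned-in captions ('Arial' is the CLI default)
        output_path="./my_video.mp4",
        width=720,
        height=1280,                   # 9:16, the only aspect ratio supported for now
    )
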
+ + :param script: The script to render + :param font: The path to the font file to use for the captions + :param output_path: The path to save the rendered video + :param width: The width of the video + :param height: The height of the video + :param clip_buffer_seconds: How much time to wait after characters finish talking + :param min_clip_length: The minimum time to hold on a clip + :param speaking_delay_seconds: Delay before the audio kicks in + :param caption_max_width: The maximum width of the captions, in characters + """ intermediate_clips = [] for clip in tqdm(script.clips, desc="Rendering intermediate video clips"): clip_file = render_clip( diff --git a/sitcom_simulator/video/integrations/moviepy.py b/sitcom_simulator/video/integrations/moviepy.py new file mode 100644 index 0000000..46edf53 --- /dev/null +++ b/sitcom_simulator/video/integrations/moviepy.py @@ -0,0 +1,68 @@ +# from moviepy.editor import * +# from ...models import SpeechClip +# from typing import List + +# def generate_movie( +# dialogs: List[SpeechClip], +# font: str, +# output_path="output.mp4", +# width:int=720, +# height:int=1280, +# clip_buffer_seconds=0.35, # how much time to wait after characters finish talking +# min_clip_length=1.5, # minimum time to hold on a clip +# ): +# """ +# MoviePy backend for generating videos. + +# While it still mostly works, it is more limited in functionality than the FFmpeg backend and has thus been deprecated. +# """ +# dialog_clips = [] +# for dialog in dialogs: + +# voiceover = AudioFileClip(dialog.audio) + +# # calculate the duration +# duration = voiceover.duration + clip_buffer_seconds +# if(duration < min_clip_length): +# duration = min_clip_length + +# # black background +# bg = ColorClip(size=(width,height), color=[0,0,0]) +# bg = bg.set_duration(duration) +# bg = bg.set_audio(voiceover) + +# # the image +# img_clip = ImageClip(dialog.image) +# img_clip = img_clip.resize(width/img_clip.w,height/img_clip.h) +# img_clip = img_clip.set_duration(duration) +# img_clip = img_clip.set_fps(24) +# img_clip = img_clip.set_position(('center', 'top')) + +# # the caption +# raw_caption = dialog.caption +# raw_caption_queue = raw_caption +# caption = "" +# # generate line breaks as necessary +# max_chars_per_line = 30 +# char_counter = 0 +# while(len(raw_caption_queue) > 0): +# split = raw_caption_queue.split(' ') +# if(char_counter + len(split[0]) + 1 < max_chars_per_line): +# caption += " " +# char_counter += 1 +# else: +# caption += "\n" +# char_counter = 0 +# caption += split[0] +# char_counter += len(split[0]) +# raw_caption_queue = " ".join(split[1:]) + +# txt_clip = TextClip(caption, fontsize=48, font=font, color='white', size=(width, height - img_clip.h)) +# txt_clip = txt_clip.set_position(('center', 1-float(height-img_clip.h)/float(height)), relative=True).set_duration(duration) + +# video = CompositeVideoClip([bg, img_clip, txt_clip]) +# video = video.set_fps(24) +# dialog_clips.append(video) + +# final_clip = concatenate_videoclips(dialog_clips) +# final_clip.write_videofile(output_path) diff --git a/sitcom_simulator/video/video_generator.py b/sitcom_simulator/video/video_generator.py new file mode 100644 index 0000000..0736c68 --- /dev/null +++ b/sitcom_simulator/video/video_generator.py @@ -0,0 +1,33 @@ +from typing import List +from ..models import Script +from .integrations import ffmpeg + +def render_video( + script: Script, + font: str, + output_path="output.mp4", + width:int=1080, + height:int=1920, + clip_buffer_seconds=0.35, + min_clip_length=1.5, + ): + """ + 
Renders a video from the given script and returns the path to the rendered video. + + :param script: The script to render + :param font: The path to the font file to use + :param output_path: The path to save the rendered video to + :param width: The width of the video to render + :param height: The height of the video to render + :param clip_buffer_seconds: How much time to wait after characters finish talking + :param min_clip_length: The minimum time to hold on a clip + """ + return ffmpeg.render_video( + script=script, + font=font, + output_path=output_path, + width=width, + height=height, + clip_buffer_seconds=clip_buffer_seconds, + min_clip_length=min_clip_length + ) \ No newline at end of file diff --git a/sitcom_simulator/video_generator/integrations/moviepy.py b/sitcom_simulator/video_generator/integrations/moviepy.py deleted file mode 100644 index ece6204..0000000 --- a/sitcom_simulator/video_generator/integrations/moviepy.py +++ /dev/null @@ -1,68 +0,0 @@ -from moviepy.editor import * -from ...models import SpeechClip -from typing import List - -def generate_movie( - dialogs: List[SpeechClip], - font: str, - output_path="output.mp4", - width:int=720, - height:int=1280, - clip_buffer_seconds=0.35, # how much time to wait after characters finish talking - min_clip_length=1.5, # minimum time to hold on a clip - ): - """ - MoviePy backend for generating videos. - - While it still mostly works, it is more limited in functionality than the FFmpeg backend and has thus been deprecated. - """ - dialog_clips = [] - for dialog in dialogs: - - voiceover = AudioFileClip(dialog.audio) - - # calculate the duration - duration = voiceover.duration + clip_buffer_seconds - if(duration < min_clip_length): - duration = min_clip_length - - # black background - bg = ColorClip(size=(width,height), color=[0,0,0]) - bg = bg.set_duration(duration) - bg = bg.set_audio(voiceover) - - # the image - img_clip = ImageClip(dialog.image) - img_clip = img_clip.resize(width/img_clip.w,height/img_clip.h) - img_clip = img_clip.set_duration(duration) - img_clip = img_clip.set_fps(24) - img_clip = img_clip.set_position(('center', 'top')) - - # the caption - raw_caption = dialog.caption - raw_caption_queue = raw_caption - caption = "" - # generate line breaks as necessary - max_chars_per_line = 30 - char_counter = 0 - while(len(raw_caption_queue) > 0): - split = raw_caption_queue.split(' ') - if(char_counter + len(split[0]) + 1 < max_chars_per_line): - caption += " " - char_counter += 1 - else: - caption += "\n" - char_counter = 0 - caption += split[0] - char_counter += len(split[0]) - raw_caption_queue = " ".join(split[1:]) - - txt_clip = TextClip(caption, fontsize=48, font=font, color='white', size=(width, height - img_clip.h)) - txt_clip = txt_clip.set_position(('center', 1-float(height-img_clip.h)/float(height)), relative=True).set_duration(duration) - - video = CompositeVideoClip([bg, img_clip, txt_clip]) - video = video.set_fps(24) - dialog_clips.append(video) - - final_clip = concatenate_videoclips(dialog_clips) - final_clip.write_videofile(output_path) diff --git a/sitcom_simulator/video_generator/video_generator.py b/sitcom_simulator/video_generator/video_generator.py deleted file mode 100644 index e5749c9..0000000 --- a/sitcom_simulator/video_generator/video_generator.py +++ /dev/null @@ -1,22 +0,0 @@ -from typing import List -from ..models import Script -from .integrations import ffmpeg - -def render_video( - script: Script, - font: str, - output_path="output.mp4", - width:int=1080, - height:int=1920, - 
clip_buffer_seconds=0.35, # how much time to wait after characters finish talking - min_clip_length=1.5, # minimum time to hold on a clip - ): - return ffmpeg.render_video( - script=script, - font=font, - output_path=output_path, - width=width, - height=height, - clip_buffer_seconds=clip_buffer_seconds, - min_clip_length=min_clip_length - ) \ No newline at end of file From 70a5a06197ba8912327de6c5366fd924ea8e4d8d Mon Sep 17 00:00:00 2001 From: Josh Moody Date: Mon, 12 Feb 2024 19:42:36 -0700 Subject: [PATCH 02/40] updated dependencies --- pyproject.toml | 23 +++++++++++------------ requirements.txt | 21 ++++++++++----------- 2 files changed, 21 insertions(+), 23 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 433289d..3dffabf 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "sitcom-simulator" -version = "0.0.9" +version = "0.0.10" authors = [ { name = "Josh Moody", email = "josh@joshmoody.org" }, ] @@ -14,17 +14,15 @@ classifiers = [ "Operating System :: OS Independent", ] dependencies = [ - "stability-sdk==0.8.4", - "python-dotenv==1.0.0", - "tqdm==4.66.1", - "openai==0.28.0", - "image==1.5.33", - "ffmpeg-python==0.2.0", - "gtts==2.3.2", - "Pillow==10.0.1", - "beautifulsoup4==4.12.2", - "requests==2.31.0", - "mypy==1.8.0", + "stability-sdk~=0.8.4", + "python-dotenv~=1.0.0", + "tqdm~=4.66.1", + "openai~=0.28.0", + "ffmpeg-python~=0.2.0", + "gtts~=2.3.2", + "Pillow~=10.0.1", + "beautifulsoup4~=4.12.2", + "requests~=2.31.0", # "fakeyou==1.2.5", Currently using raw HTTP requests instead # "moviepy==1.0.3", No longer supported due to lack of features. Using ffmpeg-python instead ] @@ -36,6 +34,7 @@ dev = [ "sphinx", "furo", "sphinx-autodoc-typehints", + "mypy", ] [project.urls] diff --git a/requirements.txt b/requirements.txt index 519a05e..83dc7a2 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,11 +1,10 @@ -stability-sdk==0.8.4 -python-dotenv==1.0.0 -tqdm==4.66.1 -openai==0.28.0 -image==1.5.33 -ffmpeg-python==0.2.0 -gtts==2.3.2 -Pillow==10.0.1 -beautifulsoup4==4.12.2 -requests==2.31.0 -mypy==1.8.0 \ No newline at end of file +stability-sdk~=0.8.4 +python-dotenv~=1.0.0 +tqdm~=4.66.1 +openai~=0.28.0 +ffmpeg-python~=0.2.0 +gtts~=2.3.2 +Pillow~=10.0.1 +beautifulsoup4~=4.12.2 +requests~=2.31.0 +mypy~=1.8.0 \ No newline at end of file From 5d137c27466178d7d6dfe436c935e29686748166 Mon Sep 17 00:00:00 2001 From: Josh Moody Date: Mon, 12 Feb 2024 23:42:41 -0700 Subject: [PATCH 03/40] improved support for callbacks --- pyproject.toml | 2 +- sitcom_simulator/image/image_generator.py | 7 +++++-- sitcom_simulator/models.py | 7 +++++-- .../speech/integrations/fakeyou.py | 10 +++++---- sitcom_simulator/speech/speech_generator.py | 21 +++++++++++++------ 5 files changed, 32 insertions(+), 15 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 3dffabf..729c4ed 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "sitcom-simulator" -version = "0.0.10" +version = "0.0.11" authors = [ { name = "Josh Moody", email = "josh@joshmoody.org" }, ] diff --git a/sitcom_simulator/image/image_generator.py b/sitcom_simulator/image/image_generator.py index 280dae5..357acb8 100644 --- a/sitcom_simulator/image/image_generator.py +++ b/sitcom_simulator/image/image_generator.py @@ -26,11 +26,14 @@ def generate_images( :param engine: The engine to use for generating images """ image_paths: List[str | None] = [] - image_prompts = [clip.image_prompt for clip in script.clips] - for i, image_prompt in tqdm(enumerate(image_prompts), 
desc="Generating images", total=len(image_prompts)): + for i, clip in tqdm(enumerate(script.clips), desc="Generating images", total=len(script.clips)): + image_prompt = clip.image_prompt if not image_prompt: image_paths.append(None) continue + if clip.image_path: + image_paths.append(clip.image_path) + continue if engine == "stability": full_prompt = image_prompt + ', ' + script.metadata.art_style image_path = stability.generate_image(prompt=full_prompt, width=width, height=height) diff --git a/sitcom_simulator/models.py b/sitcom_simulator/models.py index b5a7ec8..5bdaa8b 100644 --- a/sitcom_simulator/models.py +++ b/sitcom_simulator/models.py @@ -35,8 +35,9 @@ class Clip: :param speaker: The name of the speaker :param speech: The speech for the clip :param image_prompt: The prompt for the image + :param image_url: The URL for the image (currently unused, but may be used in the future with a different image engine) :param image_path: The path to the image - :param audio_url: The URL for the audio + :param audio_url: The URL for the audio (currently unused, but may be used in the future with a different TTS engine) :param audio_path: The path to the audio :param title: The title of the clip :param duration: The duration of the clip @@ -45,6 +46,7 @@ class Clip: speech: str | None image_prompt: str | None image_path: str | None + image_url: str | None audio_url: str | None audio_path: str | None title: str | None @@ -62,7 +64,7 @@ def needs_image(self): """ Returns True if the clip needs an image, and False if it doesn't. """ - return bool(self.image_prompt and not self.image_path) + return bool(self.image_prompt and not (self.image_path or self.image_url)) @staticmethod def from_dict(data: dict): @@ -75,6 +77,7 @@ def from_dict(data: dict): speech=data.get('speech'), image_prompt=data.get('image_prompt'), image_path=data.get('image_path'), + image_url=data.get('image_url'), audio_url=data.get('audio_url'), audio_path=data.get('audio_path'), title=data.get('title'), diff --git a/sitcom_simulator/speech/integrations/fakeyou.py b/sitcom_simulator/speech/integrations/fakeyou.py index 390f3ee..928adb4 100644 --- a/sitcom_simulator/speech/integrations/fakeyou.py +++ b/sitcom_simulator/speech/integrations/fakeyou.py @@ -100,8 +100,7 @@ def get_possible_characters_from_prompt(prompt: str) -> dict: return possible_characters -# takes in array of line models -def generate_voices(script: Script, on_voice_generated: Optional[Callable[[int, str], None]] = None) -> List[str | None]: +def generate_voices(script: Script, on_voice_url_generated: Optional[Callable[[int, str], None]] = None) -> List[str | None]: """ Sequentially generates voices for each line in the script using the FakeYou API. It is intentionally slow to avoid getting rate limited. 
@@ -115,6 +114,9 @@ def generate_voices(script: Script, on_voice_generated: Optional[Callable[[int, if not clip.speaker: audio_urls.append(None) continue + if clip.audio_url: + audio_urls.append(clip.audio_url) + continue logging.debug(f'Starting voice job {i} ({clip.speaker}: {clip.speaker})') try: character = next((character for character in script.characters if character.name == clip.speaker)) @@ -169,8 +171,8 @@ def generate_voices(script: Script, on_voice_generated: Optional[Callable[[int, audio_path = json["state"]["maybe_public_bucket_wav_audio_path"] audio_url = f'https://storage.googleapis.com/vocodes-public{audio_path}' audio_urls.append(audio_url) - if(on_voice_generated): - on_voice_generated(i, audio_url) + if(on_voice_url_generated): + on_voice_url_generated(i, audio_url) else: raise Exception("job failed, aborting", json) break diff --git a/sitcom_simulator/speech/speech_generator.py b/sitcom_simulator/speech/speech_generator.py index 1e12762..dcf6fd5 100644 --- a/sitcom_simulator/speech/speech_generator.py +++ b/sitcom_simulator/speech/speech_generator.py @@ -9,7 +9,8 @@ def generate_voices( script: Script, engine:Engine="fakeyou", - on_voice_generated: Optional[Callable[[int, str], None]] = None + on_voice_downloaded: Optional[Callable[[int, str], None]] = None, + fakeyou_on_voice_url_generated: Optional[Callable[[int, str], None]] = None, ): """ Generates and returns a list of voice clip paths for the given script using the given engine. @@ -19,15 +20,23 @@ def generate_voices( :param script: The script to generate voice clips for :param engine: The engine to use for generating voice clips - :param on_voice_generated: A callback to call after each voice clip is generated which takes the clip index and path to the generated audio + :param on_voice_downloaded: A callback to call after each voice clip is downloaded which takes the clip index and path to the downloaded audio + :param fakeyou_on_voice_url_generated: A callback to call after each FakeYou voice clip is generated which takes the clip index and url of the generated audio """ # generating voice clips can take a LONG time if args.high_quality_audio == True # because of long delays to avoid API timeouts on FakeYou.com if engine == "fakeyou": - audio_urls = fakeyou.generate_voices(script, on_voice_generated) - audio_paths = [fakeyou.download_voice(audio_url) if audio_url else None for audio_url in audio_urls] + audio_urls = fakeyou.generate_voices(script, fakeyou_on_voice_url_generated) + audio_paths = [] + for i, audio_url in enumerate(audio_urls): + if audio_url is None: continue + audio_path = fakeyou.download_voice(audio_url) + audio_paths.append(audio_path) + if on_voice_downloaded: + on_voice_downloaded(i, audio_path) + return audio_paths else: - audio_paths = gtts.generate_voices(script, on_voice_generated) + audio_paths = gtts.generate_voices(script, on_voice_downloaded) return audio_paths @@ -45,5 +54,5 @@ def add_voices( :param engine: The engine to use for generating voice clips :param on_voice_generated: A callback to call after each voice clip is generated which takes the clip index and path to the generated audio """ - audio_paths = generate_voices(script, engine=engine, on_voice_generated=on_voice_generated) + audio_paths = generate_voices(script, engine=engine, fakeyou_on_voice_url_generated=on_voice_generated) return script.replace(clips=[clip.replace(audio_path=audio_path) for clip, audio_path in zip(script.clips, audio_paths)]) \ No newline at end of file From 
12b7768ee53f699846994efc9d70387b784369a3 Mon Sep 17 00:00:00 2001 From: Josh Moody Date: Tue, 13 Feb 2024 15:59:43 -0700 Subject: [PATCH 04/40] added lazy loading to the heavier dependencies --- pyproject.toml | 2 +- sitcom_simulator/auto.py | 18 +++++++++--------- sitcom_simulator/image/image_generator.py | 2 +- sitcom_simulator/image/integrations/pillow.py | 4 ++-- .../image/integrations/stability.py | 5 +++-- sitcom_simulator/music/integrations/freepd.py | 5 +++-- sitcom_simulator/music/music_generator.py | 2 +- .../script/integrations/chatgpt/chatgpt.py | 3 +-- .../integrations/chatgpt/instructions.py | 2 ++ .../fakeyou/character_extractor.py | 4 ++-- sitcom_simulator/script/llm.py | 3 +-- .../speech/integrations/fakeyou.py | 3 ++- sitcom_simulator/speech/integrations/gtts.py | 2 +- sitcom_simulator/speech/speech_generator.py | 4 ++-- sitcom_simulator/user_input.py | 3 --- sitcom_simulator/video/integrations/ffmpeg.py | 2 +- sitcom_simulator/video/video_generator.py | 2 +- 17 files changed, 33 insertions(+), 33 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 729c4ed..1af696f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "sitcom-simulator" -version = "0.0.11" +version = "0.0.12" authors = [ { name = "Josh Moody", email = "josh@joshmoody.org" }, ] diff --git a/sitcom_simulator/auto.py b/sitcom_simulator/auto.py index 1eed7ca..8f4bf30 100644 --- a/sitcom_simulator/auto.py +++ b/sitcom_simulator/auto.py @@ -1,12 +1,3 @@ -from .models import Script, VideoResult -from .script import write_script -from .speech import add_voices -from .image import add_images -from .music import add_music -from .video import render_video -from .script import script_from_file -from .social.yt_uploader import upload_to_yt - def create_sitcom( prompt: str | None = None, art_style: str | None = None, @@ -32,6 +23,15 @@ def create_sitcom( :param manual_select_characters: If True, the user will be prompted to select the characters for the video. If False, the characters will be selected automatically by the language model. :param upload_to_yt: If True, the video will be uploaded to YouTube after it is generated. NOTE: currently does not work. 
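Putting the lazily-imported pieces together, a minimal call might look like the following (prompt and art style are made-up values; debug=True swaps in the free gTTS and Pillow placeholders):

    from sitcom_simulator.auto import create_sitcom

    result = create_sitcom(
        prompt="a cooking show hosted by two retired supervillains",
        art_style="grainy 90s sitcom still",
        debug=True,             # robotic TTS and placeholder images instead of paid APIs
        approve_script=False,
    )
    print(result.path, result.title)
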
""" + from .models import Script, VideoResult + from .script import write_script + from .speech import add_voices + from .image import add_images + from .music import add_music + from .video import render_video + from .script import script_from_file + from .social.yt_uploader import upload_to_yt + if(prompt == None and script_path == None): prompt = input("Enter a prompt to generate the video script: ") diff --git a/sitcom_simulator/image/image_generator.py b/sitcom_simulator/image/image_generator.py index 357acb8..1fed574 100644 --- a/sitcom_simulator/image/image_generator.py +++ b/sitcom_simulator/image/image_generator.py @@ -1,4 +1,3 @@ -from .integrations import stability, pillow from tqdm import tqdm from typing import List, Optional, Callable, Literal from sitcom_simulator.models import Script @@ -25,6 +24,7 @@ def generate_images( :param on_image_generated: A callback to call after each image is generated which takes the clip index and path to the generated image :param engine: The engine to use for generating images """ + from .integrations import stability, pillow image_paths: List[str | None] = [] for i, clip in tqdm(enumerate(script.clips), desc="Generating images", total=len(script.clips)): image_prompt = clip.image_prompt diff --git a/sitcom_simulator/image/integrations/pillow.py b/sitcom_simulator/image/integrations/pillow.py index 05ff8ff..5edb1fa 100644 --- a/sitcom_simulator/image/integrations/pillow.py +++ b/sitcom_simulator/image/integrations/pillow.py @@ -1,4 +1,3 @@ -from PIL import Image import random import tempfile @@ -6,10 +5,11 @@ def generate_image(width:int=720, height:int=1280): """ Generates a random solid-color image and returns the path to the image file. Intended for use in debugging and testing. - + :param width: The width of the image to generate :param height: The height of the image to generate """ + from PIL import Image # Generate a random color color = (random.randint(0, 255), random.randint(0, 255), random.randint(0, 255)) diff --git a/sitcom_simulator/image/integrations/stability.py b/sitcom_simulator/image/integrations/stability.py index fd901ee..250a90c 100644 --- a/sitcom_simulator/image/integrations/stability.py +++ b/sitcom_simulator/image/integrations/stability.py @@ -1,5 +1,3 @@ -from stability_sdk.client import StabilityInference, process_artifacts_from_answers -import stability_sdk.interfaces.gooseai.generation.generation_pb2 as generation import tempfile import mimetypes import os @@ -16,6 +14,9 @@ def generate_image(prompt:str, width:int=1024, height:int=1024): :param width: The width of the image to generate :param height: The height of the image to generate """ + # lazy load because this is a heavy dependency + import stability_sdk.interfaces.gooseai.generation.generation_pb2 as generation + from stability_sdk.client import StabilityInference, process_artifacts_from_answers # customize engine here if desired (default is newest) # i.e. 
engine='stable-diffusion-v1-5' diff --git a/sitcom_simulator/music/integrations/freepd.py b/sitcom_simulator/music/integrations/freepd.py index 5cefe3f..1b23613 100644 --- a/sitcom_simulator/music/integrations/freepd.py +++ b/sitcom_simulator/music/integrations/freepd.py @@ -1,5 +1,3 @@ -import requests -from bs4 import BeautifulSoup import random import os from enum import Enum @@ -33,6 +31,9 @@ def download_random_music(category: MusicCategory) -> str | None: :param category: The category of music to download """ + from bs4 import BeautifulSoup + import requests + # Send a GET request to the website url = f"https://freepd.com/{category.value}.php" response = requests.get(url) diff --git a/sitcom_simulator/music/music_generator.py b/sitcom_simulator/music/music_generator.py index ac682dd..641a1b1 100644 --- a/sitcom_simulator/music/music_generator.py +++ b/sitcom_simulator/music/music_generator.py @@ -1,4 +1,3 @@ -from .integrations import freepd from typing import Literal, Callable, Optional import random from sitcom_simulator.models import Script @@ -18,6 +17,7 @@ def generate_music( :param category: The category of music to generate :param engine: The engine to use for generating music """ + from .integrations import freepd if engine == "freepd": logging.debug(f"Generating music: {category}") try: diff --git a/sitcom_simulator/script/integrations/chatgpt/chatgpt.py b/sitcom_simulator/script/integrations/chatgpt/chatgpt.py index bf5c9e3..7fdeed0 100644 --- a/sitcom_simulator/script/integrations/chatgpt/chatgpt.py +++ b/sitcom_simulator/script/integrations/chatgpt/chatgpt.py @@ -1,5 +1,3 @@ -import openai - def chat(prompt: str, max_tokens:int=2048, temperature:float=1): """ Given a prompt, returns a response from ChatGPT. @@ -8,6 +6,7 @@ def chat(prompt: str, max_tokens:int=2048, temperature:float=1): :param max_tokens: The maximum number of tokens to generate :param temperature: The temperature to use when generating the response, which controls randomness. Higher values make the response more random, while lower values make the response more deterministic. """ + import openai completion = openai.ChatCompletion.create( model="gpt-3.5-turbo", temperature=temperature, diff --git a/sitcom_simulator/script/integrations/chatgpt/instructions.py b/sitcom_simulator/script/integrations/chatgpt/instructions.py index 824d126..c4cb43a 100644 --- a/sitcom_simulator/script/integrations/chatgpt/instructions.py +++ b/sitcom_simulator/script/integrations/chatgpt/instructions.py @@ -1,3 +1,5 @@ +# I think textwrap has a feature to eliminate left padding, but I'm not sure how to use it. + base_prompt = """You are a witty, avant-garde creative genius who writes short video scripts consisting of AI-generated still images and audio. Your output should be structured in TOML. diff --git a/sitcom_simulator/script/integrations/fakeyou/character_extractor.py b/sitcom_simulator/script/integrations/fakeyou/character_extractor.py index dcf3c02..76c1c55 100644 --- a/sitcom_simulator/script/integrations/fakeyou/character_extractor.py +++ b/sitcom_simulator/script/integrations/fakeyou/character_extractor.py @@ -1,6 +1,4 @@ -from sitcom_simulator.script.llm import chat import json -import requests import re import random from .narrators import BACKUP_NARRATORS @@ -24,6 +22,8 @@ def generate_character_list(prompt: str) -> List[Character]: Keep the list short and focused. Structure your output as a JSON list of strings. 
""" + from sitcom_simulator.script.llm import chat + import requests raw_response = chat(instructions) character_names = json.loads(raw_response) diff --git a/sitcom_simulator/script/llm.py b/sitcom_simulator/script/llm.py index 71e3f92..61dd19f 100644 --- a/sitcom_simulator/script/llm.py +++ b/sitcom_simulator/script/llm.py @@ -1,4 +1,3 @@ -from .integrations.chatgpt import chatgpt - def chat(prompt: str, max_tokens:int=2048, temperature:float=1): + from .integrations.chatgpt import chatgpt return chatgpt.chat(prompt, max_tokens, temperature) \ No newline at end of file diff --git a/sitcom_simulator/speech/integrations/fakeyou.py b/sitcom_simulator/speech/integrations/fakeyou.py index 928adb4..2af289e 100644 --- a/sitcom_simulator/speech/integrations/fakeyou.py +++ b/sitcom_simulator/speech/integrations/fakeyou.py @@ -1,4 +1,3 @@ -import requests from tqdm import tqdm from typing import List, Set, Callable, Optional, Dict import re @@ -29,6 +28,7 @@ def download_voice(url: str): temp_audio_file = tempfile.NamedTemporaryFile(suffix='.wav', delete=False) atexit.register(os.remove, temp_audio_file.name) try: + # uses urllib because AWS lambda doesn't have requests (not that that matters anymore) with urllib.request.urlopen(url) as response, open(temp_audio_file.name, 'wb') as out_file: data = response.read() # Read the content as bytes out_file.write(data) @@ -44,6 +44,7 @@ def fetch_voicelist(): """ Fetches the list of available voices from the FakeYou API. """ + import requests response = requests.get('https://api.fakeyou.com/tts/list') json = response.json() if(json['success'] != True): diff --git a/sitcom_simulator/speech/integrations/gtts.py b/sitcom_simulator/speech/integrations/gtts.py index 0f0721f..bf2051e 100644 --- a/sitcom_simulator/speech/integrations/gtts.py +++ b/sitcom_simulator/speech/integrations/gtts.py @@ -1,5 +1,4 @@ import tempfile -from gtts import gTTS from typing import List from ...models import Script from tqdm import tqdm @@ -15,6 +14,7 @@ def generate_voices(script: Script, on_voice_generated: Optional[Callable[[int, :param script: The script to generate voice clips for :param on_voice_generated: A callback to call after each voice clip is generated which takes the clip index and path to the generated audio """ + from gtts import gTTS filepaths: List[str | None] = [] for i, line in tqdm(enumerate(script.clips), "Generating voice clips", total=len(script.clips)): if not line.speech: diff --git a/sitcom_simulator/speech/speech_generator.py b/sitcom_simulator/speech/speech_generator.py index dcf6fd5..331f6de 100644 --- a/sitcom_simulator/speech/speech_generator.py +++ b/sitcom_simulator/speech/speech_generator.py @@ -1,5 +1,3 @@ -from .integrations import fakeyou as fakeyou -from .integrations import gtts as gtts from typing import List, Literal from sitcom_simulator.models import Script from typing import Optional, Callable @@ -23,6 +21,8 @@ def generate_voices( :param on_voice_downloaded: A callback to call after each voice clip is downloaded which takes the clip index and path to the downloaded audio :param fakeyou_on_voice_url_generated: A callback to call after each FakeYou voice clip is generated which takes the clip index and url of the generated audio """ + from .integrations import fakeyou as fakeyou + from .integrations import gtts as gtts # generating voice clips can take a LONG time if args.high_quality_audio == True # because of long delays to avoid API timeouts on FakeYou.com if engine == "fakeyou": diff --git a/sitcom_simulator/user_input.py 
b/sitcom_simulator/user_input.py index 3062add..7c10f8f 100644 --- a/sitcom_simulator/user_input.py +++ b/sitcom_simulator/user_input.py @@ -1,8 +1,5 @@ -import tomllib -import random from sitcom_simulator.models import Character - def select_characters(possible_characters: dict[str, list[str]]): """ Generic character selection procedure in which the user diff --git a/sitcom_simulator/video/integrations/ffmpeg.py b/sitcom_simulator/video/integrations/ffmpeg.py index 0f86820..6e3dd11 100644 --- a/sitcom_simulator/video/integrations/ffmpeg.py +++ b/sitcom_simulator/video/integrations/ffmpeg.py @@ -1,4 +1,3 @@ -import ffmpeg from ...models import Script, Clip from typing import List import os @@ -29,6 +28,7 @@ def render_clip( :param speaking_delay_seconds: Delay before the audio kicks in :param caption_max_width: The maximum width of the captions, in characters """ + import ffmpeg caption = clip.speech if caption: caption = textwrap.fill(caption, width=caption_max_width) diff --git a/sitcom_simulator/video/video_generator.py b/sitcom_simulator/video/video_generator.py index 0736c68..550a999 100644 --- a/sitcom_simulator/video/video_generator.py +++ b/sitcom_simulator/video/video_generator.py @@ -1,6 +1,5 @@ from typing import List from ..models import Script -from .integrations import ffmpeg def render_video( script: Script, @@ -22,6 +21,7 @@ def render_video( :param clip_buffer_seconds: How much time to wait after characters finish talking :param min_clip_length: The minimum time to hold on a clip """ + from .integrations import ffmpeg return ffmpeg.render_video( script=script, font=font, From 1d2c23d007796dd04f72b372cc9872fbaa029003 Mon Sep 17 00:00:00 2001 From: Josh Moody Date: Tue, 13 Feb 2024 16:11:30 -0700 Subject: [PATCH 05/40] added more lazy loading --- pyproject.toml | 2 +- sitcom_simulator/music/__init__.py | 2 +- sitcom_simulator/music/integrations/freepd.py | 1 + sitcom_simulator/music/music_generator.py | 6 ++---- sitcom_simulator/script/script_generator.py | 13 +++++++------ sitcom_simulator/speech/integrations/fakeyou.py | 1 + sitcom_simulator/video/integrations/ffmpeg.py | 1 + 7 files changed, 14 insertions(+), 12 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 1af696f..2c84ef5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "sitcom-simulator" -version = "0.0.12" +version = "0.0.13" authors = [ { name = "Josh Moody", email = "josh@joshmoody.org" }, ] diff --git a/sitcom_simulator/music/__init__.py b/sitcom_simulator/music/__init__.py index d1fc97c..ad24d0d 100644 --- a/sitcom_simulator/music/__init__.py +++ b/sitcom_simulator/music/__init__.py @@ -1 +1 @@ -from .music_generator import add_music, generate_music, MusicCategory \ No newline at end of file +from .music_generator import add_music, generate_music \ No newline at end of file diff --git a/sitcom_simulator/music/integrations/freepd.py b/sitcom_simulator/music/integrations/freepd.py index 1b23613..21f8866 100644 --- a/sitcom_simulator/music/integrations/freepd.py +++ b/sitcom_simulator/music/integrations/freepd.py @@ -59,6 +59,7 @@ def download_file(url: str): :param url: The URL of the file to download """ + import requests response = requests.get(url) if response.status_code == 200: # Get the file name from the URL diff --git a/sitcom_simulator/music/music_generator.py b/sitcom_simulator/music/music_generator.py index 641a1b1..fc733ce 100644 --- a/sitcom_simulator/music/music_generator.py +++ b/sitcom_simulator/music/music_generator.py @@ -25,7 +25,7 @@ def 
generate_music( except ValueError: freepd_category = None if freepd_category is None: - freepd_category = random.choice(list(MusicCategory)) + freepd_category = random.choice(list(freepd.MusicCategory)) return freepd.download_random_music(freepd_category) else: raise ValueError(f"Invalid engine: {engine}") @@ -49,6 +49,4 @@ def add_music( music_path = generate_music(category) if on_music_generated: on_music_generated(music_path) - return script.replace(metadata=script.metadata.replace(bgm_path=music_path)) - -MusicCategory = freepd.MusicCategory \ No newline at end of file + return script.replace(metadata=script.metadata.replace(bgm_path=music_path)) \ No newline at end of file diff --git a/sitcom_simulator/script/script_generator.py b/sitcom_simulator/script/script_generator.py index c3bc5b8..833c516 100644 --- a/sitcom_simulator/script/script_generator.py +++ b/sitcom_simulator/script/script_generator.py @@ -1,10 +1,4 @@ -from .integrations.chatgpt import chatgpt, instructions -from .integrations.fakeyou.character_extractor import generate_character_list from typing import Callable -from ..speech.integrations.fakeyou import get_possible_characters_from_prompt -from ..user_input import select_characters as debug_select_characters -from .integrations.fakeyou.character_selector import select_characters as fakeyou_select_characters -from ..music import MusicCategory from ..models import Script import tomllib from dataclasses import asdict @@ -31,7 +25,14 @@ def write_script( :param temperature: The temperature to use when generating the script :param fakeyou_characters: Whether to restrict character selection to only voices from fakeyou.com """ + from ..speech.integrations.fakeyou import get_possible_characters_from_prompt + from .integrations.chatgpt import chatgpt, instructions + from .integrations.fakeyou.character_extractor import generate_character_list + from ..music.integrations.freepd import MusicCategory + if manual_character_selection: + from .integrations.fakeyou.character_selector import select_characters as fakeyou_select_characters + from ..user_input import select_characters as debug_select_characters possible_characters = get_possible_characters_from_prompt(prompt) select_characters: Callable = fakeyou_select_characters if fakeyou_characters else debug_select_characters characters = select_characters(possible_characters) diff --git a/sitcom_simulator/speech/integrations/fakeyou.py b/sitcom_simulator/speech/integrations/fakeyou.py index 2af289e..200a92e 100644 --- a/sitcom_simulator/speech/integrations/fakeyou.py +++ b/sitcom_simulator/speech/integrations/fakeyou.py @@ -109,6 +109,7 @@ def generate_voices(script: Script, on_voice_url_generated: Optional[Callable[[i :param script: The script to generate voices for :param on_voice_generated: A callback function to call when a voice is generated which takes the clip index and the URL of the generated audio """ + import requests audio_urls: List[str | None] = [] for i, clip in tqdm(enumerate(script.clips), desc="Generating voices", total=len(script.clips)): # skip if doesn't need audio, or if audio already exists (audio should never already exist, but just in case) diff --git a/sitcom_simulator/video/integrations/ffmpeg.py b/sitcom_simulator/video/integrations/ffmpeg.py index 6e3dd11..c49bde3 100644 --- a/sitcom_simulator/video/integrations/ffmpeg.py +++ b/sitcom_simulator/video/integrations/ffmpeg.py @@ -124,6 +124,7 @@ def concatenate_clips( :param background_music: The path to the background music file :param bgm_volume: The 
volume of the background music, between 0 and 1 """ + import ffmpeg # Create input sets for each file in the list input_clips = [ffmpeg.input(f) for f in filenames] From 38c4965c18b61a2ea9619fc67cc529fff2a18ad4 Mon Sep 17 00:00:00 2001 From: Josh Moody Date: Tue, 13 Feb 2024 17:26:45 -0700 Subject: [PATCH 06/40] added support for remote images --- pyproject.toml | 2 +- sitcom_simulator/video/video_generator.py | 17 +++++++++++++++++ 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 2c84ef5..1f1d3e6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "sitcom-simulator" -version = "0.0.13" +version = "0.0.14" authors = [ { name = "Josh Moody", email = "josh@joshmoody.org" }, ] diff --git a/sitcom_simulator/video/video_generator.py b/sitcom_simulator/video/video_generator.py index 550a999..6bd6f35 100644 --- a/sitcom_simulator/video/video_generator.py +++ b/sitcom_simulator/video/video_generator.py @@ -21,6 +21,23 @@ def render_video( :param clip_buffer_seconds: How much time to wait after characters finish talking :param min_clip_length: The minimum time to hold on a clip """ + # rely on image_path first, but if it's not there and image_url is, download the image + import requests + import tempfile + for i, clip in enumerate(script.clips): + if clip.image_path: + continue + if clip.image_url: + try: + response = requests.get(clip.image_url) + image_path = tempfile.NamedTemporaryFile(suffix=".png", delete=False).name + with open(image_path, 'wb') as f: + f.write(response.content) + clip.image_path = image_path + except Exception as e: + import logging + logging.error(f"Failed to download image for clip {i}: {e}") + from .integrations import ffmpeg return ffmpeg.render_video( script=script, From 2ead7b95c4453f72f6c7301edf2b75c2a8273fb6 Mon Sep 17 00:00:00 2001 From: Josh Moody Date: Tue, 13 Feb 2024 18:07:29 -0700 Subject: [PATCH 07/40] unfroze models for convenience --- pyproject.toml | 2 +- sitcom_simulator/models.py | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 1f1d3e6..a4f50f7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "sitcom-simulator" -version = "0.0.14" +version = "0.0.15" authors = [ { name = "Josh Moody", email = "josh@joshmoody.org" }, ] diff --git a/sitcom_simulator/models.py b/sitcom_simulator/models.py index 5bdaa8b..12f4faf 100644 --- a/sitcom_simulator/models.py +++ b/sitcom_simulator/models.py @@ -1,6 +1,6 @@ from dataclasses import dataclass, replace -@dataclass(frozen=True) +@dataclass class Character: """ A character in a script and information about their voice. @@ -27,7 +27,7 @@ def replace(self, **kwargs): """ return replace(self, **kwargs) -@dataclass(frozen=True) +@dataclass class Clip: """ A clip in a script, including the speaker, speech, and audio. @@ -90,7 +90,7 @@ def replace(self, **kwargs): """ return replace(self, **kwargs) -@dataclass(frozen=True) +@dataclass class ScriptMetadata: """ Metadata for a script. @@ -123,7 +123,7 @@ def replace(self, **kwargs): """ return replace(self, **kwargs) -@dataclass(frozen=True) +@dataclass class Script: """ Contains all the data for a script, including characters, clips, and metadata. @@ -162,7 +162,7 @@ def replace(self, **kwargs): """ return replace(self, **kwargs) -@dataclass(frozen=True) +@dataclass class VideoResult: """ The result of rendering a video. 
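The practical effect of dropping frozen=True is that pipeline code can assign to model fields in place, which the remote image and audio download code in the neighboring patches relies on. A small illustration (the field values are made up, and the partial dict assumes the remaining optional fields default to None via .get as shown earlier):

    from sitcom_simulator.models import Clip

    clip = Clip.from_dict({"speaker": "Narrator", "speech": "Meanwhile, back at the lab..."})

    # with frozen=True this assignment raised dataclasses.FrozenInstanceError;
    # after this change the clip can be patched in place:
    clip.image_path = "/tmp/frame_001.png"
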
From e708c1d0674b16e5a71cd5f5d6cb9e5bb2bd6ab7 Mon Sep 17 00:00:00 2001 From: Josh Moody Date: Tue, 13 Feb 2024 18:22:49 -0700 Subject: [PATCH 08/40] added support for remote audio --- pyproject.toml | 2 +- sitcom_simulator/video/video_generator.py | 15 +++++++++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index a4f50f7..a7ff2ce 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "sitcom-simulator" -version = "0.0.15" +version = "0.0.16" authors = [ { name = "Josh Moody", email = "josh@joshmoody.org" }, ] diff --git a/sitcom_simulator/video/video_generator.py b/sitcom_simulator/video/video_generator.py index 6bd6f35..7336214 100644 --- a/sitcom_simulator/video/video_generator.py +++ b/sitcom_simulator/video/video_generator.py @@ -38,6 +38,21 @@ def render_video( import logging logging.error(f"Failed to download image for clip {i}: {e}") + # same thing but with audio + for i, clip in enumerate(script.clips): + if clip.audio_path: + continue + if clip.audio_url: + try: + response = requests.get(clip.audio_url) + audio_path = tempfile.NamedTemporaryFile(suffix=".mp3", delete=False).name + with open(audio_path, 'wb') as f: + f.write(response.content) + clip.audio_path = audio_path + except Exception as e: + import logging + logging.error(f"Failed to download audio for clip {i}: {e}") + from .integrations import ffmpeg return ffmpeg.render_video( script=script, From 59290a45ec6c3c7a30cd3bcdee3871a9f0404ad8 Mon Sep 17 00:00:00 2001 From: Josh Moody Date: Tue, 13 Feb 2024 18:39:19 -0700 Subject: [PATCH 09/40] ffmpeg bugfix --- pyproject.toml | 2 +- sitcom_simulator/video/integrations/ffmpeg.py | 3 ++- sitcom_simulator/video/video_generator.py | 3 ++- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index a7ff2ce..c8a307a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "sitcom-simulator" -version = "0.0.16" +version = "0.0.17" authors = [ { name = "Josh Moody", email = "josh@joshmoody.org" }, ] diff --git a/sitcom_simulator/video/integrations/ffmpeg.py b/sitcom_simulator/video/integrations/ffmpeg.py index c49bde3..6b03ff1 100644 --- a/sitcom_simulator/video/integrations/ffmpeg.py +++ b/sitcom_simulator/video/integrations/ffmpeg.py @@ -101,7 +101,8 @@ def render_clip( with tempfile.NamedTemporaryFile(suffix='.mp4', delete=False) as temp_file: intermediate_clip = ( ffmpeg.output(*input_streams, temp_file.name, vcodec='libx264', preset='superfast', acodec='mp3', t=duration) - .run(overwrite_output=True, capture_stderr=True) + .overwrite_output() + .run() ) atexit.register(os.remove, temp_file.name) return temp_file.name diff --git a/sitcom_simulator/video/video_generator.py b/sitcom_simulator/video/video_generator.py index 7336214..cdc8050 100644 --- a/sitcom_simulator/video/video_generator.py +++ b/sitcom_simulator/video/video_generator.py @@ -44,8 +44,9 @@ def render_video( continue if clip.audio_url: try: + ext = clip.audio_url.split('.')[-1] response = requests.get(clip.audio_url) - audio_path = tempfile.NamedTemporaryFile(suffix=".mp3", delete=False).name + audio_path = tempfile.NamedTemporaryFile(suffix=ext, delete=False).name with open(audio_path, 'wb') as f: f.write(response.content) clip.audio_path = audio_path From 8f603fffd5c8f3e1b8eb4ae10edded9f1d92c9b9 Mon Sep 17 00:00:00 2001 From: Josh Moody Date: Tue, 13 Feb 2024 18:51:41 -0700 Subject: [PATCH 10/40] more audio probing bug fixes --- pyproject.toml | 2 +- 
sitcom_simulator/video/integrations/ffmpeg.py | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index c8a307a..293fffe 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "sitcom-simulator" -version = "0.0.17" +version = "0.0.18" authors = [ { name = "Josh Moody", email = "josh@joshmoody.org" }, ] diff --git a/sitcom_simulator/video/integrations/ffmpeg.py b/sitcom_simulator/video/integrations/ffmpeg.py index 6b03ff1..5a7021b 100644 --- a/sitcom_simulator/video/integrations/ffmpeg.py +++ b/sitcom_simulator/video/integrations/ffmpeg.py @@ -43,10 +43,12 @@ def render_clip( subtitle_style = box_style # + box_style # mix and match as desired try: - audio_duration = float(ffmpeg.probe(clip.audio_path.replace('/', '\\'))['streams'][0]['duration']) if clip.audio_path else 0 + audio_path = clip.audio_path.replace('/', '\\') if os.name == 'nt' else clip.audio_path + audio_duration = float(ffmpeg.probe(audio_path)['streams'][0]['duration']) if clip.audio_path else 0 except Exception as e: print(f"Error probing audio duration: {e}.\nHave you put ffmpeg and ffprobe binaries into the root project directory?") - raise e + print(clip.audio_path) + audio_duration = 0 duration = audio_duration + clip_buffer_seconds + speaking_delay_seconds duration = max(duration, min_clip_seconds) From 3815b4df085407b5af7c88de18dce625f0f7fdcd Mon Sep 17 00:00:00 2001 From: Josh Moody Date: Fri, 16 Feb 2024 17:26:43 -0700 Subject: [PATCH 11/40] made metadata more flexible --- sitcom_simulator/auto.py | 4 ++-- sitcom_simulator/image/image_generator.py | 2 +- sitcom_simulator/models.py | 18 ++++++++++++------ sitcom_simulator/script/script_generator.py | 2 +- 4 files changed, 16 insertions(+), 10 deletions(-) diff --git a/sitcom_simulator/auto.py b/sitcom_simulator/auto.py index 8f4bf30..3fe0f57 100644 --- a/sitcom_simulator/auto.py +++ b/sitcom_simulator/auto.py @@ -59,13 +59,13 @@ def create_sitcom( final_script = script_with_music - filename = final_script.metadata.title[:50].strip() or 'render' + filename = final_script.metadata.title[:50].strip() or 'render' if final_script.metadata.title else 'render' output_path = f"./{filename}.mp4" final_video_path = render_video(script=final_script, font=font, output_path=output_path) result = VideoResult( path=final_video_path, - title=final_script.metadata.title, + title=final_script.metadata.title if final_script.metadata.title else filename, description=prompt or 'an AI-generated meme video created with Sitcom Simulator' ) diff --git a/sitcom_simulator/image/image_generator.py b/sitcom_simulator/image/image_generator.py index 1fed574..128a397 100644 --- a/sitcom_simulator/image/image_generator.py +++ b/sitcom_simulator/image/image_generator.py @@ -35,7 +35,7 @@ def generate_images( image_paths.append(clip.image_path) continue if engine == "stability": - full_prompt = image_prompt + ', ' + script.metadata.art_style + full_prompt = f'{image_prompt}{', ' + script.metadata.art_style if script.metadata.art_style else ''}' image_path = stability.generate_image(prompt=full_prompt, width=width, height=height) else: # debug engine image_path = pillow.generate_image(width, height) diff --git a/sitcom_simulator/models.py b/sitcom_simulator/models.py index 12f4faf..7b6a6b3 100644 --- a/sitcom_simulator/models.py +++ b/sitcom_simulator/models.py @@ -98,10 +98,14 @@ class ScriptMetadata: :param title: The title of the script :param bgm_style: The style of the background music :param art_style: The style of 
the art + :param prompt: The prompt for the script + :param bgm_path: The path to the background music + :param misc: Any additional metadata """ - title: str - bgm_style: str - art_style: str + title: str | None + bgm_style: str | None + art_style: str | None + prompt: str | None bgm_path: str | None @staticmethod @@ -110,10 +114,12 @@ def from_dict(data: dict): Creates a ScriptMetadata from a dictionary with the same shape. All fields are required except for bgm_path. """ + # creates misc from all data attributes besides the main ones return ScriptMetadata( - title=data['title'], - bgm_style=data['bgm_style'], - art_style=data['art_style'], + title=data.get('title'), + bgm_style=data.get('bgm_style'), + art_style=data.get('art_style'), + prompt=data.get('prompt'), bgm_path=data.get('bgm_path'), ) diff --git a/sitcom_simulator/script/script_generator.py b/sitcom_simulator/script/script_generator.py index 833c516..85ab074 100644 --- a/sitcom_simulator/script/script_generator.py +++ b/sitcom_simulator/script/script_generator.py @@ -69,6 +69,6 @@ def script_from_file(path: str) -> Script: return script def formatted_script(script: Script) -> str: - metadata = f"Title: {script.metadata.title}\nStyle: {script.metadata.art_style}\n" + metadata = f"Title: {script.metadata.title or ''}\nStyle: {script.metadata.art_style or ''}\n" clips = "\n".join([f"{c.speaker}: {c.speech}" for c in script.clips if c.speaker]) return metadata + clips \ No newline at end of file From e6aeba658731928913d71ccb7d72e96cd16c985e Mon Sep 17 00:00:00 2001 From: Josh Moody Date: Fri, 16 Feb 2024 18:07:15 -0700 Subject: [PATCH 12/40] added audio delay settings and font settings to the API and CLI --- config.toml | 9 ------- pyproject.toml | 2 +- sitcom_simulator/auto.py | 11 +++++++- sitcom_simulator/cli.py | 17 ++++++------- sitcom_simulator/image/image_generator.py | 2 +- .../speech/integrations/fakeyou.py | 15 ++++++----- sitcom_simulator/speech/speech_generator.py | 25 ++++++++++++++++--- 7 files changed, 51 insertions(+), 30 deletions(-) delete mode 100644 config.toml diff --git a/config.toml b/config.toml deleted file mode 100644 index ee203bc..0000000 --- a/config.toml +++ /dev/null @@ -1,9 +0,0 @@ -# video font -font = "Arial" - -# how long to wait between starting FakeYou voice jobs -# (anything shorter than 30 seconds is likely to get rate limited and crash the app) -job_delay = 30 # this setting currently does nothing, working on it - -# how long to wait between polling pending FakeYou voice jobsd -poll_delay = 10 # this setting currently does nothing, working on it \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 293fffe..676d5af 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "sitcom-simulator" -version = "0.0.18" +version = "0.1.0" authors = [ { name = "Josh Moody", email = "josh@joshmoody.org" }, ] diff --git a/sitcom_simulator/auto.py b/sitcom_simulator/auto.py index 3fe0f57..c29ba8a 100644 --- a/sitcom_simulator/auto.py +++ b/sitcom_simulator/auto.py @@ -8,6 +8,8 @@ def create_sitcom( approve_script:bool=False, manual_select_characters:bool=True, upload_to_yt=False, + audio_job_delay:int=30, + audio_poll_delay:int=10, ): """ Generates a sitcom video based on a prompt or a script file. @@ -22,6 +24,8 @@ def create_sitcom( :param approve_script: If True, the script must be approved by the user before generating the video. :param manual_select_characters: If True, the user will be prompted to select the characters for the video. 
If False, the characters will be selected automatically by the language model. :param upload_to_yt: If True, the video will be uploaded to YouTube after it is generated. NOTE: currently does not work. + :param audio_job_delay: The number of seconds to wait between starting audio generation jobs. Lower values render faster but are more likely to get rate limited. (FakeYou only) + :param audio_poll_delay: The number of seconds to wait between polling for audio generation job completion. (FakeYou only) """ from .models import Script, VideoResult from .script import write_script @@ -53,7 +57,12 @@ def create_sitcom( if art_style: initial_script = initial_script.replace(metadata=initial_script.metadata.replace(art_style=art_style)) - script_with_voices = add_voices(initial_script, engine="fakeyou" if not debug else "gtts") + script_with_voices = add_voices( + initial_script, + engine="fakeyou" if not debug else "gtts", + audio_job_delay=audio_job_delay, + audio_poll_delay=audio_poll_delay, + ) script_with_images = add_images(script_with_voices, engine="stability" if not debug else "pillow") # could theoretically be done in parallel with the audio script_with_music = add_music(script_with_images) diff --git a/sitcom_simulator/cli.py b/sitcom_simulator/cli.py index 3e4fdeb..7dc6844 100644 --- a/sitcom_simulator/cli.py +++ b/sitcom_simulator/cli.py @@ -17,7 +17,9 @@ def _parse_args(): parser.add_argument('-u', '--upload', action="store_true", help="upload the generated video to YouTube") parser.add_argument('-m', '--manual-select-characters', action="store_true", help="manually select characters instead of using the AI to select them") parser.add_argument('-d', '--debug', action='store_true', help="skip expensive API calls, generating robotic TTS and blank images instead.") - + parser.add_argument('--font', type=str, help="the font to use for the video", default='Arial') + parser.add_argument('--audio-job-delay', type=int, default=30, help="the number of seconds to wait between starting audio generation jobs. Lower values render faster but are more likely to get rate limited") + parser.add_argument('--audio-poll-delay', type=int, default=10, help="the number of seconds to wait between polling for audio generation job completion") args = parser.parse_args() return args @@ -26,13 +28,6 @@ def main(): The main entry point for the CLI, invoked when the module is run as a script. 
""" print("\nSitcom Simulator\nBy Josh Moody\n") - - try: - with open("config.toml", "rb") as f: - config = tomllib.load(f) - except FileNotFoundError: - # no big deal - config = {} args = _parse_args() # do the magic @@ -41,7 +36,11 @@ def main(): art_style=args.style, script_path=args.script_path, debug=args.debug, - font=config.get("font", 'Arial'), + font=args.font, manual_select_characters=args.manual_select_characters, max_tokens=args.max_tokens, + approve_script=args.approve_script, + upload_to_yt=args.upload, + audio_job_delay=args.audio_job_delay, + audio_poll_delay=args.audio_poll_delay, ) \ No newline at end of file diff --git a/sitcom_simulator/image/image_generator.py b/sitcom_simulator/image/image_generator.py index 128a397..48c5446 100644 --- a/sitcom_simulator/image/image_generator.py +++ b/sitcom_simulator/image/image_generator.py @@ -35,7 +35,7 @@ def generate_images( image_paths.append(clip.image_path) continue if engine == "stability": - full_prompt = f'{image_prompt}{', ' + script.metadata.art_style if script.metadata.art_style else ''}' + full_prompt = f'{image_prompt}{", " + script.metadata.art_style if script.metadata.art_style else ""}' image_path = stability.generate_image(prompt=full_prompt, width=width, height=height) else: # debug engine image_path = pillow.generate_image(width, height) diff --git a/sitcom_simulator/speech/integrations/fakeyou.py b/sitcom_simulator/speech/integrations/fakeyou.py index 200a92e..3491610 100644 --- a/sitcom_simulator/speech/integrations/fakeyou.py +++ b/sitcom_simulator/speech/integrations/fakeyou.py @@ -13,9 +13,7 @@ import tempfile import atexit -JOB_DELAY = 20 # seconds JOB_RANDOMNESS = 3 # +- this value, might help bypass rate limiting -POLL_DELAY = 8 POLL_RANDOMNESS = 1 def download_voice(url: str): @@ -72,7 +70,7 @@ def alphanumeric(string: str): :param string: The input string """ - return re.sub(r'[^a-zA-Z0-9 ]', '', string) + return re.sub(r'[^a-zA-Z0-9 ]', '_', string) def get_possible_characters_from_prompt(prompt: str) -> dict: """ @@ -101,7 +99,12 @@ def get_possible_characters_from_prompt(prompt: str) -> dict: return possible_characters -def generate_voices(script: Script, on_voice_url_generated: Optional[Callable[[int, str], None]] = None) -> List[str | None]: +def generate_voices( + script: Script, + on_voice_url_generated: Optional[Callable[[int, str], None]] = None, + job_delay:int=30, + poll_delay:int=10, + ) -> List[str | None]: """ Sequentially generates voices for each line in the script using the FakeYou API. It is intentionally slow to avoid getting rate limited. 
@@ -146,7 +149,7 @@ def generate_voices(script: Script, on_voice_url_generated: Optional[Callable[[i raise Exception("Some sort of FakeYou API error occured", json) break job_token = json['inference_job_token'] - rand_job_delay = random.randrange(JOB_DELAY-JOB_RANDOMNESS, JOB_DELAY+JOB_RANDOMNESS) + rand_job_delay = random.randrange(job_delay-JOB_RANDOMNESS, job_delay+JOB_RANDOMNESS) # poll the job until complete logging.debug(f'Polling voice job {i}') @@ -157,7 +160,7 @@ def generate_voices(script: Script, on_voice_url_generated: Optional[Callable[[i 'Accept': 'application/json' } while not completed: - rand_delay = random.randrange(POLL_DELAY-POLL_RANDOMNESS, POLL_DELAY+POLL_RANDOMNESS) + rand_delay = random.randrange(poll_delay-POLL_RANDOMNESS, poll_delay+POLL_RANDOMNESS) time.sleep(rand_delay) response = requests.get(f'https://api.fakeyou.com/tts/job/{job_token}', headers=headers) json = response.json() diff --git a/sitcom_simulator/speech/speech_generator.py b/sitcom_simulator/speech/speech_generator.py index 331f6de..409b29a 100644 --- a/sitcom_simulator/speech/speech_generator.py +++ b/sitcom_simulator/speech/speech_generator.py @@ -9,6 +9,8 @@ def generate_voices( engine:Engine="fakeyou", on_voice_downloaded: Optional[Callable[[int, str], None]] = None, fakeyou_on_voice_url_generated: Optional[Callable[[int, str], None]] = None, + fakeyou_job_delay:int=30, + fakeyou_poll_delay:int=10, ): """ Generates and returns a list of voice clip paths for the given script using the given engine. @@ -20,13 +22,20 @@ def generate_voices( :param engine: The engine to use for generating voice clips :param on_voice_downloaded: A callback to call after each voice clip is downloaded which takes the clip index and path to the downloaded audio :param fakeyou_on_voice_url_generated: A callback to call after each FakeYou voice clip is generated which takes the clip index and url of the generated audio + :param fakeyou_job_delay: The number of seconds to wait between starting audio generation jobs. Lower values render faster but are more likely to get rate limited + :param fakeyou_poll_delay: The number of seconds to wait between polling for audio generation job completion """ from .integrations import fakeyou as fakeyou from .integrations import gtts as gtts # generating voice clips can take a LONG time if args.high_quality_audio == True # because of long delays to avoid API timeouts on FakeYou.com if engine == "fakeyou": - audio_urls = fakeyou.generate_voices(script, fakeyou_on_voice_url_generated) + audio_urls = fakeyou.generate_voices( + script, + fakeyou_on_voice_url_generated, + fakeyou_job_delay, + fakeyou_poll_delay, + ) audio_paths = [] for i, audio_url in enumerate(audio_urls): if audio_url is None: continue @@ -43,7 +52,9 @@ def generate_voices( def add_voices( script: Script, engine:Engine="fakeyou", - on_voice_generated: Optional[Callable[[int, str], None]] = None + on_voice_generated: Optional[Callable[[int, str], None]] = None, + audio_job_delay:int=30, + audio_poll_delay:int=10, ): """ Given a script, returns the same script but with the audio paths filled in. @@ -53,6 +64,14 @@ def add_voices( :param script: The script to add voices to :param engine: The engine to use for generating voice clips :param on_voice_generated: A callback to call after each voice clip is generated which takes the clip index and path to the generated audio + :param audio_job_delay: The number of seconds to wait between starting audio generation jobs. 
Lower values render faster but are more likely to get rate limited. (FakeYou only) + :param audio_poll_delay: The number of seconds to wait between polling for audio generation job completion. (FakeYou only) """ - audio_paths = generate_voices(script, engine=engine, fakeyou_on_voice_url_generated=on_voice_generated) + audio_paths = generate_voices( + script, + engine=engine, + fakeyou_on_voice_url_generated=on_voice_generated, + fakeyou_job_delay=audio_job_delay, + fakeyou_poll_delay=audio_poll_delay, + ) return script.replace(clips=[clip.replace(audio_path=audio_path) for clip, audio_path in zip(script.clips, audio_paths)]) \ No newline at end of file From 0fb0dc324caee55acdfb657599868a9a7a2ccc50 Mon Sep 17 00:00:00 2001 From: Josh Moody Date: Fri, 16 Feb 2024 18:40:36 -0700 Subject: [PATCH 13/40] renamed some parameters for clarity --- pyproject.toml | 2 +- sitcom_simulator/auto.py | 4 ++-- sitcom_simulator/speech/speech_generator.py | 12 ++++++------ 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 676d5af..4a46fa1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "sitcom-simulator" -version = "0.1.0" +version = "0.1.1" authors = [ { name = "Josh Moody", email = "josh@joshmoody.org" }, ] diff --git a/sitcom_simulator/auto.py b/sitcom_simulator/auto.py index c29ba8a..2e73938 100644 --- a/sitcom_simulator/auto.py +++ b/sitcom_simulator/auto.py @@ -60,8 +60,8 @@ def create_sitcom( script_with_voices = add_voices( initial_script, engine="fakeyou" if not debug else "gtts", - audio_job_delay=audio_job_delay, - audio_poll_delay=audio_poll_delay, + fakeyou_job_delay=audio_job_delay, + fakeyou_poll_delay=audio_poll_delay, ) script_with_images = add_images(script_with_voices, engine="stability" if not debug else "pillow") # could theoretically be done in parallel with the audio script_with_music = add_music(script_with_images) diff --git a/sitcom_simulator/speech/speech_generator.py b/sitcom_simulator/speech/speech_generator.py index 409b29a..4372f8e 100644 --- a/sitcom_simulator/speech/speech_generator.py +++ b/sitcom_simulator/speech/speech_generator.py @@ -53,8 +53,8 @@ def add_voices( script: Script, engine:Engine="fakeyou", on_voice_generated: Optional[Callable[[int, str], None]] = None, - audio_job_delay:int=30, - audio_poll_delay:int=10, + fakeyou_job_delay:int=30, + fakeyou_poll_delay:int=10, ): """ Given a script, returns the same script but with the audio paths filled in. @@ -64,14 +64,14 @@ def add_voices( :param script: The script to add voices to :param engine: The engine to use for generating voice clips :param on_voice_generated: A callback to call after each voice clip is generated which takes the clip index and path to the generated audio - :param audio_job_delay: The number of seconds to wait between starting audio generation jobs. Lower values render faster but are more likely to get rate limited. (FakeYou only) - :param audio_poll_delay: The number of seconds to wait between polling for audio generation job completion. (FakeYou only) + :param fakeyou_job_delay: The number of seconds to wait between starting audio generation jobs. Lower values render faster but are more likely to get rate limited. (FakeYou only) + :param fakeyou_poll_delay: The number of seconds to wait between polling for audio generation job completion. 
(FakeYou only) """ audio_paths = generate_voices( script, engine=engine, fakeyou_on_voice_url_generated=on_voice_generated, fakeyou_job_delay=fakeyou_job_delay, fakeyou_poll_delay=fakeyou_poll_delay, ) return script.replace(clips=[clip.replace(audio_path=audio_path) for clip, audio_path in zip(script.clips, audio_paths)]) \ No newline at end of file From bd9c8f962cdc60b1662389c6562659fa4c034f0f Mon Sep 17 00:00:00 2001 From: Josh Moody Date: Wed, 21 Feb 2024 19:37:21 -0700 Subject: [PATCH 14/40] added custom instructions --- docs/index.rst | 1 + docs/llm_instructions.rst | 25 +++++++++++++++++ pyproject.toml | 2 +- .../script/integrations/chatgpt/chatgpt.py | 10 +++++-- .../character_extraction_instructions.txt | 8 ++++++ .../fakeyou/character_extractor.py | 28 ++++++++++++------- .../instructions.py => llm_instructions.txt} | 12 ++++---- sitcom_simulator/script/script_generator.py | 28 ++++++++++++++++--- 8 files changed, 90 insertions(+), 24 deletions(-) create mode 100644 docs/llm_instructions.rst create mode 100644 sitcom_simulator/script/integrations/fakeyou/character_extraction_instructions.txt rename sitcom_simulator/script/{integrations/chatgpt/instructions.py => llm_instructions.txt} (84%) diff --git a/docs/index.rst b/docs/index.rst index 35e99bb..88338eb 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -20,6 +20,7 @@ Sitcom Simulator :maxdepth: 2 :caption: Reference + llm_instructions api Index diff --git a/docs/llm_instructions.rst b/docs/llm_instructions.rst new file mode 100644 index 0000000..c8415ca --- /dev/null +++ b/docs/llm_instructions.rst @@ -0,0 +1,25 @@ +LLM Instructions +======================== + +Sitcom Simulator uses large language models (LLMs) in several places. +The default prompts are shown below. +They can be overridden. +They are subject to frequent change as improvements are discovered. + +Script Writing +-------------- + +Parameters: ``characters``, ``music_categories``, ``prompt`` + +.. literalinclude:: ../sitcom_simulator/script/llm_instructions.txt + :language: text + :caption: llm_instructions.txt + +Character Extraction +-------------------- + +Parameters: ``prompt`` + +.. literalinclude:: ../sitcom_simulator/script/integrations/fakeyou/character_extraction_instructions.txt + :language: text + :caption: character_extraction_instructions.txt \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 4a46fa1..e5af87b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "sitcom-simulator" -version = "0.1.1" +version = "0.1.2" authors = [ { name = "Josh Moody", email = "josh@joshmoody.org" }, ] diff --git a/sitcom_simulator/script/integrations/chatgpt/chatgpt.py b/sitcom_simulator/script/integrations/chatgpt/chatgpt.py index 7fdeed0..fd8bfaa 100644 --- a/sitcom_simulator/script/integrations/chatgpt/chatgpt.py +++ b/sitcom_simulator/script/integrations/chatgpt/chatgpt.py @@ -1,14 +1,20 @@ -def chat(prompt: str, max_tokens:int=2048, temperature:float=1): +def chat( + prompt: str, + max_tokens:int=2048, + temperature:float=1, + model: str="gpt-3.5-turbo", + ): """ Given a prompt, returns a response from ChatGPT. :param prompt: The prompt for the chat :param max_tokens: The maximum number of tokens to generate :param temperature: The temperature to use when generating the response, which controls randomness. Higher values make the response more random, while lower values make the response more deterministic.
+ :param model: The model to use for the chat """ import openai completion = openai.ChatCompletion.create( - model="gpt-3.5-turbo", + model=model, temperature=temperature, max_tokens=max_tokens, messages=[ diff --git a/sitcom_simulator/script/integrations/fakeyou/character_extraction_instructions.txt b/sitcom_simulator/script/integrations/fakeyou/character_extraction_instructions.txt new file mode 100644 index 0000000..dcf99bb --- /dev/null +++ b/sitcom_simulator/script/integrations/fakeyou/character_extraction_instructions.txt @@ -0,0 +1,8 @@ +Generate a list of potential characters to use in a short video of this prompt: + +{prompt} + +Your results will be searched for in the FakeYou database for potential AI voices to use. +The characters must be likely to have an AI voice on the internet somewhere, e.g., famous people/characters. +Keep the list short and focused. +Structure your output as a pure JSON list of strings, no markdown. \ No newline at end of file diff --git a/sitcom_simulator/script/integrations/fakeyou/character_extractor.py b/sitcom_simulator/script/integrations/fakeyou/character_extractor.py index 76c1c55..470505d 100644 --- a/sitcom_simulator/script/integrations/fakeyou/character_extractor.py +++ b/sitcom_simulator/script/integrations/fakeyou/character_extractor.py @@ -6,26 +6,34 @@ import logging from typing import List -def generate_character_list(prompt: str) -> List[Character]: +def generate_character_list(prompt: str, custom_instructions: str | None=None) -> List[Character]: """ Uses a large language model to generate a list of possible famous characters related to the prompt. :param prompt: The user-submitted prompt + :param custom_instructions: A string containing custom instructions for the language model. Must contain the placeholder '{prompt}'. """ - - instructions = f"""Generate a list of potential characters to use in a short video of this prompt: - {prompt} - - Your results will be searched for in the FakeYou database for potential AI voices to use. - The characters must be likely to have an AI voice on the internet somewhere. - Keep the list short and focused. - Structure your output as a JSON list of strings. - """ + + if custom_instructions: + instructions = custom_instructions + else: + from pathlib import Path + current_file_path = Path(__file__).resolve() + current_dir = current_file_path.parent + instructions_path = current_dir / 'character_extraction_instructions.txt' + with open(instructions_path, 'r') as f: + instructions = f.read() + + if "{prompt}" not in instructions: + raise ValueError("Custom instructions file must contain the placeholder '{prompt}'") + instructions = instructions.format(prompt=prompt) + from sitcom_simulator.script.llm import chat import requests raw_response = chat(instructions) + logging.debug("Raw character extractor response from LLM:", raw_response) character_names = json.loads(raw_response) print("Characters proposed:", ", ".join(character_names)) diff --git a/sitcom_simulator/script/integrations/chatgpt/instructions.py b/sitcom_simulator/script/llm_instructions.txt similarity index 84% rename from sitcom_simulator/script/integrations/chatgpt/instructions.py rename to sitcom_simulator/script/llm_instructions.txt index c4cb43a..df6bb6e 100644 --- a/sitcom_simulator/script/integrations/chatgpt/instructions.py +++ b/sitcom_simulator/script/llm_instructions.txt @@ -1,6 +1,4 @@ -# I think textwrap has a feature to eliminate left padding, but I'm not sure how to use it. 
- -base_prompt = """You are a witty, avant-garde creative genius who writes short video scripts consisting of AI-generated still images and audio. +You are a witty, avant-garde creative genius who writes short video scripts consisting of AI-generated still images and audio. Your output should be structured in TOML. Your output file will have these top level parts in this order: clips and metadata @@ -35,7 +33,7 @@ [metadata] title: a clever title for the video. bgm_style: specifies the video's background music style from the set ({music_categories}). Avoid comedy when possible -art_style: appended to each image prompt. Be specific, e.g., "1980s sitcom", "cinematic bokeh blur", "claymation", "trending on artstation" +art_style: appended to each image prompt. Thorough and descriptive, e.g., "still from a 1980s sitcom on VHS, film grain with saturated colors and a low-budget aesthetic but impeccable composition." metadata is a table, and is always last, to give you time to ponder the title and styles AFTER writing the script. Do NOT put title at the beginning of the file. @@ -50,8 +48,8 @@ - Keep famous characters in character - End with a twist. NO generic, boring happy endings. - The last clip should always be an unexpected, wacky twist. -- Narrators should be used sparingly (it's better to hear from the characters directly) -- No TOML comments (#) +- Do not use narrators unless absolutely necessary. +- No TOML comments (#) or markdown. Just pure TOML. Now, take a deep breath and a shot of whiskey, and write a script for the following video: @@ -59,4 +57,4 @@ The characters at your disposal are: {characters} -Have fun!""" \ No newline at end of file +Have fun! \ No newline at end of file diff --git a/sitcom_simulator/script/script_generator.py b/sitcom_simulator/script/script_generator.py index 85ab074..6f25f39 100644 --- a/sitcom_simulator/script/script_generator.py +++ b/sitcom_simulator/script/script_generator.py @@ -10,6 +10,8 @@ def write_script( max_tokens:int=2048, require_approval:bool=False, temperature:float=0.5, + custom_script_instructions: str | None=None, + custom_character_instructions: str | None=None, fakeyou_characters:bool=True, ) -> Script: """ @@ -23,10 +25,12 @@ def write_script( :param max_tokens: The maximum number of tokens to generate :param require_approval: Whether to prompt the user to approve the generated script :param temperature: The temperature to use when generating the script + :param custom_script_instructions: A string containing custom instructions for the language model writing the script. Must contain the placeholders '{prompt}', '{music_categories}', and '{characters}'. + :param custom_character_instructions: A string containing custom instructions for the language model extracting the characters from the prompt. Must contain the placeholder '{prompt}'. 
:param fakeyou_characters: Whether to restrict character selection to only voices from fakeyou.com """ from ..speech.integrations.fakeyou import get_possible_characters_from_prompt - from .integrations.chatgpt import chatgpt, instructions + from .integrations.chatgpt import chatgpt from .integrations.fakeyou.character_extractor import generate_character_list from ..music.integrations.freepd import MusicCategory @@ -37,18 +41,34 @@ def write_script( select_characters: Callable = fakeyou_select_characters if fakeyou_characters else debug_select_characters characters = select_characters(possible_characters) else: - characters = generate_character_list(prompt) + characters = generate_character_list(prompt, custom_instructions=custom_character_instructions) characters_str = ", ".join([c.name for c in characters]) music_categories_str = ", ".join(MusicCategory.values()) - full_prompt = instructions.base_prompt.format(prompt=prompt, characters=characters_str, max_tokens=max_tokens, music_categories=music_categories_str) + + if custom_script_instructions: + instructions = custom_script_instructions + else: + from pathlib import Path + current_file_path = Path(__file__).resolve() + current_dir = current_file_path.parent + instructions_path = current_dir / "llm_instructions.txt" + with open(instructions_path, 'r') as f: + instructions = f.read() + + # check for placeholders + if "{prompt}" not in instructions or "{music_categories}" not in instructions or "{characters}" not in instructions: + raise ValueError("Custom instructions file must contain the placeholders '{prompt}', '{music_categories}', and '{characters}'") + + full_prompt = instructions.format(prompt=prompt, characters=characters_str, max_tokens=max_tokens, music_categories=music_categories_str) approved = False while not approved: raw_script= chatgpt.chat(full_prompt, temperature=temperature, max_tokens=max_tokens) + logging.debug("Raw script", raw_script) toml_script = tomllib.loads(raw_script) toml_script["characters"] = [asdict(c) for c in characters] # from characters to dict back to character. Refactor at some point. script = Script.from_dict(toml_script) - logging.debug(script) + logging.debug("TOML script", script) print(formatted_script(script)) if(require_approval): validated = None From bf09a04e9e6f5e7decc4d940dc2644ad7845f35f Mon Sep 17 00:00:00 2001 From: Josh Moody Date: Mon, 26 Feb 2024 18:56:54 -0700 Subject: [PATCH 15/40] added the ability to customize the style of video captions --- pyproject.toml | 2 +- sitcom_simulator/auto.py | 13 +- sitcom_simulator/cli.py | 2 + sitcom_simulator/script/script_generator.py | 4 +- sitcom_simulator/video/integrations/ffmpeg.py | 189 +++++++++++++----- sitcom_simulator/video/video_generator.py | 46 ++++- 6 files changed, 195 insertions(+), 61 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index e5af87b..fdd3beb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "sitcom-simulator" -version = "0.1.2" +version = "0.2.0" authors = [ { name = "Josh Moody", email = "josh@joshmoody.org" }, ] diff --git a/sitcom_simulator/auto.py b/sitcom_simulator/auto.py index 2e73938..5ccb064 100644 --- a/sitcom_simulator/auto.py +++ b/sitcom_simulator/auto.py @@ -10,6 +10,7 @@ def create_sitcom( upload_to_yt=False, audio_job_delay:int=30, audio_poll_delay:int=10, + caption_bg_style:str="box_shadow", ): """ Generates a sitcom video based on a prompt or a script file. 
@@ -26,8 +27,9 @@ def create_sitcom( :param upload_to_yt: If True, the video will be uploaded to YouTube after it is generated. NOTE: currently does not work. :param audio_job_delay: The number of seconds to wait between starting audio generation jobs. Lower values render faster but are more likely to get rate limited. (FakeYou only) :param audio_poll_delay: The number of seconds to wait between polling for audio generation job completion. (FakeYou only) + :param caption_bg_style: The style of the background behind the captions. """ - from .models import Script, VideoResult + from .models import VideoResult from .script import write_script from .speech import add_voices from .image import add_images @@ -70,7 +72,12 @@ def create_sitcom( filename = final_script.metadata.title[:50].strip() or 'render' if final_script.metadata.title else 'render' output_path = f"./{filename}.mp4" - final_video_path = render_video(script=final_script, font=font, output_path=output_path) + final_video_path = render_video( + script=final_script, + font=font, + output_path=output_path, + caption_bg_style=caption_bg_style, + ) result = VideoResult( path=final_video_path, @@ -78,6 +85,8 @@ def create_sitcom( description=prompt or 'an AI-generated meme video created with Sitcom Simulator' ) + print(f"Video generated at {final_video_path}") + # if upload_to_yt: # title = prompt # keywords = [word for word in prompt.split(' ') if len(word) > 3] if prompt else ["sitcom", "funny", "comedy", "ai", "deepfake"] diff --git a/sitcom_simulator/cli.py b/sitcom_simulator/cli.py index 7dc6844..a9b80c1 100644 --- a/sitcom_simulator/cli.py +++ b/sitcom_simulator/cli.py @@ -20,6 +20,7 @@ def _parse_args(): parser.add_argument('--font', type=str, help="the font to use for the video", default='Arial') parser.add_argument('--audio-job-delay', type=int, default=30, help="the number of seconds to wait between starting audio generation jobs. Lower values render faster but are more likely to get rate limited") parser.add_argument('--audio-poll-delay', type=int, default=10, help="the number of seconds to wait between polling for audio generation job completion") + parser.add_argument('--text-shadow', action='store_true', help="use text shadow for captions instead of box background") args = parser.parse_args() return args @@ -43,4 +44,5 @@ def main(): upload_to_yt=args.upload, audio_job_delay=args.audio_job_delay, audio_poll_delay=args.audio_poll_delay, + caption_bg_style="text_shadow" if args.text_shadow else "box_shadow" ) \ No newline at end of file diff --git a/sitcom_simulator/script/script_generator.py b/sitcom_simulator/script/script_generator.py index 6f25f39..17acbda 100644 --- a/sitcom_simulator/script/script_generator.py +++ b/sitcom_simulator/script/script_generator.py @@ -10,6 +10,7 @@ def write_script( max_tokens:int=2048, require_approval:bool=False, temperature:float=0.5, + model:str="gpt-3.5-turbo", custom_script_instructions: str | None=None, custom_character_instructions: str | None=None, fakeyou_characters:bool=True, @@ -25,6 +26,7 @@ def write_script( :param max_tokens: The maximum number of tokens to generate :param require_approval: Whether to prompt the user to approve the generated script :param temperature: The temperature to use when generating the script + :param model: The language model to use :param custom_script_instructions: A string containing custom instructions for the language model writing the script. Must contain the placeholders '{prompt}', '{music_categories}', and '{characters}'. 
:param custom_character_instructions: A string containing custom instructions for the language model extracting the characters from the prompt. Must contain the placeholder '{prompt}'. :param fakeyou_characters: Whether to restrict character selection to only voices from fakeyou.com @@ -63,7 +65,7 @@ def write_script( full_prompt = instructions.format(prompt=prompt, characters=characters_str, max_tokens=max_tokens, music_categories=music_categories_str) approved = False while not approved: - raw_script= chatgpt.chat(full_prompt, temperature=temperature, max_tokens=max_tokens) + raw_script= chatgpt.chat(full_prompt, temperature=temperature, max_tokens=max_tokens, model=model) logging.debug("Raw script", raw_script) toml_script = tomllib.loads(raw_script) toml_script["characters"] = [asdict(c) for c in characters] # from characters to dict back to character. Refactor at some point. diff --git a/sitcom_simulator/video/integrations/ffmpeg.py b/sitcom_simulator/video/integrations/ffmpeg.py index 5a7021b..aa12758 100644 --- a/sitcom_simulator/video/integrations/ffmpeg.py +++ b/sitcom_simulator/video/integrations/ffmpeg.py @@ -1,3 +1,4 @@ +import random from ...models import Script, Clip from typing import List import os @@ -5,16 +6,115 @@ from tqdm import tqdm import tempfile import atexit +from dataclasses import dataclass + +@dataclass +class ShadowSettings: + """ + Settings for shadows in a video. + + :param color: The color of the shadow + :param alpha: The alpha of the shadow + :param x: The x offset of the shadow + :param y: The y offset of the shadow + """ + color: str = 'black' + alpha: float = 0.7 + x: int = 5 + y: int = 5 + + def to_dict(self): + """ + Returns a dictionary representation of the shadow settings for use in an FFmpeg filter. + """ + return { + "shadowcolor": f"{self.color}@{self.alpha}", + "shadowx": self.x, + "shadowy": self.y + } + +@dataclass +class BoxSettings: + """ + Settings for boxes in a video. + + :param color: The color of the box + :param alpha: The alpha of the box + :param border_width: The width of the box border + """ + color: str = 'black' + alpha: float = 0.5 + border_width: int = 10 + + def to_dict(self): + """ + Returns a dictionary representation of the box settings for use in an FFmpeg filter. + """ + return { + "box": 1, + "boxcolor": f"{self.color}@{self.alpha}", + "boxborderw": self.border_width + } + +@dataclass +class CaptionSettings: + """ + Settings for captions in a video. + + :param font: The path to the font file to use for the captions + :param max_width: The maximum width of the captions, in characters + :param y_ratio_from_bottom: The y ratio from the bottom of the screen to place the captions + """ + font: str = 'Arial' + max_width: int = 30 + y_ratio_from_bottom: float = 6/24 + + def formatted_caption(self, text: str): + """ + Renders a caption with the given text and returns the caption string. + + :param text: The text of the caption + :param width: The width of the video + :param height: The height of the video + """ + return textwrap.fill(text, width=self.max_width) + +@dataclass +class ClipSettings: + """ + Settings for rendering video clips. 
+ + :param clip_buffer_seconds: How much time to wait after characters finish talking + :param min_clip_seconds: The minimum time to hold on a clip + :param speaking_delay_seconds: Delay before the audio kicks in + """ + clip_buffer_seconds:float=0.15 + min_clip_seconds:float=1.5 + speaking_delay_seconds:float=0.12 + +failed_image_captions = [ + "This image has been seized by the FBI", + "REDACTED", + "This image has been classified", + "CENSORED", + "This image has been confiscated", + "This image has been banned in your country", + "This image has been quarantined", + "[image too dangerous to be seen by human eyes]", + "[Intense Violence]", + "[Innappropriate Content]", + "[Explicit Content]", + "[Scandalous Content]", + "Image seized by the government", +] def render_clip( clip: Clip, - font: str, width:int=720, height:int=1280, - clip_buffer_seconds:float=0.15, - min_clip_seconds:float=1.5, - speaking_delay_seconds:float=0.12, - caption_max_width:int=30, + clip_settings:ClipSettings=ClipSettings(), + caption_settings:CaptionSettings=CaptionSettings(), + caption_bg_settings:BoxSettings|ShadowSettings=BoxSettings(), ): """ Renders a video clip from the given clip object and returns the path to the rendered video file. @@ -23,24 +123,18 @@ def render_clip( :param font: The path to the font file to use for the captions :param width: The width of the video :param height: The height of the video - :param clip_buffer_seconds: How much time to wait after characters finish talking - :param min_clip_seconds: The minimum time to hold on a clip - :param speaking_delay_seconds: Delay before the audio kicks in + :param clip_settings: The settings for rendering the video clip :param caption_max_width: The maximum width of the captions, in characters + :param caption_settings: The settings for the captions + :param caption_bg_settings: The settings for the caption background """ import ffmpeg - caption = clip.speech + caption = clip.speech or clip.title + title_clip = not not clip.title if caption: - caption = textwrap.fill(caption, width=caption_max_width) - - subtitle_y_ratio_from_bottom = 6/24 - scale_factor = width / 720 + caption = caption_settings.formatted_caption(caption) - # If you want to add a shadow: - shadow_style = ":shadowcolor=black@0.7:shadowx=3:shadowy=3" - # If you want to add a transparent grey background box: - box_style = ":box=1:boxcolor=black@0.4:boxborderw=10" - subtitle_style = box_style # + box_style # mix and match as desired + scale_factor = width / 720 # 720 is the reference screen width try: audio_path = clip.audio_path.replace('/', '\\') if os.name == 'nt' else clip.audio_path @@ -50,12 +144,15 @@ def render_clip( print(clip.audio_path) audio_duration = 0 - duration = audio_duration + clip_buffer_seconds + speaking_delay_seconds - duration = max(duration, min_clip_seconds) + duration = audio_duration + clip_settings.clip_buffer_seconds + clip_settings.speaking_delay_seconds + duration = max(duration, clip_settings.min_clip_seconds) if clip.duration and not clip.speaker: # 'not speaker' in case the llm forgets proper syntax duration = clip.duration - if clip.image_path is None: + no_image = clip.image_path is None + seized_image = clip.image_path is None and not title_clip + + if no_image or seized_image: video_input = ffmpeg.input(f'color=c=black:s={width}x{height}:d=5', f='lavfi') else: video_input = ( @@ -64,7 +161,7 @@ def render_clip( .filter('crop', width, height) ) - speaking_delay_ms =speaking_delay_seconds * 1000 + speaking_delay_ms = 
clip_settings.speaking_delay_seconds * 1000 # make sure every clip has an audio track, even if it's silent if clip.audio_path is None: @@ -77,26 +174,20 @@ def render_clip( .filter('apad', pad_dur=duration) ) - # Modify the video input to include subtitles - - if caption: + caption_bg_dict = caption_bg_settings.to_dict() if isinstance(caption_bg_settings, BoxSettings) else caption_bg_settings.to_dict() + + if caption or seized_image: video_input = video_input.filter( 'drawtext', - text=caption, - fontfile=font, + text=caption if caption else random.choice(failed_image_captions), + fontfile=caption_settings.font, fontsize=42 * scale_factor, # scales the font size with 720px as the reference screen width fontcolor='white', text_align="M+C", # had to dig deep into FFmpeg source code to learn that you combine flags with a plus sign x='(w - text_w) / 2', - y=f'(h - (text_h / 2)) - h*{subtitle_y_ratio_from_bottom}', **{ - "shadowcolor": "black@0.6", - "shadowx": -4 * scale_factor, - "shadowy": 4 * scale_factor, - } if subtitle_style == shadow_style else { - "box": 1, - "boxcolor": "black@0.5", - "boxborderw": 10 * scale_factor - }) + y=f'(h - (text_h / 2)) - h*{caption_settings.y_ratio_from_bottom if not title_clip else 0.5}', + **caption_bg_dict, + ) try: input_streams = [video_input] if audio_input is None else [video_input, audio_input] @@ -104,7 +195,7 @@ def render_clip( intermediate_clip = ( ffmpeg.output(*input_streams, temp_file.name, vcodec='libx264', preset='superfast', acodec='mp3', t=duration) .overwrite_output() - .run() + .run(capture_stderr=True, overwrite_output=True) ) atexit.register(os.remove, temp_file.name) return temp_file.name @@ -176,14 +267,12 @@ def concatenate_clips( # TODO: support aspect ratios 16:9 and 1:1 def render_video( script: Script, - font: str, output_path: str = 'output.mp4', width:int=720, height:int=1280, - clip_buffer_seconds=0.15, - min_clip_length=1.5, - speaking_delay_seconds=0.12, - caption_max_width=30, + clip_settings:ClipSettings=ClipSettings(), + caption_settings:CaptionSettings=CaptionSettings(), + caption_bg_settings:BoxSettings|ShadowSettings=BoxSettings(), ): """ Renders a video from the given script and returns the path to the rendered video file. @@ -191,26 +280,22 @@ def render_video( At present, only 9:16 aspect ratio is supported, but 16:9 and 1:1 will be supported in the future. 
:param script: The script to render - :param font: The path to the font file to use for the captions :param output_path: The path to save the rendered video :param width: The width of the video :param height: The height of the video - :param clip_buffer_seconds: How much time to wait after characters finish talking - :param min_clip_length: The minimum time to hold on a clip - :param speaking_delay_seconds: Delay before the audio kicks in - :param caption_max_width: The maximum width of the captions, in characters + :param clip_settings: The settings for rendering the video clip + :param caption_settings: The settings for the captions + :param caption_bg_settings: The settings for the caption background """ intermediate_clips = [] for clip in tqdm(script.clips, desc="Rendering intermediate video clips"): clip_file = render_clip( clip=clip, - font=font, width=width, height=height, - clip_buffer_seconds=clip_buffer_seconds, - min_clip_seconds=min_clip_length, - speaking_delay_seconds=speaking_delay_seconds, - caption_max_width=caption_max_width, + clip_settings=clip_settings, + caption_settings=caption_settings, + caption_bg_settings=caption_bg_settings, ) intermediate_clips.append(clip_file) diff --git a/sitcom_simulator/video/video_generator.py b/sitcom_simulator/video/video_generator.py index cdc8050..f014184 100644 --- a/sitcom_simulator/video/video_generator.py +++ b/sitcom_simulator/video/video_generator.py @@ -1,6 +1,8 @@ -from typing import List +from typing import List, Literal from ..models import Script +CaptionBg = Literal['box_shadow', 'text_shadow', 'none'] + def render_video( script: Script, font: str, @@ -8,7 +10,13 @@ def render_video( width:int=1080, height:int=1920, clip_buffer_seconds=0.35, - min_clip_length=1.5, + min_clip_seconds=1.5, + speaking_delay_seconds=0.12, + caption_bg_style:CaptionBg='box_shadow', + caption_bg_alpha=0.6, + caption_bg_color="black", + caption_bg_shadow_distance_x=5, + caption_bg_shadow_distance_y=5, ): """ Renders a video from the given script and returns the path to the rendered video. 
@@ -20,6 +28,12 @@ def render_video( :param height: The height of the video to render :param clip_buffer_seconds: How much time to wait after characters finish talking :param min_clip_length: The minimum time to hold on a clip + :param speaking_delay_seconds: How much time to wait after a character starts talking + :param caption_bg_style: The style of the background behind the captions + :param caption_bg_alpha: The alpha of the background behind the captions + :param caption_bg_color: The color of the background behind the captions + :param caption_bg_shadow_distance_x: The x distance of the shadow behind the captions + :param caption_bg_shadow_distance_y: The y distance of the shadow behind the captions """ # rely on image_path first, but if it's not there and image_url is, download the image import requests @@ -55,12 +69,34 @@ def render_video( logging.error(f"Failed to download audio for clip {i}: {e}") from .integrations import ffmpeg + from .integrations.ffmpeg import ClipSettings, CaptionSettings, BoxSettings, ShadowSettings + + caption_bg_settings = None + if caption_bg_style == 'box_shadow': + caption_bg_settings = BoxSettings( + alpha=caption_bg_alpha, + color=caption_bg_color, + ) + elif caption_bg_style == 'text_shadow': + caption_bg_settings = ShadowSettings( + alpha=caption_bg_alpha, + color='black', + x=caption_bg_shadow_distance_x, + y=caption_bg_shadow_distance_y, + ) + return ffmpeg.render_video( script=script, - font=font, output_path=output_path, width=width, height=height, - clip_buffer_seconds=clip_buffer_seconds, - min_clip_length=min_clip_length + caption_settings=CaptionSettings( + font=font, + ), + clip_settings=ClipSettings( + clip_buffer_seconds=clip_buffer_seconds, + min_clip_seconds=min_clip_seconds, + speaking_delay_seconds=speaking_delay_seconds, + ), + caption_bg_settings=caption_bg_settings, ) \ No newline at end of file From d00ba7ce28b9cd226dae6f707eee89dffe3fb5a2 Mon Sep 17 00:00:00 2001 From: Josh Moody Date: Mon, 26 Feb 2024 21:40:14 -0700 Subject: [PATCH 16/40] added fakeyou authorization --- docs/conf.py | 4 +-- example.env | 6 +++- pyproject.toml | 3 +- requirements.txt | 3 +- sitcom_simulator/auto.py | 17 ++++++++--- sitcom_simulator/cli.py | 10 +++++-- .../fakeyou/character_selector.py | 4 +-- sitcom_simulator/script/script_generator.py | 6 ++-- .../speech/integrations/fakeyou.py | 29 ++++++++++++++++++- 9 files changed, 64 insertions(+), 18 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index 6dcfe2d..1ce3b93 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -6,13 +6,13 @@ # -- Project information ----------------------------------------------------- # https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information -import tomllib +import toml import os import sys sys.path.insert(0, os.path.abspath('..')) # enable importing sitcom_simulator with open('../pyproject.toml', 'rb') as pyproject: - pyproject = tomllib.load(pyproject) + pyproject = toml.load(pyproject) project = pyproject['project']['name'] author = pyproject['project']['authors'][0]['name'] diff --git a/example.env b/example.env index 24addf0..ded40f5 100644 --- a/example.env +++ b/example.env @@ -1,3 +1,7 @@ # copy this file to '.env' and replace the values with your personal API keys STABILITY_API_KEY='your_key_here' -OPENAI_API_KEY='your_key_here' \ No newline at end of file +OPENAI_API_KEY='your_key_here' + +# optional, but speeds up voice generation +FAKEYOU_USERNAME='your_username_or_email_here' +FAKEYOU_PASSWORD='your_password_here' \ No newline at end 
of file diff --git a/pyproject.toml b/pyproject.toml index fdd3beb..5e4e353 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "sitcom-simulator" -version = "0.2.0" +version = "0.3.0" authors = [ { name = "Josh Moody", email = "josh@joshmoody.org" }, ] @@ -23,6 +23,7 @@ dependencies = [ "Pillow~=10.0.1", "beautifulsoup4~=4.12.2", "requests~=2.31.0", + "toml", # "fakeyou==1.2.5", Currently using raw HTTP requests instead # "moviepy==1.0.3", No longer supported due to lack of features. Using ffmpeg-python instead ] diff --git a/requirements.txt b/requirements.txt index 83dc7a2..cac305d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,4 +7,5 @@ gtts~=2.3.2 Pillow~=10.0.1 beautifulsoup4~=4.12.2 requests~=2.31.0 -mypy~=1.8.0 \ No newline at end of file +mypy~=1.8.0 +toml \ No newline at end of file diff --git a/sitcom_simulator/auto.py b/sitcom_simulator/auto.py index 5ccb064..2ef6edb 100644 --- a/sitcom_simulator/auto.py +++ b/sitcom_simulator/auto.py @@ -2,7 +2,8 @@ def create_sitcom( prompt: str | None = None, art_style: str | None = None, script_path: str | None = None, - debug: bool=False, + debug_images: bool=False, + debug_audio: bool=False, font: str = 'Arial', max_tokens:int=2048, approve_script:bool=False, @@ -11,6 +12,7 @@ def create_sitcom( audio_job_delay:int=30, audio_poll_delay:int=10, caption_bg_style:str="box_shadow", + save_script:bool=False, ): """ Generates a sitcom video based on a prompt or a script file. @@ -49,7 +51,7 @@ def create_sitcom( manual_character_selection=manual_select_characters, max_tokens=max_tokens, require_approval=approve_script, - fakeyou_characters=not debug, + fakeyou_characters=not debug_audio, ) elif script_path and not prompt: initial_script = script_from_file(script_path) @@ -61,11 +63,11 @@ def create_sitcom( script_with_voices = add_voices( initial_script, - engine="fakeyou" if not debug else "gtts", + engine="fakeyou" if not debug_audio else "gtts", fakeyou_job_delay=audio_job_delay, fakeyou_poll_delay=audio_poll_delay, ) - script_with_images = add_images(script_with_voices, engine="stability" if not debug else "pillow") # could theoretically be done in parallel with the audio + script_with_images = add_images(script_with_voices, engine="stability" if not debug_images else "pillow") # could theoretically be done in parallel with the audio script_with_music = add_music(script_with_images) final_script = script_with_music @@ -87,6 +89,13 @@ def create_sitcom( print(f"Video generated at {final_video_path}") + if save_script: + import toml + from dataclasses import asdict + with open(f"./{filename}.toml", 'w') as f: + f.write(toml.dumps(asdict(final_script))) + print(f"Script saved at ./{filename}.toml") + # if upload_to_yt: # title = prompt # keywords = [word for word in prompt.split(' ') if len(word) > 3] if prompt else ["sitcom", "funny", "comedy", "ai", "deepfake"] diff --git a/sitcom_simulator/cli.py b/sitcom_simulator/cli.py index a9b80c1..fca96c1 100644 --- a/sitcom_simulator/cli.py +++ b/sitcom_simulator/cli.py @@ -1,6 +1,5 @@ from .auto import create_sitcom import argparse -import tomllib def _parse_args(): parser = argparse.ArgumentParser( @@ -17,10 +16,13 @@ def _parse_args(): parser.add_argument('-u', '--upload', action="store_true", help="upload the generated video to YouTube") parser.add_argument('-m', '--manual-select-characters', action="store_true", help="manually select characters instead of using the AI to select them") parser.add_argument('-d', '--debug', action='store_true', help="skip 
expensive API calls, generating robotic TTS and blank images instead.") + parser.add_argument('--debug-images', action='store_true', help="skip expensive image generation API calls, generating blank images instead.") + parser.add_argument('--debug-audio', action='store_true', help="skip slow voice generation API calls, generating robotic TTS instead.") parser.add_argument('--font', type=str, help="the font to use for the video", default='Arial') parser.add_argument('--audio-job-delay', type=int, default=30, help="the number of seconds to wait between starting audio generation jobs. Lower values render faster but are more likely to get rate limited") parser.add_argument('--audio-poll-delay', type=int, default=10, help="the number of seconds to wait between polling for audio generation job completion") parser.add_argument('--text-shadow', action='store_true', help="use text shadow for captions instead of box background") + parser.add_argument('--save-script', action='store_true', help="save the generated script to a file") args = parser.parse_args() return args @@ -36,7 +38,8 @@ def main(): prompt=args.prompt, art_style=args.style, script_path=args.script_path, - debug=args.debug, + debug_images=args.debug_images or args.debug, + debug_audio=args.debug_audio or args.debug, font=args.font, manual_select_characters=args.manual_select_characters, max_tokens=args.max_tokens, @@ -44,5 +47,6 @@ def main(): upload_to_yt=args.upload, audio_job_delay=args.audio_job_delay, audio_poll_delay=args.audio_poll_delay, - caption_bg_style="text_shadow" if args.text_shadow else "box_shadow" + caption_bg_style="text_shadow" if args.text_shadow else "box_shadow", + save_script=args.save_script, ) \ No newline at end of file diff --git a/sitcom_simulator/script/integrations/fakeyou/character_selector.py b/sitcom_simulator/script/integrations/fakeyou/character_selector.py index b8dc68e..d57e781 100644 --- a/sitcom_simulator/script/integrations/fakeyou/character_selector.py +++ b/sitcom_simulator/script/integrations/fakeyou/character_selector.py @@ -1,4 +1,4 @@ -import tomllib +import toml from sitcom_simulator.models import Character import os @@ -6,7 +6,7 @@ script_dir = os.path.dirname(os.path.realpath(__file__)) characters_path = os.path.join(script_dir, 'characters.toml') with open(characters_path, "rb") as f: - curated_characters = tomllib.load(f) + curated_characters = toml.load(f) # user selects which auto-detected characters to include in the script # (including their voices if generating high-quality audio) diff --git a/sitcom_simulator/script/script_generator.py b/sitcom_simulator/script/script_generator.py index 17acbda..630385a 100644 --- a/sitcom_simulator/script/script_generator.py +++ b/sitcom_simulator/script/script_generator.py @@ -1,6 +1,6 @@ from typing import Callable from ..models import Script -import tomllib +import toml from dataclasses import asdict import logging @@ -67,7 +67,7 @@ def write_script( while not approved: raw_script= chatgpt.chat(full_prompt, temperature=temperature, max_tokens=max_tokens, model=model) logging.debug("Raw script", raw_script) - toml_script = tomllib.loads(raw_script) + toml_script = toml.loads(raw_script) toml_script["characters"] = [asdict(c) for c in characters] # from characters to dict back to character. Refactor at some point. 
script = Script.from_dict(toml_script) logging.debug("TOML script", script) @@ -86,7 +86,7 @@ def write_script( def script_from_file(path: str) -> Script: with open(path, "rb") as f: - script = Script.from_dict(tomllib.load(f)) + script = Script.from_dict(toml.load(f)) print(type(script)) return script diff --git a/sitcom_simulator/speech/integrations/fakeyou.py b/sitcom_simulator/speech/integrations/fakeyou.py index 3491610..04adf1f 100644 --- a/sitcom_simulator/speech/integrations/fakeyou.py +++ b/sitcom_simulator/speech/integrations/fakeyou.py @@ -108,11 +108,34 @@ def generate_voices( """ Sequentially generates voices for each line in the script using the FakeYou API. It is intentionally slow to avoid getting rate limited. + It can be sped up by having FAKEYOU_USERNAME and FAKEYOU_PASSWORD set as environment variables. :param script: The script to generate voices for :param on_voice_generated: A callback function to call when a voice is generated which takes the clip index and the URL of the generated audio + :param job_delay: The number of seconds to wait between starting audio generation jobs. Lower values render faster but are more likely to get rate limited + :param poll_delay: The number of seconds to wait between polling for audio generation job completion + :param username_or_email: The username or email to use for the FakeYou API (optional, but increases render speed) + :param password: The password to use for the FakeYou API (optional, but increases render speed) """ + + username_or_email = os.environ.get('FAKEYOU_USERNAME') + password = os.environ.get('FAKEYOU_PASSWORD') + import requests + cookie = None + if username_or_email and password: + response = requests.post('https://api.fakeyou.com/v1/login', + json={"username_or_email": username_or_email, "password": password} + ) + auth_data = response.json() + if not auth_data['success']: + logging.exception("Failed to log in to FakeYou API") + else: + logging.info("Logged in to FakeYou API") + print("Logged in to FakeYou API") + cookie = response.headers.get('Set-Cookie') + cookie = re.search(r'\w+.=([^;]+)', cookie).group(1) + audio_urls: List[str | None] = [] for i, clip in tqdm(enumerate(script.clips), desc="Generating voices", total=len(script.clips)): # skip if doesn't need audio, or if audio already exists (audio should never already exist, but just in case) @@ -131,8 +154,12 @@ def generate_voices( voice_token = character.voice_token headers = { 'Accept': 'application/json', - 'Content-Type': 'application/json' + 'Content-Type': 'application/json', } + if cookie: + headers['cookie'] = f"session={cookie}" + headers["credentials"] = "include" + payload = { "uuid_idempotency_token": entropy, "tts_model_token": voice_token, From 7ca3c7a6fa51db086d9bc6307a13399b1689c68d Mon Sep 17 00:00:00 2001 From: Josh Moody Date: Mon, 26 Feb 2024 23:15:41 -0700 Subject: [PATCH 17/40] reduced the number of sign-in requests to fakeyou --- pyproject.toml | 2 +- .../speech/integrations/fakeyou.py | 40 +++++++++---------- sitcom_simulator/speech/speech_generator.py | 7 ++++ 3 files changed, 28 insertions(+), 21 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 5e4e353..2215e98 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "sitcom-simulator" -version = "0.3.0" +version = "0.3.1" authors = [ { name = "Josh Moody", email = "josh@joshmoody.org" }, ] diff --git a/sitcom_simulator/speech/integrations/fakeyou.py b/sitcom_simulator/speech/integrations/fakeyou.py index 04adf1f..0d7eaf4 100644 --- 
a/sitcom_simulator/speech/integrations/fakeyou.py +++ b/sitcom_simulator/speech/integrations/fakeyou.py @@ -99,11 +99,30 @@ def get_possible_characters_from_prompt(prompt: str) -> dict: return possible_characters +def sign_in(username_or_email: str, password: str) -> str: + """ + Signs in to the FakeYou API and returns the session cookie. + """ + import requests + response = requests.post('https://api.fakeyou.com/v1/login', + json={"username_or_email": username_or_email, "password": password} + ) + auth_data = response.json() + if not auth_data['success']: + logging.exception("Failed to log in to FakeYou API") + else: + logging.info("Logged in to FakeYou API") + print("Logged in to FakeYou API") + cookie = response.headers.get('Set-Cookie') + cookie = re.search(r'\w+.=([^;]+)', cookie).group(1) + return cookie + def generate_voices( script: Script, on_voice_url_generated: Optional[Callable[[int, str], None]] = None, job_delay:int=30, poll_delay:int=10, + cookie:str|None=None, ) -> List[str | None]: """ Sequentially generates voices for each line in the script using the FakeYou API. @@ -114,28 +133,9 @@ def generate_voices( :param on_voice_generated: A callback function to call when a voice is generated which takes the clip index and the URL of the generated audio :param job_delay: The number of seconds to wait between starting audio generation jobs. Lower values render faster but are more likely to get rate limited :param poll_delay: The number of seconds to wait between polling for audio generation job completion - :param username_or_email: The username or email to use for the FakeYou API (optional, but increases render speed) - :param password: The password to use for the FakeYou API (optional, but increases render speed) + :param cookie: The session cookie to use for the FakeYou API (acquired from sign_in) """ - - username_or_email = os.environ.get('FAKEYOU_USERNAME') - password = os.environ.get('FAKEYOU_PASSWORD') - import requests - cookie = None - if username_or_email and password: - response = requests.post('https://api.fakeyou.com/v1/login', - json={"username_or_email": username_or_email, "password": password} - ) - auth_data = response.json() - if not auth_data['success']: - logging.exception("Failed to log in to FakeYou API") - else: - logging.info("Logged in to FakeYou API") - print("Logged in to FakeYou API") - cookie = response.headers.get('Set-Cookie') - cookie = re.search(r'\w+.=([^;]+)', cookie).group(1) - audio_urls: List[str | None] = [] for i, clip in tqdm(enumerate(script.clips), desc="Generating voices", total=len(script.clips)): # skip if doesn't need audio, or if audio already exists (audio should never already exist, but just in case) diff --git a/sitcom_simulator/speech/speech_generator.py b/sitcom_simulator/speech/speech_generator.py index 4372f8e..a02d4b5 100644 --- a/sitcom_simulator/speech/speech_generator.py +++ b/sitcom_simulator/speech/speech_generator.py @@ -1,6 +1,7 @@ from typing import List, Literal from sitcom_simulator.models import Script from typing import Optional, Callable +import os Engine = Literal["fakeyou", "gtts"] @@ -30,11 +31,17 @@ def generate_voices( # generating voice clips can take a LONG time if args.high_quality_audio == True # because of long delays to avoid API timeouts on FakeYou.com if engine == "fakeyou": + username_or_email = os.environ.get('FAKEYOU_USERNAME') + password = os.environ.get('FAKEYOU_PASSWORD') + fakeyou_cookie = None + if username_or_email and password: + fakeyou_cookie = fakeyou.sign_in(username_or_email, password) 
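        # the credentials are read from the environment, so a typical setup looks like (example values):
        #   export FAKEYOU_USERNAME=you@example.com
        #   export FAKEYOU_PASSWORD=your-password
        # sign_in() is called once here and the resulting session cookie is handed to
        # fakeyou.generate_voices() below, which attaches it to each TTS request it makes.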
audio_urls = fakeyou.generate_voices( script, fakeyou_on_voice_url_generated, fakeyou_job_delay, fakeyou_poll_delay, + cookie=fakeyou_cookie, ) audio_paths = [] for i, audio_url in enumerate(audio_urls): From 65776565006a3b1647cb18493696c18fb2353639 Mon Sep 17 00:00:00 2001 From: Josh Moody Date: Mon, 26 Feb 2024 23:18:12 -0700 Subject: [PATCH 18/40] fixed bugs related to toml script file loading --- docs/conf.py | 3 +-- .../script/integrations/fakeyou/character_selector.py | 3 +-- sitcom_simulator/script/script_generator.py | 7 +++---- 3 files changed, 5 insertions(+), 8 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index 1ce3b93..4214d7a 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -11,8 +11,7 @@ import sys sys.path.insert(0, os.path.abspath('..')) # enable importing sitcom_simulator -with open('../pyproject.toml', 'rb') as pyproject: - pyproject = toml.load(pyproject) +pyproject = toml.load('../pyproject.toml') project = pyproject['project']['name'] author = pyproject['project']['authors'][0]['name'] diff --git a/sitcom_simulator/script/integrations/fakeyou/character_selector.py b/sitcom_simulator/script/integrations/fakeyou/character_selector.py index d57e781..82fdb41 100644 --- a/sitcom_simulator/script/integrations/fakeyou/character_selector.py +++ b/sitcom_simulator/script/integrations/fakeyou/character_selector.py @@ -5,8 +5,7 @@ # Get the directory of the current script file script_dir = os.path.dirname(os.path.realpath(__file__)) characters_path = os.path.join(script_dir, 'characters.toml') -with open(characters_path, "rb") as f: - curated_characters = toml.load(f) +curated_characters = toml.load(characters_path) # user selects which auto-detected characters to include in the script # (including their voices if generating high-quality audio) diff --git a/sitcom_simulator/script/script_generator.py b/sitcom_simulator/script/script_generator.py index 630385a..9a1e23a 100644 --- a/sitcom_simulator/script/script_generator.py +++ b/sitcom_simulator/script/script_generator.py @@ -85,10 +85,9 @@ def write_script( return script def script_from_file(path: str) -> Script: - with open(path, "rb") as f: - script = Script.from_dict(toml.load(f)) - print(type(script)) - return script + script = Script.from_dict(toml.load(path)) + print(type(script)) + return script def formatted_script(script: Script) -> str: metadata = f"Title: {script.metadata.title or ''}\nStyle: {script.metadata.art_style or ''}\n" From 40610d6f055a9fad934b40e1c66efb6efb895d26 Mon Sep 17 00:00:00 2001 From: Josh Moody Date: Thu, 29 Feb 2024 22:22:06 -0700 Subject: [PATCH 19/40] added many more video customization options --- pyproject.toml | 6 +- requirements.txt | 4 +- scripts/example_basic.toml | 14 +++ scripts/example_mario.toml | 26 +++--- sitcom_simulator/auto.py | 24 +++++- sitcom_simulator/cli.py | 8 ++ .../image/integrations/stability.py | 4 +- sitcom_simulator/models.py | 3 + sitcom_simulator/script/script_generator.py | 1 - .../speech/integrations/fakeyou.py | 5 +- sitcom_simulator/video/integrations/ffmpeg.py | 86 ++++++++++++++++--- sitcom_simulator/video/video_generator.py | 20 +++-- 12 files changed, 158 insertions(+), 43 deletions(-) create mode 100644 scripts/example_basic.toml diff --git a/pyproject.toml b/pyproject.toml index 2215e98..ff1802e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "sitcom-simulator" -version = "0.3.1" +version = "0.4.0" authors = [ { name = "Josh Moody", email = "josh@joshmoody.org" }, ] @@ -14,12 +14,12 @@ classifiers = [ "Operating
System :: OS Independent", ] dependencies = [ - "stability-sdk~=0.8.4", + "stability-sdk~=0.8.5", "python-dotenv~=1.0.0", "tqdm~=4.66.1", "openai~=0.28.0", "ffmpeg-python~=0.2.0", - "gtts~=2.3.2", + "gtts~=2.5.1", "Pillow~=10.0.1", "beautifulsoup4~=4.12.2", "requests~=2.31.0", diff --git a/requirements.txt b/requirements.txt index cac305d..7a6cd90 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,9 +1,9 @@ -stability-sdk~=0.8.4 +stability-sdk~=0.8.5 python-dotenv~=1.0.0 tqdm~=4.66.1 openai~=0.28.0 ffmpeg-python~=0.2.0 -gtts~=2.3.2 +gtts~=2.5.1 Pillow~=10.0.1 beautifulsoup4~=4.12.2 requests~=2.31.0 diff --git a/scripts/example_basic.toml b/scripts/example_basic.toml new file mode 100644 index 0000000..50951f2 --- /dev/null +++ b/scripts/example_basic.toml @@ -0,0 +1,14 @@ +[[characters]] +name = "Mario" +voice_token = "TM:6c6d3a8tavv6" + +[[clips]] +speaker = "Mario" +speech = "Luigi, I'm hungry. Do you mind if I have a bite of your soul?" +image_prompt = "Mario with red cap and mustache attempting to eat Luigi's soul, horror" +image_path = "C:\\Users\\joshm\\Pictures\\fb photo.jpg" + +[metadata] +title = "A Soulful Snack Encounter" +bgm_style = "upbeat" +art_style = "cinematic bokeh blur" \ No newline at end of file diff --git a/scripts/example_mario.toml b/scripts/example_mario.toml index b7b3570..ea93e68 100644 --- a/scripts/example_mario.toml +++ b/scripts/example_mario.toml @@ -1,3 +1,15 @@ +[[characters]] +name = "Mario" +voice_token = "TM:6c6d3a8tavv6" + +[[characters]] +name = "Luigi" +voice_token = "TM:fp4fcyja6mk1" + +[[characters]] +name = "Narrator" +voice_token = "TM:xrk8qhm6cb6r" + [[clips]] speaker = "Mario" speech = "Luigi, I'm hungry. Do you mind if I have a bite of your soul?" @@ -21,16 +33,4 @@ image_prompt = "Luigi offering a truce, Mario looking contemplative, comedy" [metadata] title = "A Soulful Snack Encounter" bgm_style = "upbeat" -art_style = "cinematic bokeh blur" - -[[characters]] -name = "Mario" -voice_token = "TM:6c6d3a8tavv6" - -[[characters]] -name = "Luigi" -voice_token = "TM:fp4fcyja6mk1" - -[[characters]] -name = "Narrator" -voice_token = "TM:xrk8qhm6cb6r" +art_style = "cinematic bokeh blur" \ No newline at end of file diff --git a/sitcom_simulator/auto.py b/sitcom_simulator/auto.py index 2ef6edb..c9328b3 100644 --- a/sitcom_simulator/auto.py +++ b/sitcom_simulator/auto.py @@ -13,6 +13,10 @@ def create_sitcom( audio_poll_delay:int=10, caption_bg_style:str="box_shadow", save_script:bool=False, + speed:float=1, + pan_and_zoom:bool=True, + width:int=720, + height:int=1280, ): """ Generates a sitcom video based on a prompt or a script file. @@ -30,6 +34,11 @@ def create_sitcom( :param audio_job_delay: The number of seconds to wait between starting audio generation jobs. Lower values render faster but are more likely to get rate limited. (FakeYou only) :param audio_poll_delay: The number of seconds to wait between polling for audio generation job completion. (FakeYou only) :param caption_bg_style: The style of the background behind the captions. + :param save_script: If True, the generated script will be saved to a file. + :param speed: The speed of the final video. 1.0 is normal speed. + :param disable_pan_and_zoom: If True, the pan and zoom effect on images will be disabled. + :param width: The width of the video to render. + :param height: The height of the video to render. 
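    Illustrative example (a sketch only; assumes create_sitcom is importable from sitcom_simulator.auto and that the relevant API keys are configured):

        from sitcom_simulator.auto import create_sitcom

        create_sitcom(
            prompt="a cooking show set on the moon",
            speed=1.25,         # slightly faster pacing
            pan_and_zoom=True,  # Ken Burns-style motion on each generated image
            width=720,
            height=1280,        # 9:16 portrait, the default
        )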
""" from .models import VideoResult from .script import write_script @@ -67,18 +76,31 @@ def create_sitcom( fakeyou_job_delay=audio_job_delay, fakeyou_poll_delay=audio_poll_delay, ) - script_with_images = add_images(script_with_voices, engine="stability" if not debug_images else "pillow") # could theoretically be done in parallel with the audio + + # image gen could theoretically be done in parallel with the audio + script_with_images = add_images( + script_with_voices, + engine="stability" if not debug_images else "pillow", + width=width, + height=height, + ) + script_with_music = add_music(script_with_images) final_script = script_with_music filename = final_script.metadata.title[:50].strip() or 'render' if final_script.metadata.title else 'render' output_path = f"./{filename}.mp4" + final_video_path = render_video( script=final_script, font=font, output_path=output_path, caption_bg_style=caption_bg_style, + width=width, + height=height, + speed=speed, + pan_and_zoom=pan_and_zoom, ) result = VideoResult( diff --git a/sitcom_simulator/cli.py b/sitcom_simulator/cli.py index fca96c1..7cb7be5 100644 --- a/sitcom_simulator/cli.py +++ b/sitcom_simulator/cli.py @@ -23,6 +23,10 @@ def _parse_args(): parser.add_argument('--audio-poll-delay', type=int, default=10, help="the number of seconds to wait between polling for audio generation job completion") parser.add_argument('--text-shadow', action='store_true', help="use text shadow for captions instead of box background") parser.add_argument('--save-script', action='store_true', help="save the generated script to a file") + parser.add_argument('--speed', type=float, default=1, help="speed up the final video by this factor (1.0 is normal speed)") + parser.add_argument('--no-pan-and-zoom', action='store_true', help="disable pan and zoom effect on images") + parser.add_argument('--width', type=int, default=720, help="width of the video in pixels. Only 16:9 and 9:16 aspect ratios are supported.") + parser.add_argument('--height', type=int, default=1280, help="height of the video in pixels. Only 16:9 and 9:16 aspect ratios are supported.") args = parser.parse_args() return args @@ -49,4 +53,8 @@ def main(): audio_poll_delay=args.audio_poll_delay, caption_bg_style="text_shadow" if args.text_shadow else "box_shadow", save_script=args.save_script, + speed=args.speed, + pan_and_zoom=not args.no_pan_and_zoom, + width=args.width, + height=args.height, ) \ No newline at end of file diff --git a/sitcom_simulator/image/integrations/stability.py b/sitcom_simulator/image/integrations/stability.py index 250a90c..0d50c7a 100644 --- a/sitcom_simulator/image/integrations/stability.py +++ b/sitcom_simulator/image/integrations/stability.py @@ -19,7 +19,7 @@ def generate_image(prompt:str, width:int=1024, height:int=1024): from stability_sdk.client import StabilityInference, process_artifacts_from_answers # customize engine here if desired (default is newest) - # i.e. 
engine='stable-diffusion-v1-5' + # e.g., engine='stable-diffusion-v1-5' stability_api = StabilityInference( STABILITY_HOST, key=os.getenv('STABILITY_API_KEY'), @@ -33,7 +33,7 @@ def generate_image(prompt:str, width:int=1024, height:int=1024): ) artifacts = process_artifacts_from_answers( - prefix="", prompt=prompt, answers=answers, write=False, verbose=False + prefix="", prompt=prompt, answers=answers, write=False, verbose=False, ) img_path = None diff --git a/sitcom_simulator/models.py b/sitcom_simulator/models.py index 7b6a6b3..a7d2b92 100644 --- a/sitcom_simulator/models.py +++ b/sitcom_simulator/models.py @@ -101,12 +101,14 @@ class ScriptMetadata: :param prompt: The prompt for the script :param bgm_path: The path to the background music :param misc: Any additional metadata + :param landscape: If True, the video is 16:9 instead of 9:16 """ title: str | None bgm_style: str | None art_style: str | None prompt: str | None bgm_path: str | None + landscape: bool | None @staticmethod def from_dict(data: dict): @@ -121,6 +123,7 @@ def from_dict(data: dict): art_style=data.get('art_style'), prompt=data.get('prompt'), bgm_path=data.get('bgm_path'), + landscape=data.get('landscape') ) def replace(self, **kwargs): diff --git a/sitcom_simulator/script/script_generator.py b/sitcom_simulator/script/script_generator.py index 9a1e23a..cdc4a6d 100644 --- a/sitcom_simulator/script/script_generator.py +++ b/sitcom_simulator/script/script_generator.py @@ -86,7 +86,6 @@ def write_script( def script_from_file(path: str) -> Script: script = Script.from_dict(toml.load(path)) - print(type(script)) return script def formatted_script(script: Script) -> str: diff --git a/sitcom_simulator/speech/integrations/fakeyou.py b/sitcom_simulator/speech/integrations/fakeyou.py index 0d7eaf4..c8e88b5 100644 --- a/sitcom_simulator/speech/integrations/fakeyou.py +++ b/sitcom_simulator/speech/integrations/fakeyou.py @@ -44,6 +44,7 @@ def fetch_voicelist(): """ import requests response = requests.get('https://api.fakeyou.com/tts/list') + logging.info("Fetching voice list from fakeyou") json = response.json() if(json['success'] != True): print("Error fetching voice list from fakeyou. 
Exiting.") @@ -120,8 +121,8 @@ def sign_in(username_or_email: str, password: str) -> str: def generate_voices( script: Script, on_voice_url_generated: Optional[Callable[[int, str], None]] = None, - job_delay:int=30, - poll_delay:int=10, + job_delay:float=30, + poll_delay:float=10, cookie:str|None=None, ) -> List[str | None]: """ diff --git a/sitcom_simulator/video/integrations/ffmpeg.py b/sitcom_simulator/video/integrations/ffmpeg.py index aa12758..bca76b5 100644 --- a/sitcom_simulator/video/integrations/ffmpeg.py +++ b/sitcom_simulator/video/integrations/ffmpeg.py @@ -8,6 +8,8 @@ import atexit from dataclasses import dataclass +FRAME_RATE = 24 + @dataclass class ShadowSettings: """ @@ -87,10 +89,16 @@ class ClipSettings: :param clip_buffer_seconds: How much time to wait after characters finish talking :param min_clip_seconds: The minimum time to hold on a clip :param speaking_delay_seconds: Delay before the audio kicks in + :param max_zoom_factor: The maximum zoom factor for the pan and zoom effect + :param max_pan_speed: The maximum speed of the pan and zoom effect """ clip_buffer_seconds:float=0.15 min_clip_seconds:float=1.5 speaking_delay_seconds:float=0.12 + max_zoom_factor:float=1.2 + min_zoom_factor:float=1.0 + max_pan_speed:float=0.15 + min_pan_speed:float=0.0 failed_image_captions = [ "This image has been seized by the FBI", @@ -112,6 +120,8 @@ def render_clip( clip: Clip, width:int=720, height:int=1280, + speed:float=1.0, + pan_and_zoom:bool=True, clip_settings:ClipSettings=ClipSettings(), caption_settings:CaptionSettings=CaptionSettings(), caption_bg_settings:BoxSettings|ShadowSettings=BoxSettings(), @@ -123,6 +133,8 @@ def render_clip( :param font: The path to the font file to use for the captions :param width: The width of the video :param height: The height of the video + :param speed: The speed of the final video. 
1.0 is normal speed + :param pan_and_zoom: If True, the pan and zoom effect on images will be enabled :param clip_settings: The settings for rendering the video clip :param caption_max_width: The maximum width of the captions, in characters :param caption_settings: The settings for the captions @@ -134,18 +146,22 @@ def render_clip( if caption: caption = caption_settings.formatted_caption(caption) - scale_factor = width / 720 # 720 is the reference screen width + scale_factor = min(width, height) / 720 # 720 is the reference screen width - try: - audio_path = clip.audio_path.replace('/', '\\') if os.name == 'nt' else clip.audio_path - audio_duration = float(ffmpeg.probe(audio_path)['streams'][0]['duration']) if clip.audio_path else 0 - except Exception as e: - print(f"Error probing audio duration: {e}.\nHave you put ffmpeg and ffprobe binaries into the root project directory?") - print(clip.audio_path) + if clip.audio_path: + try: + audio_path = clip.audio_path.replace('/', '\\') if os.name == 'nt' else clip.audio_path + audio_duration = float(ffmpeg.probe(audio_path)['streams'][0]['duration']) if clip.audio_path else 0 + except Exception as e: + print(f"Error probing audio duration: {e}.\nHave you put ffmpeg and ffprobe binaries into the root project directory?") + print(clip.audio_path) + audio_duration = 0 + else: audio_duration = 0 duration = audio_duration + clip_settings.clip_buffer_seconds + clip_settings.speaking_delay_seconds duration = max(duration, clip_settings.min_clip_seconds) + duration = duration / speed if clip.duration and not clip.speaker: # 'not speaker' in case the llm forgets proper syntax duration = clip.duration @@ -155,11 +171,48 @@ def render_clip( if no_image or seized_image: video_input = ffmpeg.input(f'color=c=black:s={width}x{height}:d=5', f='lavfi') else: + video_input = ffmpeg.input(clip.image_path, loop=1, framerate=FRAME_RATE) + # the zoom effect is jittery for some strange reason + # but if we upscale the image first, the jitter is less noticeable + # at the cost of slower rendering + prezoom_scale_factor = 3 if pan_and_zoom else 1 + prezoom_scale_width = int(width * prezoom_scale_factor) + prezoom_scale_height = int(height * prezoom_scale_factor) video_input = ( - ffmpeg.input(clip.image_path, loop=1, framerate=24) - .filter('scale', width, height, force_original_aspect_ratio="increase") - .filter('crop', width, height) + video_input + .filter('scale', prezoom_scale_width, prezoom_scale_height, force_original_aspect_ratio="increase") + .filter('crop', prezoom_scale_width, prezoom_scale_height) ) + if pan_and_zoom: + zoom_start = 1.0 # Start with no zoom + zoom_end = random.uniform(clip_settings.min_zoom_factor, clip_settings.max_zoom_factor) # Target end zoom level, adjust as needed + zoom_out = random.choice([True, False]) # Randomly zoom in or out + if zoom_out: + zoom_start, zoom_end = zoom_end, zoom_start # Reverse the zoom levels for a zoom out effect + total_frames = int(duration * FRAME_RATE) # Total frames based on video duration and frame rate + + # Ensure zoom continues smoothly for the entire duration + zoom_expr = f'{zoom_start}+(on/{total_frames})*{zoom_end-zoom_start}' + + # Randomly pan the image + max_pan = clip_settings.max_pan_speed + + # These expressions pan the image randomly but are weirdly jittery when they start from the center (iw/2-(iw/zoom/2)) + x_expr = f'(iw/2.0-(iw/zoom/2.0))+{random.uniform(-max_pan, max_pan)}*on*iw/{total_frames}' + y_expr = f'(ih/2.0-(ih/zoom/2.0))+{random.uniform(-max_pan, max_pan)}*on*ih/{total_frames}' 
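            # for reference: in zoompan expressions, 'on' is the output frame number, 'iw'/'ih' are the
            # input width/height, and 'zoom' is the current zoom level, so (iw/2-(iw/zoom/2)) is the x
            # offset that keeps the zoomed window centered; the random.uniform(...) coefficient then
            # shifts that window a little further each frame, which is what produces the pan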
+ + # instead, we'll use a static value that represents one of the four corners of the image + # x_expr = random.choice([0, width]) + # y_expr = random.choice([0, height]) + + video_input = video_input.zoompan( + z=zoom_expr, + x=x_expr, + y=y_expr, + d=1, # Apply the effect continuously across frames + s=f'{width}x{height}', + fps=FRAME_RATE, + ) speaking_delay_ms = clip_settings.speaking_delay_seconds * 1000 @@ -172,6 +225,7 @@ def render_clip( .input(clip.audio_path) .filter('adelay', f'{speaking_delay_ms}|{speaking_delay_ms}') .filter('apad', pad_dur=duration) + .filter('atempo', speed) ) caption_bg_dict = caption_bg_settings.to_dict() if isinstance(caption_bg_settings, BoxSettings) else caption_bg_settings.to_dict() @@ -189,6 +243,8 @@ def render_clip( **caption_bg_dict, ) + video_input = video_input.filter('setpts', f'PTS/{speed}') + try: input_streams = [video_input] if audio_input is None else [video_input, audio_input] with tempfile.NamedTemporaryFile(suffix='.mp4', delete=False) as temp_file: @@ -201,7 +257,7 @@ def render_clip( return temp_file.name except ffmpeg.Error as e: print('FFmpeg Error:', e.stderr.decode() if e.stderr else str(e)) # Decoding the stderr for better readability - raise Exception("ffmpeg error:", e.stderr if e.stderr else str(e)) + raise Exception(f"ffmpeg error: {e.stderr.decode() if e.stderr else str(e)}") def concatenate_clips( @@ -255,7 +311,7 @@ def concatenate_clips( vcodec='libx264', pix_fmt='yuv420p', # necessary for compatibility acodec='mp3', - r=24, + r=FRAME_RATE, **{'b:v': '8000K'} ) .overwrite_output() @@ -270,6 +326,8 @@ def render_video( output_path: str = 'output.mp4', width:int=720, height:int=1280, + speed:float=1.0, + pan_and_zoom:bool=True, clip_settings:ClipSettings=ClipSettings(), caption_settings:CaptionSettings=CaptionSettings(), caption_bg_settings:BoxSettings|ShadowSettings=BoxSettings(), @@ -283,6 +341,8 @@ def render_video( :param output_path: The path to save the rendered video :param width: The width of the video :param height: The height of the video + :param speed: The speed of the final video. 1.0 is normal speed + :param pan_and_zoom: If True, the pan and zoom effect on images will be enabled :param clip_settings: The settings for rendering the video clip :param caption_settings: The settings for the captions :param caption_bg_settings: The settings for the caption background @@ -296,6 +356,8 @@ def render_video( clip_settings=clip_settings, caption_settings=caption_settings, caption_bg_settings=caption_bg_settings, + speed=speed, + pan_and_zoom=pan_and_zoom, ) intermediate_clips.append(clip_file) diff --git a/sitcom_simulator/video/video_generator.py b/sitcom_simulator/video/video_generator.py index f014184..34d40f1 100644 --- a/sitcom_simulator/video/video_generator.py +++ b/sitcom_simulator/video/video_generator.py @@ -9,14 +9,16 @@ def render_video( output_path="output.mp4", width:int=1080, height:int=1920, - clip_buffer_seconds=0.35, - min_clip_seconds=1.5, - speaking_delay_seconds=0.12, + speed:float=1.0, + pan_and_zoom:bool=True, + clip_buffer_seconds:float=0.35, + min_clip_seconds:float=1.5, + speaking_delay_seconds:float=0.12, caption_bg_style:CaptionBg='box_shadow', - caption_bg_alpha=0.6, - caption_bg_color="black", - caption_bg_shadow_distance_x=5, - caption_bg_shadow_distance_y=5, + caption_bg_alpha:float=0.6, + caption_bg_color:str="black", + caption_bg_shadow_distance_x:float=5, + caption_bg_shadow_distance_y:float=5, ): """ Renders a video from the given script and returns the path to the rendered video. 
@@ -26,6 +28,8 @@ def render_video( :param output_path: The path to save the rendered video to :param width: The width of the video to render :param height: The height of the video to render + :param speed: The speed of the final video. 1.0 is normal speed. + :param pan_and_zoom: If True, the pan and zoom effect on images will be enabled. :param clip_buffer_seconds: How much time to wait after characters finish talking :param min_clip_length: The minimum time to hold on a clip :param speaking_delay_seconds: How much time to wait after a character starts talking @@ -90,6 +94,8 @@ def render_video( output_path=output_path, width=width, height=height, + speed=speed, + pan_and_zoom=pan_and_zoom, caption_settings=CaptionSettings( font=font, ), From 3bd92c4918c283ec5a37618cf3559e66d1acf8c2 Mon Sep 17 00:00:00 2001 From: Josh Moody Date: Thu, 29 Feb 2024 23:05:31 -0700 Subject: [PATCH 20/40] added curated voices list --- .../fakeyou/character_extractor.py | 32 ++++++++++++++++--- .../integrations/fakeyou/curated_voices.csv | 16 ++++++++++ sitcom_simulator/video/integrations/ffmpeg.py | 4 +-- 3 files changed, 46 insertions(+), 6 deletions(-) create mode 100644 sitcom_simulator/script/integrations/fakeyou/curated_voices.csv diff --git a/sitcom_simulator/script/integrations/fakeyou/character_extractor.py b/sitcom_simulator/script/integrations/fakeyou/character_extractor.py index 470505d..63edf56 100644 --- a/sitcom_simulator/script/integrations/fakeyou/character_extractor.py +++ b/sitcom_simulator/script/integrations/fakeyou/character_extractor.py @@ -5,6 +5,24 @@ from sitcom_simulator.models import Character import logging from typing import List +import os +import csv + +def load_curated_voices(): + """ + Loads the curated voices from the 'curated_voices.csv' file in the same directory as this script. + Important for when fakeyou's ratings get wiped (which has happened before), we still have our own records. 
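    The file is expected to have a model_name,rating header followed by one row per voice, e.g.
        "Mario (Charles Martinet, 1994-2023) (New!)",5
        "Tom Cruise (New)",4.5
    Ratings are floats on the same 1-5 scale that calculate_star_rating produces, and a curated
    rating, when present, takes priority over FakeYou's own user ratings.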
+ """ + curated_voices: dict[str, float] = {} + # note: needs to be in the same directory as this script, not the current working directory + path_to_curated_voices = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'curated_voices.csv') + with open(path_to_curated_voices, 'r') as f: + reader = csv.DictReader(f) + for row in reader: + name = row['model_name'].strip() + rating = row['rating'].strip() + curated_voices[name] = float(rating) + return curated_voices def generate_character_list(prompt: str, custom_instructions: str | None=None) -> List[Character]: """ @@ -42,6 +60,7 @@ def generate_character_list(prompt: str, custom_instructions: str | None=None) - fakeyou_character_list = res.json()['models'] name_to_model = pure_name_to_model(fakeyou_character_list) + curated_characters = load_curated_voices() chosen_characters = [] for name in character_names: # TODO (big maybe) if tts doesn't exist but vtv does, render tts in someone else's voice and then use vtv @@ -49,7 +68,7 @@ def generate_character_list(prompt: str, custom_instructions: str | None=None) - continue matches = name_to_model[name.lower()] # find the highest-rated match - highest_rated_voice = max(matches, key=calculate_star_rating) + highest_rated_voice = max(matches, key=lambda model: calculate_star_rating(model, curated_characters)) chosen_characters.append(Character(name=name, voice_token=highest_rated_voice['model_token'])) logging.info("Selected voices:", ", ".join([c.name for c in chosen_characters])) @@ -90,12 +109,16 @@ def pure_character_name(raw_name: str): if match: return match.group(1) return None - + DEFAULT_RATING = 2 # not the worst possible, but pretty bad -def calculate_star_rating(model): +def calculate_star_rating(model, curated_voices: dict[str, float] | None=None): """ Estimates the true ratio of positive to negative reviews. Intuition: 5 stars from 10 reviews is worse than 4.8 stars from 1000 reviews. """ + + curated_rating = curated_voices.get(model['title']) + if curated_rating: + return curated_rating if 'user_ratings' not in model: return DEFAULT_RATING positive_count = model['user_ratings']['positive_count'] @@ -106,4 +129,5 @@ def calculate_star_rating(model): beta_posterior = 1 + negative_count # Prior beta = 1 mean_proportion = alpha_posterior / (alpha_posterior + beta_posterior) star_rating = 1 + 4 * mean_proportion - return mean_proportion, star_rating \ No newline at end of file + + return star_rating \ No newline at end of file diff --git a/sitcom_simulator/script/integrations/fakeyou/curated_voices.csv b/sitcom_simulator/script/integrations/fakeyou/curated_voices.csv new file mode 100644 index 0000000..3387d2a --- /dev/null +++ b/sitcom_simulator/script/integrations/fakeyou/curated_voices.csv @@ -0,0 +1,16 @@ +model_name,rating +Luigi (Charles Martinet) (fixed version),5 +Mario (Charles Martinet, 1994-2023) (New!),5 +Shrek (New),5 +GLaDOS (Ellen McLain),5 +GLaDOS (Ellen McLain, Portal 2),5 +Morgan Freeman (New),5 +Kurzgesagt (New),5 +"Weird Al" Yankovic,3.5 +Shaggy Rogers (Scott Innes),4 +Shadow The Hedgehog (Jason Griffith),3.75 +Tom Cruise (New),4.5 +Elon Musk (New Version 2.0),3.5 +Donald Trump (Version 3.0),4 +Joe Biden (New, 46th U.S. President),5 +Barack Obama (NEW, 44th U.S. 
President),5 \ No newline at end of file diff --git a/sitcom_simulator/video/integrations/ffmpeg.py b/sitcom_simulator/video/integrations/ffmpeg.py index bca76b5..ee78ff5 100644 --- a/sitcom_simulator/video/integrations/ffmpeg.py +++ b/sitcom_simulator/video/integrations/ffmpeg.py @@ -95,9 +95,9 @@ class ClipSettings: clip_buffer_seconds:float=0.15 min_clip_seconds:float=1.5 speaking_delay_seconds:float=0.12 - max_zoom_factor:float=1.2 + max_zoom_factor:float=1.1 min_zoom_factor:float=1.0 - max_pan_speed:float=0.15 + max_pan_speed:float=0.1 min_pan_speed:float=0.0 failed_image_captions = [ From c6bdea9a66acb4e85bc201a87b57cb840b1a8be6 Mon Sep 17 00:00:00 2001 From: Josh Moody Date: Thu, 29 Feb 2024 23:42:48 -0700 Subject: [PATCH 21/40] added audio balancing and narrator dropout --- sitcom_simulator/auto.py | 3 ++ sitcom_simulator/cli.py | 2 + .../integrations/fakeyou/curated_voices.csv | 29 +++++++------- sitcom_simulator/script/llm_instructions.txt | 5 +-- sitcom_simulator/script/script_generator.py | 6 +++ sitcom_simulator/video/integrations/ffmpeg.py | 40 +++++++++++++++++-- sitcom_simulator/video/video_generator.py | 9 +++++ 7 files changed, 73 insertions(+), 21 deletions(-) diff --git a/sitcom_simulator/auto.py b/sitcom_simulator/auto.py index c9328b3..442605d 100644 --- a/sitcom_simulator/auto.py +++ b/sitcom_simulator/auto.py @@ -17,6 +17,7 @@ def create_sitcom( pan_and_zoom:bool=True, width:int=720, height:int=1280, + narrator_dropout:bool=False, ): """ Generates a sitcom video based on a prompt or a script file. @@ -39,6 +40,7 @@ def create_sitcom( :param disable_pan_and_zoom: If True, the pan and zoom effect on images will be disabled. :param width: The width of the video to render. :param height: The height of the video to render. + :param narrator_dropout: If True, the narrator will be forcibly removed from the script (ChatGPT often goes heavy on the narrators). """ from .models import VideoResult from .script import write_script @@ -61,6 +63,7 @@ def create_sitcom( max_tokens=max_tokens, require_approval=approve_script, fakeyou_characters=not debug_audio, + narrator_dropout=narrator_dropout, ) elif script_path and not prompt: initial_script = script_from_file(script_path) diff --git a/sitcom_simulator/cli.py b/sitcom_simulator/cli.py index 7cb7be5..c35b0e8 100644 --- a/sitcom_simulator/cli.py +++ b/sitcom_simulator/cli.py @@ -27,6 +27,7 @@ def _parse_args(): parser.add_argument('--no-pan-and-zoom', action='store_true', help="disable pan and zoom effect on images") parser.add_argument('--width', type=int, default=720, help="width of the video in pixels. Only 16:9 and 9:16 aspect ratios are supported.") parser.add_argument('--height', type=int, default=1280, help="height of the video in pixels. 
Only 16:9 and 9:16 aspect ratios are supported.") + parser.add_argument('--no-narrators', action='store_true', help="disable narrator characters") args = parser.parse_args() return args @@ -57,4 +58,5 @@ def main(): pan_and_zoom=not args.no_pan_and_zoom, width=args.width, height=args.height, + narrator_dropout=args.no_narrators, ) \ No newline at end of file diff --git a/sitcom_simulator/script/integrations/fakeyou/curated_voices.csv b/sitcom_simulator/script/integrations/fakeyou/curated_voices.csv index 3387d2a..9043b5e 100644 --- a/sitcom_simulator/script/integrations/fakeyou/curated_voices.csv +++ b/sitcom_simulator/script/integrations/fakeyou/curated_voices.csv @@ -1,16 +1,17 @@ model_name,rating -Luigi (Charles Martinet) (fixed version),5 -Mario (Charles Martinet, 1994-2023) (New!),5 -Shrek (New),5 -GLaDOS (Ellen McLain),5 -GLaDOS (Ellen McLain, Portal 2),5 -Morgan Freeman (New),5 -Kurzgesagt (New),5 +"Luigi (Charles Martinet) (fixed version)",5 +"Mario (Charles Martinet, 1994-2023) (New!)",5 +"Shrek (New)",5 +"GLaDOS (Ellen McLain)",5 +"GLaDOS (Ellen McLain, Portal 2)",5 +"Morgan Freeman (New)",5 +"Kurzgesagt (New)",5 "Weird Al" Yankovic,3.5 -Shaggy Rogers (Scott Innes),4 -Shadow The Hedgehog (Jason Griffith),3.75 -Tom Cruise (New),4.5 -Elon Musk (New Version 2.0),3.5 -Donald Trump (Version 3.0),4 -Joe Biden (New, 46th U.S. President),5 -Barack Obama (NEW, 44th U.S. President),5 \ No newline at end of file +"Shaggy Rogers (Scott Innes)",4 +"Shadow The Hedgehog (Jason Griffith)",3.75 +"Tom Cruise (New)",4.5 +"Elon Musk (New Version 2.0)",3.5 +"Donald Trump (Version 3.0)",4 +"Joe Biden (New, 46th U.S. President)",5 +"Barack Obama (NEW, 44th U.S. President)",5 +"Toad (Super Mario, Jen Taylor)",3 \ No newline at end of file diff --git a/sitcom_simulator/script/llm_instructions.txt b/sitcom_simulator/script/llm_instructions.txt index df6bb6e..981cfbd 100644 --- a/sitcom_simulator/script/llm_instructions.txt +++ b/sitcom_simulator/script/llm_instructions.txt @@ -44,14 +44,11 @@ Pro Tips: - Be bold and avante garde. - Censor anything truly inappropriate like racism, but do not censor things like horror or dark themes. - Scripts should be approximately 30-60 seconds in duration, and have at least 4-6 clips of dialog unless otherwise specified. -- Take yourself seriously, but also crank it up to ELEVEN on the wierdness scale, baby. - Keep famous characters in character -- End with a twist. NO generic, boring happy endings. -- The last clip should always be an unexpected, wacky twist. - Do not use narrators unless absolutely necessary. - No TOML comments (#) or markdown. Just pure TOML. -Now, take a deep breath and a shot of whiskey, and write a script for the following video: +Now write a script for the following video: "{prompt}" diff --git a/sitcom_simulator/script/script_generator.py b/sitcom_simulator/script/script_generator.py index cdc4a6d..133a627 100644 --- a/sitcom_simulator/script/script_generator.py +++ b/sitcom_simulator/script/script_generator.py @@ -14,6 +14,7 @@ def write_script( custom_script_instructions: str | None=None, custom_character_instructions: str | None=None, fakeyou_characters:bool=True, + narrator_dropout:bool=False, ) -> Script: """ Uses AI to generate a script matching the prompt. @@ -30,6 +31,7 @@ def write_script( :param custom_script_instructions: A string containing custom instructions for the language model writing the script. Must contain the placeholders '{prompt}', '{music_categories}', and '{characters}'. 
:param custom_character_instructions: A string containing custom instructions for the language model extracting the characters from the prompt. Must contain the placeholder '{prompt}'. :param fakeyou_characters: Whether to restrict character selection to only voices from fakeyou.com + :param narrator_dropout: Whether to forcibly remove narrators from the script (ChatGPT often goes heavy on the narrators) """ from ..speech.integrations.fakeyou import get_possible_characters_from_prompt from .integrations.chatgpt import chatgpt @@ -70,6 +72,10 @@ def write_script( toml_script = toml.loads(raw_script) toml_script["characters"] = [asdict(c) for c in characters] # from characters to dict back to character. Refactor at some point. script = Script.from_dict(toml_script) + if narrator_dropout: + script = script.replace(clips=[c for c in script.clips if c.speaker.lower().strip() != "narrator"]) + if len(script.clips) == 0: + raise ValueError("Narrator dropout resulted in an empty script. Please try again.") logging.debug("TOML script", script) print(formatted_script(script)) if(require_approval): diff --git a/sitcom_simulator/video/integrations/ffmpeg.py b/sitcom_simulator/video/integrations/ffmpeg.py index ee78ff5..dd8cb44 100644 --- a/sitcom_simulator/video/integrations/ffmpeg.py +++ b/sitcom_simulator/video/integrations/ffmpeg.py @@ -116,6 +116,32 @@ class ClipSettings: "Image seized by the government", ] + +def analyze_loudness(audio_path): + """ + Analyzes the loudness of the given audio file and returns its integrated loudness in LUFS. + """ + import subprocess + try: + cmd = [ + 'ffmpeg', '-nostats', '-i', audio_path, + '-filter_complex', 'ebur128=peak=true', + '-f', 'null', '-' + ] + result = subprocess.run(cmd, capture_output=True, text=True) + print(result) + # Parse the output for loudness information - this example may need adjustment + # to properly extract the loudness value from your FFmpeg version's output + for line in result.stderr.split('\n'): + if 'Integrated loudness:' in line: + # Example line: "Integrated loudness: I: -23.1 LUFS" + loudness_value = float(line.split()[-2]) + return loudness_value + except Exception as e: + print(f"Error analyzing loudness: {e}") + return None + + def render_clip( clip: Clip, width:int=720, @@ -226,6 +252,7 @@ def render_clip( .filter('adelay', f'{speaking_delay_ms}|{speaking_delay_ms}') .filter('apad', pad_dur=duration) .filter('atempo', speed) + .filter('speechnorm') ) caption_bg_dict = caption_bg_settings.to_dict() if isinstance(caption_bg_settings, BoxSettings) else caption_bg_settings.to_dict() @@ -264,7 +291,7 @@ def concatenate_clips( filenames: List[str], output_filename: str, background_music:str|None=None, - bgm_volume:float=0.25, + bgm_volume:float=-24, ): """ Combines the given video clips into a single video file and returns the path to the concatenated video file. 
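    Note on units: bgm_volume is now an integrated-loudness target in LUFS for FFmpeg's loudnorm
    filter rather than the old linear gain (~0.25), so roughly -24 gives quiet background music and
    -16 gives louder music. A minimal sketch of the same idea in isolation, assuming ffmpeg-python:

        ffmpeg.input("bgm.mp3").filter("loudnorm", i=-24)  # normalize the track to about -24 LUFS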
@@ -294,7 +321,8 @@ def concatenate_clips( bgm_input = ( ffmpeg .input(background_music) - .filter('volume', str(bgm_volume)) + # .filter('volume', str(bgm_volume)) # old way, ~.25 worked well + .filter('loudnorm', i=bgm_volume) # new way, more consistent .filter('atrim', duration=total_audio_duration) ) concatenated_audio = ffmpeg.filter([concatenated_audio, bgm_input], 'amix') # Mix concatenated audio and bgm @@ -331,6 +359,7 @@ def render_video( clip_settings:ClipSettings=ClipSettings(), caption_settings:CaptionSettings=CaptionSettings(), caption_bg_settings:BoxSettings|ShadowSettings=BoxSettings(), + bgm_volume:float=-24, ): """ Renders a video from the given script and returns the path to the rendered video file. @@ -346,6 +375,7 @@ def render_video( :param clip_settings: The settings for rendering the video clip :param caption_settings: The settings for the captions :param caption_bg_settings: The settings for the caption background + :param bgm_volume: The volume of the background music, good values are between -24 and -16 """ intermediate_clips = [] for clip in tqdm(script.clips, desc="Rendering intermediate video clips"): @@ -361,6 +391,10 @@ def render_video( ) intermediate_clips.append(clip_file) - final_video_path = concatenate_clips(intermediate_clips, output_path, background_music=script.metadata.bgm_path) + final_video_path = concatenate_clips( + intermediate_clips, + output_path, + background_music=script.metadata.bgm_path + ) return final_video_path \ No newline at end of file diff --git a/sitcom_simulator/video/video_generator.py b/sitcom_simulator/video/video_generator.py index 34d40f1..5662255 100644 --- a/sitcom_simulator/video/video_generator.py +++ b/sitcom_simulator/video/video_generator.py @@ -19,6 +19,9 @@ def render_video( caption_bg_color:str="black", caption_bg_shadow_distance_x:float=5, caption_bg_shadow_distance_y:float=5, + max_zoom_factor:float=1.1, + max_pan_speed:float=0.1, + bgm_volume:float=-24, ): """ Renders a video from the given script and returns the path to the rendered video. 
@@ -38,6 +41,9 @@ def render_video( :param caption_bg_color: The color of the background behind the captions :param caption_bg_shadow_distance_x: The x distance of the shadow behind the captions :param caption_bg_shadow_distance_y: The y distance of the shadow behind the captions + :param max_zoom_factor: The maximum zoom factor for pan and zoom + :param max_pan_speed: The maximum pan speed for pan and zoom + :param bgm_volume: The volume of the background music """ # rely on image_path first, but if it's not there and image_url is, download the image import requests @@ -103,6 +109,9 @@ def render_video( clip_buffer_seconds=clip_buffer_seconds, min_clip_seconds=min_clip_seconds, speaking_delay_seconds=speaking_delay_seconds, + max_zoom_factor=max_zoom_factor, + max_pan_speed=max_pan_speed, ), caption_bg_settings=caption_bg_settings, + bgm_volume=bgm_volume, ) \ No newline at end of file From a98c094dc513e2f397ca4da847ca7a6c67ff7eec Mon Sep 17 00:00:00 2001 From: Josh Moody Date: Fri, 1 Mar 2024 01:06:17 -0700 Subject: [PATCH 22/40] fixed various minor bugs with the previous release and made the orientation parameters more robust and intuitive --- pyproject.toml | 2 +- sitcom_simulator/auto.py | 18 ++--- sitcom_simulator/cli.py | 8 +-- sitcom_simulator/image/image_generator.py | 26 ++++--- sitcom_simulator/models.py | 7 +- sitcom_simulator/video/integrations/ffmpeg.py | 67 ++++++------------- sitcom_simulator/video/video_generator.py | 24 +++++-- 7 files changed, 72 insertions(+), 80 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index ff1802e..dee8956 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "sitcom-simulator" -version = "0.4.0" +version = "0.4.1" authors = [ { name = "Josh Moody", email = "josh@joshmoody.org" }, ] diff --git a/sitcom_simulator/auto.py b/sitcom_simulator/auto.py index 442605d..a504729 100644 --- a/sitcom_simulator/auto.py +++ b/sitcom_simulator/auto.py @@ -15,8 +15,8 @@ def create_sitcom( save_script:bool=False, speed:float=1, pan_and_zoom:bool=True, - width:int=720, - height:int=1280, + orientation:str="portrait", + resolution:int=1080, narrator_dropout:bool=False, ): """ @@ -37,9 +37,9 @@ def create_sitcom( :param caption_bg_style: The style of the background behind the captions. :param save_script: If True, the generated script will be saved to a file. :param speed: The speed of the final video. 1.0 is normal speed. - :param disable_pan_and_zoom: If True, the pan and zoom effect on images will be disabled. - :param width: The width of the video to render. - :param height: The height of the video to render. + :param pan_and_zoom: If True, the pan and zoom effect on images will be enabled. + :param orientation: The orientation of the video. "landscape", "portrait", or "square". + :param resolution: The width of the video to render assuming portrait mode. This takes into account the orientation parameter. :param narrator_dropout: If True, the narrator will be forcibly removed from the script (ChatGPT often goes heavy on the narrators). 
""" from .models import VideoResult @@ -55,6 +55,7 @@ def create_sitcom( prompt = input("Enter a prompt to generate the video script: ") assert prompt or script_path, "You must provide a prompt or a script path" + assert orientation in ["landscape", "portrait", "square"], "Orientation must be 'landscape', 'portrait', or 'square'" if prompt and not script_path: initial_script = write_script( @@ -84,8 +85,7 @@ def create_sitcom( script_with_images = add_images( script_with_voices, engine="stability" if not debug_images else "pillow", - width=width, - height=height, + orientation=orientation, ) script_with_music = add_music(script_with_images) @@ -100,8 +100,8 @@ def create_sitcom( font=font, output_path=output_path, caption_bg_style=caption_bg_style, - width=width, - height=height, + resolution=resolution, + orientation=orientation, speed=speed, pan_and_zoom=pan_and_zoom, ) diff --git a/sitcom_simulator/cli.py b/sitcom_simulator/cli.py index c35b0e8..3f05604 100644 --- a/sitcom_simulator/cli.py +++ b/sitcom_simulator/cli.py @@ -25,8 +25,8 @@ def _parse_args(): parser.add_argument('--save-script', action='store_true', help="save the generated script to a file") parser.add_argument('--speed', type=float, default=1, help="speed up the final video by this factor (1.0 is normal speed)") parser.add_argument('--no-pan-and-zoom', action='store_true', help="disable pan and zoom effect on images") - parser.add_argument('--width', type=int, default=720, help="width of the video in pixels. Only 16:9 and 9:16 aspect ratios are supported.") - parser.add_argument('--height', type=int, default=1280, help="height of the video in pixels. Only 16:9 and 9:16 aspect ratios are supported.") + parser.add_argument('--resolution', type=int, default=1080, help="the resolution of the video (passing in 1080 means 1080p)") + parser.add_argument('--orientation', type=str, default='portrait', help="the orientation of the video (landscape, portrait, or square)") parser.add_argument('--no-narrators', action='store_true', help="disable narrator characters") args = parser.parse_args() return args @@ -56,7 +56,7 @@ def main(): save_script=args.save_script, speed=args.speed, pan_and_zoom=not args.no_pan_and_zoom, - width=args.width, - height=args.height, + orientation=args.orientation, + resolution=args.resolution, narrator_dropout=args.no_narrators, ) \ No newline at end of file diff --git a/sitcom_simulator/image/image_generator.py b/sitcom_simulator/image/image_generator.py index 48c5446..86d86a2 100644 --- a/sitcom_simulator/image/image_generator.py +++ b/sitcom_simulator/image/image_generator.py @@ -5,11 +5,11 @@ import atexit Engine = Literal["stability", "pillow"] +Orientation = Literal["landscape", "portrait", "square"] def generate_images( script: Script, - width=768, - height=1344, + orientation:Orientation="portrait", on_image_generated: Optional[Callable[[int, str], None]] = None, engine:Engine="stability", ): @@ -19,11 +19,15 @@ def generate_images( More procedural in nature than add_images. 
:param script: The script to generate images for - :param width: The width of the images to generate - :param height: The height of the images to generate + :param orientation: The orientation of the images to generate :param on_image_generated: A callback to call after each image is generated which takes the clip index and path to the generated image :param engine: The engine to use for generating images """ + width, height = { + "landscape": (1344, 768), + "portrait": (768, 1344), + "square": (1024, 1024), + }[orientation] from .integrations import stability, pillow image_paths: List[str | None] = [] for i, clip in tqdm(enumerate(script.clips), desc="Generating images", total=len(script.clips)): @@ -48,8 +52,7 @@ def generate_images( def add_images( script: Script, - width=768, - height=1344, + orientation:Orientation="portrait", on_image_generated: Optional[Callable[[int, str], None]] = None, engine:Engine="stability", ) -> Script: @@ -59,15 +62,16 @@ def add_images( More functional in nature than generate_images. :param script: The script to add images to - :param width: The width of the images to generate - :param height: The height of the images to generate + :param orientation: The orientation of the images to generate :param on_image_generated: A callback to call after each image is generated which takes the clip index and path to the generated image :param engine: The engine to use for generating images """ image_paths = generate_images( script=script, - width=width, - height=height, + orientation=orientation, on_image_generated=on_image_generated, engine=engine) - return script.replace(clips=[clip.replace(image_path=image_path) for clip, image_path in zip(script.clips, image_paths)]) \ No newline at end of file + return script.replace( + clips=[clip.replace(image_path=image_path) for clip, image_path in zip(script.clips, image_paths)], + metadata=script.metadata.replace(orientation=orientation) + ) \ No newline at end of file diff --git a/sitcom_simulator/models.py b/sitcom_simulator/models.py index a7d2b92..8479137 100644 --- a/sitcom_simulator/models.py +++ b/sitcom_simulator/models.py @@ -100,15 +100,14 @@ class ScriptMetadata: :param art_style: The style of the art :param prompt: The prompt for the script :param bgm_path: The path to the background music - :param misc: Any additional metadata - :param landscape: If True, the video is 16:9 instead of 9:16 + :param orientation: The orientation of the video """ title: str | None bgm_style: str | None art_style: str | None prompt: str | None bgm_path: str | None - landscape: bool | None + orientation: str | None @staticmethod def from_dict(data: dict): @@ -123,7 +122,7 @@ def from_dict(data: dict): art_style=data.get('art_style'), prompt=data.get('prompt'), bgm_path=data.get('bgm_path'), - landscape=data.get('landscape') + orientation=data.get('orientation'), ) def replace(self, **kwargs): diff --git a/sitcom_simulator/video/integrations/ffmpeg.py b/sitcom_simulator/video/integrations/ffmpeg.py index dd8cb44..322aaa6 100644 --- a/sitcom_simulator/video/integrations/ffmpeg.py +++ b/sitcom_simulator/video/integrations/ffmpeg.py @@ -7,6 +7,7 @@ import tempfile import atexit from dataclasses import dataclass +import math FRAME_RATE = 24 @@ -90,15 +91,15 @@ class ClipSettings: :param min_clip_seconds: The minimum time to hold on a clip :param speaking_delay_seconds: Delay before the audio kicks in :param max_zoom_factor: The maximum zoom factor for the pan and zoom effect + :param min_zoom_factor: The minimum zoom factor for the pan 
and zoom effect. At least some zoom is necessary for panning. :param max_pan_speed: The maximum speed of the pan and zoom effect """ clip_buffer_seconds:float=0.15 min_clip_seconds:float=1.5 speaking_delay_seconds:float=0.12 - max_zoom_factor:float=1.1 - min_zoom_factor:float=1.0 - max_pan_speed:float=0.1 - min_pan_speed:float=0.0 + max_zoom_factor:float=1.3 # magic number that seems to work well + min_zoom_factor:float=1.05 # magic number that seems to work well + max_pan_speed:float=6 # magic number that seems to work well failed_image_captions = [ "This image has been seized by the FBI", @@ -116,32 +117,6 @@ class ClipSettings: "Image seized by the government", ] - -def analyze_loudness(audio_path): - """ - Analyzes the loudness of the given audio file and returns its integrated loudness in LUFS. - """ - import subprocess - try: - cmd = [ - 'ffmpeg', '-nostats', '-i', audio_path, - '-filter_complex', 'ebur128=peak=true', - '-f', 'null', '-' - ] - result = subprocess.run(cmd, capture_output=True, text=True) - print(result) - # Parse the output for loudness information - this example may need adjustment - # to properly extract the loudness value from your FFmpeg version's output - for line in result.stderr.split('\n'): - if 'Integrated loudness:' in line: - # Example line: "Integrated loudness: I: -23.1 LUFS" - loudness_value = float(line.split()[-2]) - return loudness_value - except Exception as e: - print(f"Error analyzing loudness: {e}") - return None - - def render_clip( clip: Clip, width:int=720, @@ -166,6 +141,9 @@ def render_clip( :param caption_settings: The settings for the captions :param caption_bg_settings: The settings for the caption background """ + width = int(round(width)) + height = int(round(height)) + import ffmpeg caption = clip.speech or clip.title title_clip = not not clip.title @@ -210,7 +188,7 @@ def render_clip( .filter('crop', prezoom_scale_width, prezoom_scale_height) ) if pan_and_zoom: - zoom_start = 1.0 # Start with no zoom + zoom_start = clip_settings.min_zoom_factor # Start with no zoom zoom_end = random.uniform(clip_settings.min_zoom_factor, clip_settings.max_zoom_factor) # Target end zoom level, adjust as needed zoom_out = random.choice([True, False]) # Randomly zoom in or out if zoom_out: @@ -221,15 +199,13 @@ def render_clip( zoom_expr = f'{zoom_start}+(on/{total_frames})*{zoom_end-zoom_start}' # Randomly pan the image - max_pan = clip_settings.max_pan_speed - - # These expressions pan the image randomly but are weirdly jittery when they start from the center (iw/2-(iw/zoom/2)) - x_expr = f'(iw/2.0-(iw/zoom/2.0))+{random.uniform(-max_pan, max_pan)}*on*iw/{total_frames}' - y_expr = f'(ih/2.0-(ih/zoom/2.0))+{random.uniform(-max_pan, max_pan)}*on*ih/{total_frames}' - - # instead, we'll use a static value that represents one of the four corners of the image - # x_expr = random.choice([0, width]) - # y_expr = random.choice([0, height]) + max_pan = clip_settings.max_pan_speed * (min(width, height) / 720) * prezoom_scale_factor # Maximum pan speed (pixels per frame, scaled to 720p reference screen width + # the sqrt(total_frames) is to make the pan speed scale with the duration of the clip + # so that shorter clips are punchier and longer clips are smoother + frame_offset = f"((on-{total_frames/2})/{math.sqrt(total_frames)})" + x_expr = f'(iw/2.0-(iw/zoom/2.0))+{random.uniform(-max_pan, max_pan)}*{frame_offset}' + y_expr = f'(ih/2.0-(ih/zoom/2.0))+{random.uniform(-max_pan, max_pan)}*{frame_offset}' + print("x_expr", x_expr, "y_expr", y_expr) video_input = 
video_input.zoompan( z=zoom_expr, @@ -343,15 +319,14 @@ def concatenate_clips( **{'b:v': '8000K'} ) .overwrite_output() - .run() + .run(capture_stderr=True) ) return sanitized_filename -# TODO: support aspect ratios 16:9 and 1:1 def render_video( script: Script, - output_path: str = 'output.mp4', + output_path: str='output.mp4', width:int=720, height:int=1280, speed:float=1.0, @@ -390,11 +365,13 @@ def render_video( pan_and_zoom=pan_and_zoom, ) intermediate_clips.append(clip_file) - + + print("Rendering final video...") final_video_path = concatenate_clips( intermediate_clips, output_path, - background_music=script.metadata.bgm_path + background_music=script.metadata.bgm_path, + bgm_volume=bgm_volume, ) return final_video_path \ No newline at end of file diff --git a/sitcom_simulator/video/video_generator.py b/sitcom_simulator/video/video_generator.py index 5662255..961754f 100644 --- a/sitcom_simulator/video/video_generator.py +++ b/sitcom_simulator/video/video_generator.py @@ -7,8 +7,8 @@ def render_video( script: Script, font: str, output_path="output.mp4", - width:int=1080, - height:int=1920, + resolution:int=1080, + orientation:str="portrait", speed:float=1.0, pan_and_zoom:bool=True, clip_buffer_seconds:float=0.35, @@ -19,8 +19,9 @@ def render_video( caption_bg_color:str="black", caption_bg_shadow_distance_x:float=5, caption_bg_shadow_distance_y:float=5, - max_zoom_factor:float=1.1, - max_pan_speed:float=0.1, + max_zoom_factor:float=1.3, + min_zoom_factor:float=1.05, + max_pan_speed:float=6, bgm_volume:float=-24, ): """ @@ -29,8 +30,8 @@ def render_video( :param script: The script to render :param font: The path to the font file to use :param output_path: The path to save the rendered video to - :param width: The width of the video to render - :param height: The height of the video to render + :param resolution: The width of the video to render assuming portrait mode. This takes into account the orientation parameter. + :param orientation: The orientation of the video. "landscape", "portrait", or "square". :param speed: The speed of the final video. 1.0 is normal speed. :param pan_and_zoom: If True, the pan and zoom effect on images will be enabled. 
:param clip_buffer_seconds: How much time to wait after characters finish talking @@ -42,9 +43,11 @@ def render_video( :param caption_bg_shadow_distance_x: The x distance of the shadow behind the captions :param caption_bg_shadow_distance_y: The y distance of the shadow behind the captions :param max_zoom_factor: The maximum zoom factor for pan and zoom + :param min_zoom_factor: The minimum zoom factor for pan and zoom :param max_pan_speed: The maximum pan speed for pan and zoom :param bgm_volume: The volume of the background music """ + # rely on image_path first, but if it's not there and image_url is, download the image import requests import tempfile @@ -95,6 +98,14 @@ def render_video( y=caption_bg_shadow_distance_y, ) + aspect_ratio = 16 / 9 + + width, height = { + "landscape": (resolution * aspect_ratio, resolution), + "portrait": (resolution, resolution * aspect_ratio), + "square": (resolution, resolution), + }[orientation] + return ffmpeg.render_video( script=script, output_path=output_path, @@ -110,6 +121,7 @@ def render_video( min_clip_seconds=min_clip_seconds, speaking_delay_seconds=speaking_delay_seconds, max_zoom_factor=max_zoom_factor, + min_zoom_factor=min_zoom_factor, max_pan_speed=max_pan_speed, ), caption_bg_settings=caption_bg_settings, From 6a0523b3887b0ab7b4832549522408c349eac174 Mon Sep 17 00:00:00 2001 From: Josh Moody Date: Mon, 4 Mar 2024 21:42:59 -0700 Subject: [PATCH 23/40] fixed potential error with using unlisted character --- sitcom_simulator/speech/integrations/fakeyou.py | 2 +- sitcom_simulator/video/integrations/ffmpeg.py | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/sitcom_simulator/speech/integrations/fakeyou.py b/sitcom_simulator/speech/integrations/fakeyou.py index c8e88b5..991c878 100644 --- a/sitcom_simulator/speech/integrations/fakeyou.py +++ b/sitcom_simulator/speech/integrations/fakeyou.py @@ -149,7 +149,7 @@ def generate_voices( logging.debug(f'Starting voice job {i} ({clip.speaker}: {clip.speaker})') try: character = next((character for character in script.characters if character.name == clip.speaker)) - except Exception as e: # probably because character not in characters list + except: # probably because character not in characters list character = random.choice(BACKUP_NARRATORS) entropy = str(uuid.uuid4()) voice_token = character.voice_token diff --git a/sitcom_simulator/video/integrations/ffmpeg.py b/sitcom_simulator/video/integrations/ffmpeg.py index 322aaa6..10fd779 100644 --- a/sitcom_simulator/video/integrations/ffmpeg.py +++ b/sitcom_simulator/video/integrations/ffmpeg.py @@ -131,13 +131,11 @@ def render_clip( Renders a video clip from the given clip object and returns the path to the rendered video file. :param clip: The clip to render - :param font: The path to the font file to use for the captions :param width: The width of the video :param height: The height of the video :param speed: The speed of the final video. 
1.0 is normal speed :param pan_and_zoom: If True, the pan and zoom effect on images will be enabled :param clip_settings: The settings for rendering the video clip - :param caption_max_width: The maximum width of the captions, in characters :param caption_settings: The settings for the captions :param caption_bg_settings: The settings for the caption background """ From 7cfc2917033981081f1093a7e41b2c968688af3d Mon Sep 17 00:00:00 2001 From: Josh Moody Date: Mon, 4 Mar 2024 22:11:42 -0700 Subject: [PATCH 24/40] added ability to customize background music --- pyproject.toml | 2 +- sitcom_simulator/auto.py | 7 ++++++- sitcom_simulator/cli.py | 2 ++ sitcom_simulator/models.py | 8 +++++-- sitcom_simulator/music/integrations/freepd.py | 2 ++ sitcom_simulator/music/music_generator.py | 21 ++++++++++++------- sitcom_simulator/video/integrations/ffmpeg.py | 1 - 7 files changed, 31 insertions(+), 12 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index dee8956..cb97c37 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "sitcom-simulator" -version = "0.4.1" +version = "0.4.2" authors = [ { name = "Josh Moody", email = "josh@joshmoody.org" }, ] diff --git a/sitcom_simulator/auto.py b/sitcom_simulator/auto.py index a504729..3955106 100644 --- a/sitcom_simulator/auto.py +++ b/sitcom_simulator/auto.py @@ -18,6 +18,7 @@ def create_sitcom( orientation:str="portrait", resolution:int=1080, narrator_dropout:bool=False, + music_url:str|None=None, ): """ Generates a sitcom video based on a prompt or a script file. @@ -41,6 +42,7 @@ def create_sitcom( :param orientation: The orientation of the video. "landscape", "portrait", or "square". :param resolution: The width of the video to render assuming portrait mode. This takes into account the orientation parameter. :param narrator_dropout: If True, the narrator will be forcibly removed from the script (ChatGPT often goes heavy on the narrators). + :param music_url: A URL to a music track to use for the video. 
""" from .models import VideoResult from .script import write_script @@ -88,7 +90,10 @@ def create_sitcom( orientation=orientation, ) - script_with_music = add_music(script_with_images) + script_with_music = add_music( + script=script_with_images, + music_url=music_url, + ) final_script = script_with_music diff --git a/sitcom_simulator/cli.py b/sitcom_simulator/cli.py index 3f05604..c1c968c 100644 --- a/sitcom_simulator/cli.py +++ b/sitcom_simulator/cli.py @@ -28,6 +28,7 @@ def _parse_args(): parser.add_argument('--resolution', type=int, default=1080, help="the resolution of the video (passing in 1080 means 1080p)") parser.add_argument('--orientation', type=str, default='portrait', help="the orientation of the video (landscape, portrait, or square)") parser.add_argument('--no-narrators', action='store_true', help="disable narrator characters") + parser.add_argument('--music-url', type=str, help="a URL to a music track to use for the video") args = parser.parse_args() return args @@ -59,4 +60,5 @@ def main(): orientation=args.orientation, resolution=args.resolution, narrator_dropout=args.no_narrators, + music_url=args.music_url, ) \ No newline at end of file diff --git a/sitcom_simulator/models.py b/sitcom_simulator/models.py index 8479137..138d600 100644 --- a/sitcom_simulator/models.py +++ b/sitcom_simulator/models.py @@ -97,6 +97,8 @@ class ScriptMetadata: :param title: The title of the script :param bgm_style: The style of the background music + :param bgm_path: The path to the background music + :param bgm_url: The URL to the background music :param art_style: The style of the art :param prompt: The prompt for the script :param bgm_path: The path to the background music @@ -104,9 +106,10 @@ class ScriptMetadata: """ title: str | None bgm_style: str | None + bgm_path: str | None + bgm_url: str | None art_style: str | None prompt: str | None - bgm_path: str | None orientation: str | None @staticmethod @@ -119,9 +122,10 @@ def from_dict(data: dict): return ScriptMetadata( title=data.get('title'), bgm_style=data.get('bgm_style'), + bgm_path=data.get('bgm_path'), + bgm_url=data.get('bgm_url'), art_style=data.get('art_style'), prompt=data.get('prompt'), - bgm_path=data.get('bgm_path'), orientation=data.get('orientation'), ) diff --git a/sitcom_simulator/music/integrations/freepd.py b/sitcom_simulator/music/integrations/freepd.py index 21f8866..e9db8d6 100644 --- a/sitcom_simulator/music/integrations/freepd.py +++ b/sitcom_simulator/music/integrations/freepd.py @@ -58,6 +58,8 @@ def download_file(url: str): Given a URL, downloads the file and returns the path to the downloaded file. :param url: The URL of the file to download + + :return: The path to the downloaded file """ import requests response = requests.get(url) diff --git a/sitcom_simulator/music/music_generator.py b/sitcom_simulator/music/music_generator.py index fc733ce..55d6237 100644 --- a/sitcom_simulator/music/music_generator.py +++ b/sitcom_simulator/music/music_generator.py @@ -8,7 +8,8 @@ def generate_music( category: str | None, engine:Engine="freepd", - ): + music_url: str | None = None, + ) -> tuple[str, str | None]: """ Generates and returns a path to a music file using the given engine. @@ -16,9 +17,15 @@ def generate_music( :param category: The category of music to generate :param engine: The engine to use for generating music + :param music_url: The URL of the music to use. If provided, category is ignored. 
+ + :return: The path to the generated music file and the url of the music to use """ from .integrations import freepd if engine == "freepd": + if music_url: + logging.debug(f"Using music from URL: {music_url}") + return freepd.download_file(music_url), music_url logging.debug(f"Generating music: {category}") try: freepd_category = freepd.MusicCategory(category) @@ -26,15 +33,15 @@ def generate_music( freepd_category = None if freepd_category is None: freepd_category = random.choice(list(freepd.MusicCategory)) - return freepd.download_random_music(freepd_category) + return freepd.download_random_music(freepd_category), None else: raise ValueError(f"Invalid engine: {engine}") def add_music( script: Script, engine:Engine="freepd", - category: str | None = None, - on_music_generated: Optional[Callable[[str], None]] = None + music_url: str | None = None, + on_music_generated: Optional[Callable[[str], None]] = None, ): """ Given a script, returns the same script but with the music path filled in. @@ -43,10 +50,10 @@ def add_music( :param script: The script to add music to :param engine: The engine to use for generating music - :param category: The category of music to generate + :param music_url: The URL of the music to use. If provided, category is ignored. :param on_music_generated: A callback to call after the music is generated which takes the path to the generated music """ - music_path = generate_music(category) + music_path, music_url = generate_music(category=script.metadata.bgm_style, music_url=music_url, engine=engine) if on_music_generated: on_music_generated(music_path) - return script.replace(metadata=script.metadata.replace(bgm_path=music_path)) \ No newline at end of file + return script.replace(metadata=script.metadata.replace(bgm_path=music_path, bgm_url=music_url)) \ No newline at end of file diff --git a/sitcom_simulator/video/integrations/ffmpeg.py b/sitcom_simulator/video/integrations/ffmpeg.py index 10fd779..1827f40 100644 --- a/sitcom_simulator/video/integrations/ffmpeg.py +++ b/sitcom_simulator/video/integrations/ffmpeg.py @@ -203,7 +203,6 @@ def render_clip( frame_offset = f"((on-{total_frames/2})/{math.sqrt(total_frames)})" x_expr = f'(iw/2.0-(iw/zoom/2.0))+{random.uniform(-max_pan, max_pan)}*{frame_offset}' y_expr = f'(ih/2.0-(ih/zoom/2.0))+{random.uniform(-max_pan, max_pan)}*{frame_offset}' - print("x_expr", x_expr, "y_expr", y_expr) video_input = video_input.zoompan( z=zoom_expr, From f8c2cea3f72bc21f24350b89dd142becd5b0f9f2 Mon Sep 17 00:00:00 2001 From: Josh Moody Date: Mon, 4 Mar 2024 22:57:18 -0700 Subject: [PATCH 25/40] fixed bug in background music selector --- pyproject.toml | 2 +- sitcom_simulator/music/integrations/freepd.py | 6 ++++-- sitcom_simulator/music/music_generator.py | 4 ++-- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index cb97c37..d6f01da 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "sitcom-simulator" -version = "0.4.2" +version = "0.4.3" authors = [ { name = "Josh Moody", email = "josh@joshmoody.org" }, ] diff --git a/sitcom_simulator/music/integrations/freepd.py b/sitcom_simulator/music/integrations/freepd.py index e9db8d6..ea73a65 100644 --- a/sitcom_simulator/music/integrations/freepd.py +++ b/sitcom_simulator/music/integrations/freepd.py @@ -25,11 +25,13 @@ def values(cls): """ return [str(member.value) for name, member in cls.__members__.items()] -def download_random_music(category: MusicCategory) -> str | None: +def download_random_music(category: 
MusicCategory) -> tuple[str | None, str]: """ Given a category, downloads a random song from FreePD in that category and returns the path to the downloaded file. :param category: The category of music to download + + :return: The path to the downloaded file """ from bs4 import BeautifulSoup import requests @@ -51,7 +53,7 @@ def download_random_music(category: MusicCategory) -> str | None: song_name = selected_song.find("b").text download_link = "https://freepd.com" + selected_song.find("a", class_="downloadButton")["href"] - return download_file(download_link) + return download_file(download_link), download_link def download_file(url: str): """ diff --git a/sitcom_simulator/music/music_generator.py b/sitcom_simulator/music/music_generator.py index 55d6237..c419131 100644 --- a/sitcom_simulator/music/music_generator.py +++ b/sitcom_simulator/music/music_generator.py @@ -9,7 +9,7 @@ def generate_music( category: str | None, engine:Engine="freepd", music_url: str | None = None, - ) -> tuple[str, str | None]: + ) -> tuple[str, str]: """ Generates and returns a path to a music file using the given engine. @@ -33,7 +33,7 @@ def generate_music( freepd_category = None if freepd_category is None: freepd_category = random.choice(list(freepd.MusicCategory)) - return freepd.download_random_music(freepd_category), None + return freepd.download_random_music(freepd_category) else: raise ValueError(f"Invalid engine: {engine}") From 69efe01bd3db3300cc2dbd045517e170177c40e4 Mon Sep 17 00:00:00 2001 From: Josh Moody Date: Mon, 4 Mar 2024 23:41:53 -0700 Subject: [PATCH 26/40] minor documentation updates --- README.md | 4 ++-- docs/overview.rst | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 733dd09..1fdcd7b 100644 --- a/README.md +++ b/README.md @@ -73,10 +73,10 @@ create_sitcom( ## How it Works -Sitcom Simulator is essentially duct tape that combines multiple different AI tools into one unholy abomination. +Sitcom Simulator is essentially duct tape that combines various AI tools into one unholy abomination. 1. [ChatGPT](https://chat.openai.com/) generates the video script 2. [FakeYou](https://fakeyou.com) generates voices for the characters -3. [Stable Diffusion](https://stability.ai/stable-image) generates images for the characters +3. [Stable Diffusion](https://stability.ai/stable-image) generates images of the characters 4. [Freepd](https://freepd.com/) provides the background music 5. [FFmpeg](https://ffmpeg.org/) connects the images and voices into a movie diff --git a/docs/overview.rst b/docs/overview.rst index a7ed996..e2bed5c 100644 --- a/docs/overview.rst +++ b/docs/overview.rst @@ -19,11 +19,11 @@ Sitcom Simulator design is focused on the following goals: How does it work? ----------------------------- -Sitcom Simulator is essentially duct tape that combines multiple different AI tools into one unholy abomination. +Sitcom Simulator is essentially duct tape that combines various AI tools into one unholy abomination. #. `ChatGPT `_ generates the video script #. `FakeYou `_ generates voices for the characters -#. `Stable Diffusion `_ generates images for the characters +#. `Stable Diffusion `_ generates images of the characters #. `Freepd `_ provides the background music #. 
`FFmpeg `_ connects the images and voices into a movie From b62958d4c6ae626f802b2dc42a76cb5cceca03c1 Mon Sep 17 00:00:00 2001 From: Josh Moody Date: Tue, 5 Mar 2024 14:40:34 -0700 Subject: [PATCH 27/40] Update README.md to fix broken env link --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 1fdcd7b..0098adc 100644 --- a/README.md +++ b/README.md @@ -52,7 +52,7 @@ Sitcom Simulator is designed to be simple to use, but also supports extreme cust ### Command Line -The most basic usage is simply running the `sitcom-simulator` command with no arguments. Many optional arguments are supported as well. Note that you must [set your API key environment variables](#environment-variables) before it will work. +The most basic usage is simply running the `sitcom-simulator` command with no arguments. Many optional arguments are supported as well. Note that you must [set your API key environment variables](https://joshmoody24.github.io/sitcom-simulator/installation.html#environment-variables) before it will work. ```bash sitcom-simulator --prompt "Elon Musk teleports a toaster into the ocean" --style "beautiful renaissance oil painting" @@ -89,4 +89,4 @@ Have fun!!! ## Links - [Documentation](https://joshmoody24.github.io/sitcom-simulator/) -- Web App (coming soon) \ No newline at end of file +- Web App (coming soon) From 19f19fe2a5bc88204a267e879e9f13ea6e24a269 Mon Sep 17 00:00:00 2001 From: Josh Moody Date: Tue, 5 Mar 2024 14:41:33 -0700 Subject: [PATCH 28/40] Update installation.rst --- docs/installation.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/installation.rst b/docs/installation.rst index d8a8814..f112bfa 100644 --- a/docs/installation.rst +++ b/docs/installation.rst @@ -43,9 +43,9 @@ Create a ``.env`` file in your project's root directory, with the following stru .. code-block:: bash - STABILITY_API_KEY='your_key_here - OPENAI_API_KEY='your_key_here + STABILITY_API_KEY='your_key_here' + OPENAI_API_KEY='your_key_here' The ``.env`` file will be automatically detected by the program. -You're ready to make your first meme video! \ No newline at end of file +You're ready to make your first meme video! From b4899aa1144156bcda741440c5d73e6c9b3346b0 Mon Sep 17 00:00:00 2001 From: Josh Moody Date: Tue, 5 Mar 2024 19:14:10 -0700 Subject: [PATCH 29/40] gitignored .vscode --- .gitignore | 1 + .vscode/launch.json | 16 ---------------- .vscode/settings.json | 3 --- 3 files changed, 1 insertion(+), 19 deletions(-) delete mode 100644 .vscode/launch.json delete mode 100644 .vscode/settings.json diff --git a/.gitignore b/.gitignore index ad6eb22..479c4d4 100644 --- a/.gitignore +++ b/.gitignore @@ -17,6 +17,7 @@ docs/_build/ docs/_autosummary/ docs/_static/ docs/doctrees/ +.vscode # Byte-compiled / optimized / DLL files __pycache__/ diff --git a/.vscode/launch.json b/.vscode/launch.json deleted file mode 100644 index b10f2f0..0000000 --- a/.vscode/launch.json +++ /dev/null @@ -1,16 +0,0 @@ -{ - // Use IntelliSense to learn about possible attributes. - // Hover to view descriptions of existing attributes. 
- // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 - "version": "0.2.0", - "configurations": [ - { - "name": "Python: Current File", - "type": "debugpy", - "request": "launch", - "program": "create_sitcom.py", - "console": "integratedTerminal", - "justMyCode": true - } - ] -} \ No newline at end of file diff --git a/.vscode/settings.json b/.vscode/settings.json deleted file mode 100644 index a7d0fc7..0000000 --- a/.vscode/settings.json +++ /dev/null @@ -1,3 +0,0 @@ -{ - "esbonio.sphinx.confDir": "" -} \ No newline at end of file From f9344f9254a828bebd6d1d586512bbab820353cf Mon Sep 17 00:00:00 2001 From: Josh Moody Date: Wed, 6 Mar 2024 22:16:49 -0700 Subject: [PATCH 30/40] added support for aac audio codec --- pyproject.toml | 2 +- sitcom_simulator/auto.py | 21 ++++++++++++------- sitcom_simulator/cli.py | 2 ++ sitcom_simulator/video/integrations/ffmpeg.py | 13 ++++++++++-- sitcom_simulator/video/video_generator.py | 7 ++++--- 5 files changed, 31 insertions(+), 14 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index d6f01da..a8428a2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "sitcom-simulator" -version = "0.4.3" +version = "0.4.4" authors = [ { name = "Josh Moody", email = "josh@joshmoody.org" }, ] diff --git a/sitcom_simulator/auto.py b/sitcom_simulator/auto.py index 3955106..fae0bdf 100644 --- a/sitcom_simulator/auto.py +++ b/sitcom_simulator/auto.py @@ -1,24 +1,27 @@ +from typing import Literal + def create_sitcom( - prompt: str | None = None, - art_style: str | None = None, - script_path: str | None = None, - debug_images: bool=False, - debug_audio: bool=False, - font: str = 'Arial', + prompt:str | None = None, + art_style:str | None = None, + script_path:str | None = None, + debug_images:bool=False, + debug_audio:bool=False, + font:str = 'Arial', max_tokens:int=2048, approve_script:bool=False, manual_select_characters:bool=True, upload_to_yt=False, audio_job_delay:int=30, audio_poll_delay:int=10, - caption_bg_style:str="box_shadow", + caption_bg_style:Literal['box_shadow', 'text_shadow', 'none']='box_shadow', save_script:bool=False, speed:float=1, pan_and_zoom:bool=True, - orientation:str="portrait", + orientation:Literal["landscape", "portrait", "square"]="portrait", resolution:int=1080, narrator_dropout:bool=False, music_url:str|None=None, + audio_codec:Literal['mp3', 'aac']='mp3', ): """ Generates a sitcom video based on a prompt or a script file. @@ -43,6 +46,7 @@ def create_sitcom( :param resolution: The width of the video to render assuming portrait mode. This takes into account the orientation parameter. :param narrator_dropout: If True, the narrator will be forcibly removed from the script (ChatGPT often goes heavy on the narrators). :param music_url: A URL to a music track to use for the video. + :param audio_codec: The audio codec to use for the video. mp3 seems to be more compatible with more video players, but aac is higher quality and is necessary for viewing videos in an iPhone browser. 
""" from .models import VideoResult from .script import write_script @@ -109,6 +113,7 @@ def create_sitcom( orientation=orientation, speed=speed, pan_and_zoom=pan_and_zoom, + audio_codec=audio_codec, ) result = VideoResult( diff --git a/sitcom_simulator/cli.py b/sitcom_simulator/cli.py index c1c968c..8006535 100644 --- a/sitcom_simulator/cli.py +++ b/sitcom_simulator/cli.py @@ -29,6 +29,7 @@ def _parse_args(): parser.add_argument('--orientation', type=str, default='portrait', help="the orientation of the video (landscape, portrait, or square)") parser.add_argument('--no-narrators', action='store_true', help="disable narrator characters") parser.add_argument('--music-url', type=str, help="a URL to a music track to use for the video") + parser.add_argument('--audio-codec', type=str, help="the audio codec to use for the video: mp3 or aac", default='mp3') args = parser.parse_args() return args @@ -61,4 +62,5 @@ def main(): resolution=args.resolution, narrator_dropout=args.no_narrators, music_url=args.music_url, + audio_codec=args.audio_codec, ) \ No newline at end of file diff --git a/sitcom_simulator/video/integrations/ffmpeg.py b/sitcom_simulator/video/integrations/ffmpeg.py index 1827f40..4ba3f4b 100644 --- a/sitcom_simulator/video/integrations/ffmpeg.py +++ b/sitcom_simulator/video/integrations/ffmpeg.py @@ -8,6 +8,7 @@ import atexit from dataclasses import dataclass import math +from typing import Literal FRAME_RATE = 24 @@ -126,6 +127,7 @@ def render_clip( clip_settings:ClipSettings=ClipSettings(), caption_settings:CaptionSettings=CaptionSettings(), caption_bg_settings:BoxSettings|ShadowSettings=BoxSettings(), + audio_codec:Literal['mp3', 'aac']='mp3', ): """ Renders a video clip from the given clip object and returns the path to the rendered video file. @@ -138,6 +140,7 @@ def render_clip( :param clip_settings: The settings for rendering the video clip :param caption_settings: The settings for the captions :param caption_bg_settings: The settings for the caption background + :param audio_codec: The audio codec to use for the output video """ width = int(round(width)) height = int(round(height)) @@ -249,7 +252,7 @@ def render_clip( input_streams = [video_input] if audio_input is None else [video_input, audio_input] with tempfile.NamedTemporaryFile(suffix='.mp4', delete=False) as temp_file: intermediate_clip = ( - ffmpeg.output(*input_streams, temp_file.name, vcodec='libx264', preset='superfast', acodec='mp3', t=duration) + ffmpeg.output(*input_streams, temp_file.name, vcodec='libx264', preset='superfast', acodec=audio_codec, t=duration) .overwrite_output() .run(capture_stderr=True, overwrite_output=True) ) @@ -265,6 +268,7 @@ def concatenate_clips( output_filename: str, background_music:str|None=None, bgm_volume:float=-24, + audio_codec:Literal['mp3', 'aac']='mp3', ): """ Combines the given video clips into a single video file and returns the path to the concatenated video file. 
@@ -273,6 +277,7 @@ def concatenate_clips( :param output_filename: The name of the output file :param background_music: The path to the background music file :param bgm_volume: The volume of the background music, between 0 and 1 + :param audio_codec: The audio codec to use for the output video """ import ffmpeg @@ -311,7 +316,7 @@ def concatenate_clips( sanitized_filename, vcodec='libx264', pix_fmt='yuv420p', # necessary for compatibility - acodec='mp3', + acodec=audio_codec, r=FRAME_RATE, **{'b:v': '8000K'} ) @@ -332,6 +337,7 @@ def render_video( caption_settings:CaptionSettings=CaptionSettings(), caption_bg_settings:BoxSettings|ShadowSettings=BoxSettings(), bgm_volume:float=-24, + audio_codec:Literal['mp3', 'aac']='mp3', ): """ Renders a video from the given script and returns the path to the rendered video file. @@ -348,6 +354,7 @@ def render_video( :param caption_settings: The settings for the captions :param caption_bg_settings: The settings for the caption background :param bgm_volume: The volume of the background music, good values are between -24 and -16 + :param audio_codec: The audio codec to use for the output video """ intermediate_clips = [] for clip in tqdm(script.clips, desc="Rendering intermediate video clips"): @@ -360,6 +367,7 @@ def render_video( caption_bg_settings=caption_bg_settings, speed=speed, pan_and_zoom=pan_and_zoom, + audio_codec=audio_codec, ) intermediate_clips.append(clip_file) @@ -369,6 +377,7 @@ def render_video( output_path, background_music=script.metadata.bgm_path, bgm_volume=bgm_volume, + audio_codec=audio_codec, ) return final_video_path \ No newline at end of file diff --git a/sitcom_simulator/video/video_generator.py b/sitcom_simulator/video/video_generator.py index 961754f..4ff26d5 100644 --- a/sitcom_simulator/video/video_generator.py +++ b/sitcom_simulator/video/video_generator.py @@ -1,8 +1,6 @@ from typing import List, Literal from ..models import Script -CaptionBg = Literal['box_shadow', 'text_shadow', 'none'] - def render_video( script: Script, font: str, @@ -14,7 +12,7 @@ def render_video( clip_buffer_seconds:float=0.35, min_clip_seconds:float=1.5, speaking_delay_seconds:float=0.12, - caption_bg_style:CaptionBg='box_shadow', + caption_bg_style:Literal['box_shadow', 'text_shadow', 'none']='box_shadow', caption_bg_alpha:float=0.6, caption_bg_color:str="black", caption_bg_shadow_distance_x:float=5, @@ -23,6 +21,7 @@ def render_video( min_zoom_factor:float=1.05, max_pan_speed:float=6, bgm_volume:float=-24, + audio_codec:Literal['mp3', 'aac']='mp3', ): """ Renders a video from the given script and returns the path to the rendered video. @@ -46,6 +45,7 @@ def render_video( :param min_zoom_factor: The minimum zoom factor for pan and zoom :param max_pan_speed: The maximum pan speed for pan and zoom :param bgm_volume: The volume of the background music + :param audio_codec: The audio codec to use for the video. mp3 seems to be more compatible with more video players, but aac is higher quality and is necessary for viewing videos in an iPhone browser. 
""" # rely on image_path first, but if it's not there and image_url is, download the image @@ -126,4 +126,5 @@ def render_video( ), caption_bg_settings=caption_bg_settings, bgm_volume=bgm_volume, + audio_codec=audio_codec, ) \ No newline at end of file From 07cf7b829187dcb3036b6d7b32e4b0898c96ef82 Mon Sep 17 00:00:00 2001 From: Josh Moody Date: Mon, 18 Mar 2024 17:27:15 -0600 Subject: [PATCH 31/40] improved character detection and video rendering settings --- pyproject.toml | 3 +- requirements.txt | 1 + .../character_extraction_instructions.txt | 2 +- .../fakeyou/character_extractor.py | 31 ++++++++++++------- sitcom_simulator/script/script_generator.py | 2 +- sitcom_simulator/video/integrations/ffmpeg.py | 6 +++- 6 files changed, 30 insertions(+), 15 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index a8428a2..22ddc11 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "sitcom-simulator" -version = "0.4.4" +version = "0.5.0" authors = [ { name = "Josh Moody", email = "josh@joshmoody.org" }, ] @@ -23,6 +23,7 @@ dependencies = [ "Pillow~=10.0.1", "beautifulsoup4~=4.12.2", "requests~=2.31.0", + "thefuzz~=0.22.1", "toml", # "fakeyou==1.2.5", Currently using raw HTTP requests instead # "moviepy==1.0.3", No longer supported due to lack of features. Using ffmpeg-python instead diff --git a/requirements.txt b/requirements.txt index 7a6cd90..bde37e3 100644 --- a/requirements.txt +++ b/requirements.txt @@ -8,4 +8,5 @@ Pillow~=10.0.1 beautifulsoup4~=4.12.2 requests~=2.31.0 mypy~=1.8.0 +thefuzz~=0.22.1 toml \ No newline at end of file diff --git a/sitcom_simulator/script/integrations/fakeyou/character_extraction_instructions.txt b/sitcom_simulator/script/integrations/fakeyou/character_extraction_instructions.txt index dcf99bb..d0f5164 100644 --- a/sitcom_simulator/script/integrations/fakeyou/character_extraction_instructions.txt +++ b/sitcom_simulator/script/integrations/fakeyou/character_extraction_instructions.txt @@ -4,5 +4,5 @@ Generate a list of potential characters to use in a short video of this prompt: Your results will be searched for in the FakeYou database for potential AI voices to use. The characters must be likely to have an AI voice on the internet somewhere, e.g., famous people/characters. -Keep the list short and focused. +Keep the list short and focused on the user's prompt. Structure your output as a pure JSON list of strings, no markdown. \ No newline at end of file diff --git a/sitcom_simulator/script/integrations/fakeyou/character_extractor.py b/sitcom_simulator/script/integrations/fakeyou/character_extractor.py index 63edf56..9d8785a 100644 --- a/sitcom_simulator/script/integrations/fakeyou/character_extractor.py +++ b/sitcom_simulator/script/integrations/fakeyou/character_extractor.py @@ -8,6 +8,9 @@ import os import csv +def normalize_string(s): + return re.sub(r'\W+', '', s).lower() + def load_curated_voices(): """ Loads the curated voices from the 'curated_voices.csv' file in the same directory as this script. @@ -31,8 +34,6 @@ def generate_character_list(prompt: str, custom_instructions: str | None=None) - :param prompt: The user-submitted prompt :param custom_instructions: A string containing custom instructions for the language model. Must contain the placeholder '{prompt}'. 
""" - - if custom_instructions: instructions = custom_instructions else: @@ -53,7 +54,7 @@ def generate_character_list(prompt: str, custom_instructions: str | None=None) - raw_response = chat(instructions) logging.debug("Raw character extractor response from LLM:", raw_response) character_names = json.loads(raw_response) - print("Characters proposed:", ", ".join(character_names)) + logging.debug("Characters proposed:", ", ".join(character_names), "\n") # TODO: cache data from fakeyou to avoid lots of hits? res = requests.get('https://api.fakeyou.com/tts/list') @@ -64,16 +65,24 @@ def generate_character_list(prompt: str, custom_instructions: str | None=None) - chosen_characters = [] for name in character_names: # TODO (big maybe) if tts doesn't exist but vtv does, render tts in someone else's voice and then use vtv - if name.lower() not in name_to_model: - continue - matches = name_to_model[name.lower()] - # find the highest-rated match - highest_rated_voice = max(matches, key=lambda model: calculate_star_rating(model, curated_characters)) - chosen_characters.append(Character(name=name, voice_token=highest_rated_voice['model_token'])) + from thefuzz import process + SIMILARITY_CUTOFF = 75 # out of 100 + match, score = process.extractOne(normalize_string(name), list(name_to_model.keys()), score_cutoff=SIMILARITY_CUTOFF) + if match: + logging.debug(f"Matched {name} to {match} with score {score}") + voices = name_to_model[match.lower()] + # find the highest-rated match + highest_rated_voice = max(voices, key=lambda model: calculate_star_rating(model, curated_characters)) + chosen_characters.append(Character(name=name, voice_token=highest_rated_voice['model_token'])) logging.info("Selected voices:", ", ".join([c.name for c in chosen_characters])) # guarantee at least one voice (narrator) - chosen_characters.append(random.choice(BACKUP_NARRATORS)) + if len(chosen_characters) == 0: + print("No voices selected. Defaulting to narrator.") + logging.info("No voices selected. Defaulting to narrator.") + chosen_characters.append(random.choice(BACKUP_NARRATORS)) + + print("Characters selected:", ", ".join([c.name for c in chosen_characters]), "\n") return chosen_characters @@ -107,7 +116,7 @@ def pure_character_name(raw_name: str): """ match = NAME_PATTERN.search(raw_name) if match: - return match.group(1) + return normalize_string(match.group(1)) return None DEFAULT_RATING = 2 # not the worst possible, but pretty bad diff --git a/sitcom_simulator/script/script_generator.py b/sitcom_simulator/script/script_generator.py index 133a627..0506e42 100644 --- a/sitcom_simulator/script/script_generator.py +++ b/sitcom_simulator/script/script_generator.py @@ -77,7 +77,7 @@ def write_script( if len(script.clips) == 0: raise ValueError("Narrator dropout resulted in an empty script. 
Please try again.") logging.debug("TOML script", script) - print(formatted_script(script)) + print(formatted_script(script), "\n") if(require_approval): validated = None while validated not in ["y", "n", "q"]: diff --git a/sitcom_simulator/video/integrations/ffmpeg.py b/sitcom_simulator/video/integrations/ffmpeg.py index 4ba3f4b..378cdbb 100644 --- a/sitcom_simulator/video/integrations/ffmpeg.py +++ b/sitcom_simulator/video/integrations/ffmpeg.py @@ -11,6 +11,8 @@ from typing import Literal FRAME_RATE = 24 +MAX_CLIP_SECONDS = 15 +FFMPEG_QUALITY:Literal["ultrafast", "superfast", "veryfast", "faster", "fast", "medium", "slow", "slower", "veryslow"] = "slow" @dataclass class ShadowSettings: @@ -166,6 +168,7 @@ def render_clip( duration = audio_duration + clip_settings.clip_buffer_seconds + clip_settings.speaking_delay_seconds duration = max(duration, clip_settings.min_clip_seconds) + duration = min(duration, MAX_CLIP_SECONDS) # maximum duration for a clip (to prevent long AI audio glitches) duration = duration / speed if clip.duration and not clip.speaker: # 'not speaker' in case the llm forgets proper syntax duration = clip.duration @@ -180,7 +183,7 @@ def render_clip( # the zoom effect is jittery for some strange reason # but if we upscale the image first, the jitter is less noticeable # at the cost of slower rendering - prezoom_scale_factor = 3 if pan_and_zoom else 1 + prezoom_scale_factor = 2 if pan_and_zoom else 1 prezoom_scale_width = int(width * prezoom_scale_factor) prezoom_scale_height = int(height * prezoom_scale_factor) video_input = ( @@ -318,6 +321,7 @@ def concatenate_clips( pix_fmt='yuv420p', # necessary for compatibility acodec=audio_codec, r=FRAME_RATE, + preset=FFMPEG_QUALITY, **{'b:v': '8000K'} ) .overwrite_output() From 0506990c5d135b4cca905fdd9d81fa3392326918 Mon Sep 17 00:00:00 2001 From: Josh Moody Date: Thu, 21 Mar 2024 19:59:41 -0600 Subject: [PATCH 32/40] fixed crash when trying to use obscure characters --- pyproject.toml | 2 +- .../script/integrations/fakeyou/character_extractor.py | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 22ddc11..2ad987b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "sitcom-simulator" -version = "0.5.0" +version = "0.5.1" authors = [ { name = "Josh Moody", email = "josh@joshmoody.org" }, ] diff --git a/sitcom_simulator/script/integrations/fakeyou/character_extractor.py b/sitcom_simulator/script/integrations/fakeyou/character_extractor.py index 9d8785a..f62e784 100644 --- a/sitcom_simulator/script/integrations/fakeyou/character_extractor.py +++ b/sitcom_simulator/script/integrations/fakeyou/character_extractor.py @@ -67,8 +67,9 @@ def generate_character_list(prompt: str, custom_instructions: str | None=None) - # TODO (big maybe) if tts doesn't exist but vtv does, render tts in someone else's voice and then use vtv from thefuzz import process SIMILARITY_CUTOFF = 75 # out of 100 - match, score = process.extractOne(normalize_string(name), list(name_to_model.keys()), score_cutoff=SIMILARITY_CUTOFF) - if match: + extraction = process.extractOne(normalize_string(name), list(name_to_model.keys()), score_cutoff=SIMILARITY_CUTOFF) + if extraction: + match, score = extraction logging.debug(f"Matched {name} to {match} with score {score}") voices = name_to_model[match.lower()] # find the highest-rated match From 6caa587090e369182d988df4927f88549a8dc099 Mon Sep 17 00:00:00 2001 From: Josh Moody Date: Tue, 6 Aug 2024 13:46:04 -0600 Subject: [PATCH 33/40] 
Linked web app in README --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 0098adc..f8041f5 100644 --- a/README.md +++ b/README.md @@ -89,4 +89,4 @@ Have fun!!! ## Links - [Documentation](https://joshmoody24.github.io/sitcom-simulator/) -- Web App (coming soon) +- [Web app] (https://sitcom-simulator.com) From fd3c569c469e89d40c718ba7407c0fa8bc6e18a6 Mon Sep 17 00:00:00 2001 From: Josh Moody Date: Tue, 6 Aug 2024 13:46:24 -0600 Subject: [PATCH 34/40] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index f8041f5..02daafa 100644 --- a/README.md +++ b/README.md @@ -89,4 +89,4 @@ Have fun!!! ## Links - [Documentation](https://joshmoody24.github.io/sitcom-simulator/) -- [Web app] (https://sitcom-simulator.com) +- [Web app](https://sitcom-simulator.com) From 132c6691cc5a1d4e771e1bba7bd0e483456c90a4 Mon Sep 17 00:00:00 2001 From: Josh Moody Date: Sat, 10 Aug 2024 19:30:54 -0600 Subject: [PATCH 35/40] changed default chatgpt model to gpt-4o-mini --- pyproject.toml | 2 +- sitcom_simulator/script/integrations/chatgpt/chatgpt.py | 2 +- sitcom_simulator/script/script_generator.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 2ad987b..2211968 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "sitcom-simulator" -version = "0.5.1" +version = "0.5.2" authors = [ { name = "Josh Moody", email = "josh@joshmoody.org" }, ] diff --git a/sitcom_simulator/script/integrations/chatgpt/chatgpt.py b/sitcom_simulator/script/integrations/chatgpt/chatgpt.py index fd8bfaa..53ee58e 100644 --- a/sitcom_simulator/script/integrations/chatgpt/chatgpt.py +++ b/sitcom_simulator/script/integrations/chatgpt/chatgpt.py @@ -2,7 +2,7 @@ def chat( prompt: str, max_tokens:int=2048, temperature:float=1, - model: str="gpt-3.5-turbo", + model: str="gpt-4o-mini", ): """ Given a prompt, returns a response from ChatGPT. 
diff --git a/sitcom_simulator/script/script_generator.py b/sitcom_simulator/script/script_generator.py index 0506e42..42f1ff8 100644 --- a/sitcom_simulator/script/script_generator.py +++ b/sitcom_simulator/script/script_generator.py @@ -10,7 +10,7 @@ def write_script( max_tokens:int=2048, require_approval:bool=False, temperature:float=0.5, - model:str="gpt-3.5-turbo", + model:str="gpt-4o-mini", custom_script_instructions: str | None=None, custom_character_instructions: str | None=None, fakeyou_characters:bool=True, From 92769c0464a240f0215330481a2e1c91a1e854c2 Mon Sep 17 00:00:00 2001 From: Josh Moody Date: Sat, 10 Aug 2024 19:48:43 -0600 Subject: [PATCH 36/40] Changed text background to text_shadow by default, instead of box_shadow --- pyproject.toml | 2 +- sitcom_simulator/auto.py | 2 +- sitcom_simulator/cli.py | 4 ++-- sitcom_simulator/video/integrations/ffmpeg.py | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 2211968..c0994e8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "sitcom-simulator" -version = "0.5.2" +version = "0.6.0" authors = [ { name = "Josh Moody", email = "josh@joshmoody.org" }, ] diff --git a/sitcom_simulator/auto.py b/sitcom_simulator/auto.py index fae0bdf..b575c8e 100644 --- a/sitcom_simulator/auto.py +++ b/sitcom_simulator/auto.py @@ -13,7 +13,7 @@ def create_sitcom( upload_to_yt=False, audio_job_delay:int=30, audio_poll_delay:int=10, - caption_bg_style:Literal['box_shadow', 'text_shadow', 'none']='box_shadow', + caption_bg_style:Literal['box_shadow', 'text_shadow', 'none']='text_shadow', save_script:bool=False, speed:float=1, pan_and_zoom:bool=True, diff --git a/sitcom_simulator/cli.py b/sitcom_simulator/cli.py index 8006535..b28d9a3 100644 --- a/sitcom_simulator/cli.py +++ b/sitcom_simulator/cli.py @@ -21,7 +21,7 @@ def _parse_args(): parser.add_argument('--font', type=str, help="the font to use for the video", default='Arial') parser.add_argument('--audio-job-delay', type=int, default=30, help="the number of seconds to wait between starting audio generation jobs.
Lower values render faster but are more likely to get rate limited") parser.add_argument('--audio-poll-delay', type=int, default=10, help="the number of seconds to wait between polling for audio generation job completion") - parser.add_argument('--text-shadow', action='store_true', help="use text shadow for captions instead of box background") + parser.add_argument('--box-shadow', action='store_true', help="use box background for captions instead of text shadow") parser.add_argument('--save-script', action='store_true', help="save the generated script to a file") parser.add_argument('--speed', type=float, default=1, help="speed up the final video by this factor (1.0 is normal speed)") parser.add_argument('--no-pan-and-zoom', action='store_true', help="disable pan and zoom effect on images") @@ -54,7 +54,7 @@ def main(): upload_to_yt=args.upload, audio_job_delay=args.audio_job_delay, audio_poll_delay=args.audio_poll_delay, - caption_bg_style="text_shadow" if args.text_shadow else "box_shadow", + caption_bg_style="box_shadow" if args.box_shadow else "text_shadow", save_script=args.save_script, speed=args.speed, pan_and_zoom=not args.no_pan_and_zoom, diff --git a/sitcom_simulator/video/integrations/ffmpeg.py b/sitcom_simulator/video/integrations/ffmpeg.py index 378cdbb..1b132bc 100644 --- a/sitcom_simulator/video/integrations/ffmpeg.py +++ b/sitcom_simulator/video/integrations/ffmpeg.py @@ -241,7 +241,7 @@ def render_clip( 'drawtext', text=caption if caption else random.choice(failed_image_captions), fontfile=caption_settings.font, - fontsize=42 * scale_factor, # scales the font size with 720px as the reference screen width + fontsize=48 * scale_factor, # scales the font size with 720px as the reference screen width fontcolor='white', text_align="M+C", # had to dig deep into FFmpeg source code to learn that you combine flags with a plus sign x='(w - text_w) / 2', From de6b9c2b9f917872b1dddb9b56820d1404c58073 Mon Sep 17 00:00:00 2001 From: Josh Moody Date: Sat, 10 Aug 2024 20:01:37 -0600 Subject: [PATCH 37/40] fixed problem with new text defaults --- pyproject.toml | 2 +- sitcom_simulator/video/video_generator.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index c0994e8..4aa02b5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "sitcom-simulator" -version = "0.6.0" +version = "0.6.1" authors = [ { name = "Josh Moody", email = "josh@joshmoody.org" }, ] diff --git a/sitcom_simulator/video/video_generator.py b/sitcom_simulator/video/video_generator.py index 4ff26d5..ecc14a0 100644 --- a/sitcom_simulator/video/video_generator.py +++ b/sitcom_simulator/video/video_generator.py @@ -12,7 +12,7 @@ def render_video( clip_buffer_seconds:float=0.35, min_clip_seconds:float=1.5, speaking_delay_seconds:float=0.12, - caption_bg_style:Literal['box_shadow', 'text_shadow', 'none']='box_shadow', + caption_bg_style:Literal['box_shadow', 'text_shadow', 'none']='text_shadow', caption_bg_alpha:float=0.6, caption_bg_color:str="black", caption_bg_shadow_distance_x:float=5, From c2c0a60be698821c1d2fd1f01a20477e70352ca6 Mon Sep 17 00:00:00 2001 From: Josh Moody Date: Mon, 10 Feb 2025 12:42:22 -0700 Subject: [PATCH 38/40] Fix to accommodate FakeYou API changes --- pyproject.toml | 6 ++-- .../speech/integrations/fakeyou.py | 28 ++++++++++++++----- 2 files changed, 24 insertions(+), 10 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 4aa02b5..3e7ea12 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ 
[project] name = "sitcom-simulator" -version = "0.6.1" +version = "0.6.2rc1" authors = [ { name = "Josh Moody", email = "josh@joshmoody.org" }, ] @@ -28,7 +28,7 @@ dependencies = [ # "fakeyou==1.2.5", Currently using raw HTTP requests instead # "moviepy==1.0.3", No longer supported due to lack of features. Using ffmpeg-python instead ] -requires-python = ">= 3.11" +requires-python = ">=3.11,<3.13" [project.optional-dependencies] dev = [ @@ -52,4 +52,4 @@ build-backend = "hatchling.build" include = ["sitcom_simulator/**/*"] [project.scripts] -sitcom-simulator = "sitcom_simulator.cli:main" \ No newline at end of file +sitcom-simulator = "sitcom_simulator.cli:main" diff --git a/sitcom_simulator/speech/integrations/fakeyou.py b/sitcom_simulator/speech/integrations/fakeyou.py index 991c878..02b9fe0 100644 --- a/sitcom_simulator/speech/integrations/fakeyou.py +++ b/sitcom_simulator/speech/integrations/fakeyou.py @@ -22,20 +22,34 @@ def download_voice(url: str): :param url: The URL of the audio to download """ - logging.info("downloading audio:", url) + logging.info(f"Downloading audio from: {url}") temp_audio_file = tempfile.NamedTemporaryFile(suffix='.wav', delete=False) atexit.register(os.remove, temp_audio_file.name) + try: # uses urllib because AWS lambda doesn't have requests (not that that matters anymore) - with urllib.request.urlopen(url) as response, open(temp_audio_file.name, 'wb') as out_file: - data = response.read() # Read the content as bytes + # Create a request with a browser-like User-Agent (otherwise 403 on FakeYou's new CDN) + req = urllib.request.Request( + url, + headers={ + "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36", + "Accept": "*/*", + "Connection": "keep-alive", + "Referer": "https://fakeyou.com/", # FakeYou might check this + } + ) + + # Open the URL and write the content to a file + with urllib.request.urlopen(req) as response, open(temp_audio_file.name, 'wb') as out_file: + data = response.read() out_file.write(data) + + logging.info(f"Audio downloaded to: {temp_audio_file.name}") return temp_audio_file.name + except urllib.error.HTTPError as e: - # Handle HTTP errors raise Exception(f"Failed to download audio from URL: {url}. Status code: {e.code}") except urllib.error.URLError as e: - # Handle URL errors (e.g., network issues) raise Exception(f"Failed to download audio from URL: {url}. 
Error: {e.reason}") def fetch_voicelist(): @@ -202,7 +216,7 @@ def generate_voices( completed = True total_poll_time = time.time() - polling_start_time audio_path = json["state"]["maybe_public_bucket_wav_audio_path"] - audio_url = f'https://storage.googleapis.com/vocodes-public{audio_path}' + audio_url = f'https://cdn-2.fakeyou.com{audio_path}' audio_urls.append(audio_url) if(on_voice_url_generated): on_voice_url_generated(i, audio_url) @@ -213,4 +227,4 @@ def generate_voices( # sleep the remaining time before next job remaining_delay = max(0, rand_job_delay - total_poll_time) time.sleep(remaining_delay) - return audio_urls \ No newline at end of file + return audio_urls From ff2a1c610c93368c782c186dbb408021b20a7063 Mon Sep 17 00:00:00 2001 From: Josh Moody Date: Mon, 10 Feb 2025 13:32:54 -0700 Subject: [PATCH 39/40] Bumping from release candidate to release --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 3e7ea12..b86844a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "sitcom-simulator" -version = "0.6.2rc1" +version = "0.6.2" authors = [ { name = "Josh Moody", email = "josh@joshmoody.org" }, ] From fd3d99ac00d545cb086d7e7620e5032b9abcdb99 Mon Sep 17 00:00:00 2001 From: Josh Moody Date: Wed, 30 Apr 2025 12:24:00 -0600 Subject: [PATCH 40/40] Updated README with new web app domain name --- README.md | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 02daafa..0faaf0d 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,10 @@ # Sitcom Simulator A highly-customizable tool that automatically creates AI-generated meme videos +## Update + +Sitcom Simulator's web app has moved to [https://sitcom-simulator.net](https://sitcom-simulator.net), instead of `.com` due to the old domain expiring and getting scalped 😅 + `pip install sitcom-simulator` ## Documentation @@ -89,4 +93,5 @@ Have fun!!! ## Links - [Documentation](https://joshmoody24.github.io/sitcom-simulator/) -- [Web app](https://sitcom-simulator.com) +- [sitcom-simulator.net](https://sitcom-simulator.net) + - Formerly `sitcom-simulator.com`