<a href="https://colab.research.google.com/github/gio961gio/Music-to-Image-Interpolation/blob/main/Music_to_Image_Interpolation_.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 1) SETUP (It takes between 10 and 15 minutes)


In [None]:
# @title Clone Repository
import sys

!git clone https://github.com/gio961gio/Music-to-Image-Interpolation.git
sys.path.append("/content/Music-to-Image-Interpolation/Scripts")


In [None]:
# @title Install Tesseract
%%capture
!sudo apt install tesseract-ocr

In [None]:
# @title Install "anything2image" Package

!pip install anything2image

In [None]:
# @title Install Packages
import shlex
import subprocess
import sys

from tqdm import tqdm

# Each entry is the argument string handed to `pip install`; it may carry
# extra flags (e.g. "--upgrade") next to the requirement itself.
# NOTE(review): upgrading torchaudio after pinning torchvision may pull in a
# torch build that no longer matches the pin - confirm the resulting versions.
packages_to_install = ["diffusers", "pydub", "pytesseract", "torchvision==0.16.2", "torchaudio --upgrade"]

# Entries whose installation exited with a non-zero status.
failed_installs = []

for package in tqdm(packages_to_install, desc="Installing packages"):
    # Run pip through the kernel's interpreter so packages land in the
    # environment the notebook imports from; shlex.split separates the
    # requirement from any flags, so no shell is needed.
    result = subprocess.run(
        [sys.executable, "-m", "pip", "install", *shlex.split(package)],
        capture_output=True,
        text=True,
    )
    if result.returncode != 0:
        # Output is captured to keep the progress bar clean, so surface the
        # error explicitly instead of dropping it.
        failed_installs.append(package)
        tqdm.write(
            f"pip install {package} failed (exit code {result.returncode}):\n{result.stderr}"
        )


In [None]:
# @title Install Image_Generator
import anything2image.imagebind as ib
import torch
from diffusers import StableUnCLIPImg2ImgPipeline

# Pick the GPU when one is available, otherwise fall back to the CPU.
device = "cuda:0" if torch.cuda.is_available() else "cpu"

# Half precision is only requested together with the GPU; on the CPU fallback
# the pipeline is loaded in float32 because float16 is poorly supported there.
pipe_dtype = torch.float16 if torch.cuda.is_available() else torch.float32

# Image generator: Stable unCLIP image-to-image pipeline.
pipe = StableUnCLIPImg2ImgPipeline.from_pretrained(
    "stabilityai/stable-diffusion-2-1-unclip", torch_dtype=pipe_dtype
).to(device)

# ImageBind model with pretrained weights, in eval mode, used by later cells
# to embed the audio segments.
model = ib.imagebind_huge(pretrained=True).eval().to(device)



In [None]:
# @title Install Inpainter
# Build the two objects that the "Generate" cell hands to `walk` as
# `text_detector` and `inpainter`.
from detext import LocalSDInpainter, TesseractTextDetector

# Path of the Tesseract executable given to the detector.
TESSERACT_BINARY = '/usr/bin/tesseract'

text_detector = TesseractTextDetector(TESSERACT_BINARY)
inpainter = LocalSDInpainter()



In [None]:
# @title Create Folders
import os

# Folders created by this cell: one receives the uploaded track, the other is
# the folder later cells read the audio segments from.
x = '/content/audio'
y = '/content/audio_segments'

# exist_ok=True keeps the cell re-runnable: without it os.makedirs raises
# FileExistsError as soon as the folders already exist.
os.makedirs(x, exist_ok=True)
os.makedirs(y, exist_ok=True)


# 2) AUDIO CHOP

In [None]:
# @title Load Audio
from pydub import AudioSegment
import os
import shutil
import librosa
import numpy as np
import soundfile as sf

from google.colab import files

# Ask the user for a file; the result maps each uploaded file name to its content.
uploaded = files.upload()
if not uploaded:
    # Fail with an actionable message instead of an IndexError further down.
    raise ValueError("No file was uploaded: run this cell again and choose an audio file.")

# Destination folder for the uploaded track. It is (re)created here because
# moving a file to a missing directory would rename the file to that path.
cartella_destinazione = '/content/audio'
os.makedirs(cartella_destinazione, exist_ok=True)

# Name of the uploaded file (only the first one is used).
nome_file_caricato = next(iter(uploaded))

# Move the file into the destination folder. Spelling out the full target
# path lets a re-upload of the same file name replace the previous copy
# instead of raising "Destination path ... already exists".
shutil.move(
    nome_file_caricato,
    os.path.join(cartella_destinazione, os.path.basename(nome_file_caricato)),
)


In [None]:
# @title Segments Number
from audio_stuff import Audio_stuff

# Colab form field; the value is passed to Audio_stuff below and reused by the
# "Generate" cell (as num_segments + 1).
num_segments = 5 # @param {type:"number"}


# NOTE(review): presumably Audio_stuff splits the uploaded track into
# `num_segments` pieces - confirm against Scripts/audio_stuff.py.
audio_processing = Audio_stuff(num_segments)
# Path of the input audio as exposed by Audio_stuff; later handed to
# stuff_for_test and walk in the "Generate" cell.
input_audio_path = audio_processing.input_audio_path



In [None]:
# @title Audio to Image Embedding
import os
# Folder whose entries are embedded one by one by the loop further down.
cartella = '/content/audio_segments'
# Collects one audio embedding per entry; reset on every run of this cell.
prompts = []

# Used by visualize_video_colab to embed a video in the cell output.
from IPython.display import HTML
from base64 import b64encode

def visualize_video_colab(video_path):
    """Return an HTML video player with the file at `video_path` embedded inline.

    The whole file is read and base64-encoded into a ``data:video/mp4`` URL.

    Args:
        video_path: Path of the video file to display.

    Returns:
        An ``IPython.display.HTML`` object wrapping a ``<video>`` element.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    # The context manager closes the file handle as soon as the bytes are read.
    with open(video_path, 'rb') as video_file:
        mp4 = video_file.read()
    data_url = "data:video/mp4;base64," + b64encode(mp4).decode()
    return HTML("""
    <video width=400 controls>
        <source src="%s" type="video/mp4">
    </video>
    """ % data_url)



import re


def _natural_key(name):
    """Sort key that orders embedded numbers numerically ("seg_2" before "seg_10")."""
    # re.split with a capturing group alternates text, digits, text, ... so
    # keys built from different names always compare str with str and int with int.
    return [int(part) if part.isdigit() else part.lower() for part in re.split(r"(\d+)", name)]


# os.listdir returns entries in arbitrary order, so sort them explicitly to get
# a deterministic order for the embeddings; anything that is not a regular file
# (e.g. a checkpoint directory) is skipped.
# NOTE(review): assumes the segment file names sort in playback order - confirm
# against the naming used by Audio_stuff.
segment_names = sorted(
    (entry for entry in os.listdir(cartella) if os.path.isfile(os.path.join(cartella, entry))),
    key=_natural_key,
)

# Inference only: no gradients are needed.
with torch.no_grad():
  for segment_name in segment_names:
    audio_paths = [os.path.join(cartella, segment_name)]
    embeddings = model.forward({
        ib.ModalityType.AUDIO: ib.load_and_transform_audio_data(audio_paths, device),
    })
    # Keep only the audio-modality output and store it for the "Generate" cell.
    prompts.append(embeddings[ib.ModalityType.AUDIO])


# 3) TEST

In [None]:
# @title Generate
from interpolation_module import walk
from audio_stuff import stuff_for_test

# --- Colab form fields -------------------------------------------------------
fps = 7 # @param {type:"number"}

batch_size = 7 # @param {type:"number"}

detext = True # @param {type:"boolean"}

num_inference_steps = 20 # @param {type:"number"}

# Reject a non-positive batch size up front; otherwise the divisibility check
# below would fail with an unhelpful ZeroDivisionError.
if batch_size <= 0:
  raise ValueError(f"'batch_size' must be a positive number, got {batch_size}")

# Interpolation step counts and audio offsets, derived from the input track,
# the segment count (+1) and the frame rate.
num_interpolation_steps, audio_offsets = stuff_for_test(input_audio_path,(num_segments+1),fps)
if num_interpolation_steps[0] % batch_size!=0:
  raise ValueError( f" 'batch_size' value must be a divider of {num_interpolation_steps[0]} ")

# Inference only: no gradients are needed.
with torch.no_grad():
    video_path = walk(
        prompts=prompts,
        seeds=[42]*len(prompts),  # same fixed seed for every prompt
        num_interpolation_steps=num_interpolation_steps,
        audio_filepath=input_audio_path,
        audio_start_sec=audio_offsets[0],
        batch_size=batch_size,
        fps=fps,
        name='name',
        num_inference_steps=num_inference_steps,
        detext=detext,
        inpainter=inpainter,
        text_detector=text_detector,
        pipe=pipe,
    )

# Show the result inline as the cell output.
visualize_video_colab(video_path)

In [None]:
# @title Download Video
from google.colab import files

# File to download.
# NOTE(review): presumably the video written by the "Generate" cell, which
# calls walk with name='name' - confirm the output location.
generated_video_path = "/content/dreams/name/name.mp4"

# Send the file to the browser.
files.download(generated_video_path)



<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
# @title Reset for load a different audio track (optional)   ----> then come back to  "2) Audio Chop"

import shutil
import os

# Working folders that are wiped and recreated empty.
working_folders = ("/content/audio", "/content/audio_segments")

# First pass: delete each folder (with its content) if it is present.
for folder in working_folders:
    if os.path.exists(folder):
        shutil.rmtree(folder)

# Second pass: recreate the folders.
for folder in working_folders:
    os.makedirs(folder)
