<a href="https://colab.research.google.com/github/davvoz/Arkanoid-by-gpt-3/blob/master/psycovideo.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import os
import numpy as np
import cv2
from moviepy.editor import *
from scipy.io import wavfile
from pydub import AudioSegment
from PIL import Image
import torch
from torchvision import transforms
from google.colab import files
from tqdm.notebook import tqdm
import gc
import pickle

def load_audio(audio_file):
    """Load an audio file as a mono ``float32`` signal scaled to [-1, 1].

    A path ending in ``.mp3`` is decoded with pydub, exported to a temporary
    ``temp_audio.wav`` in the current directory and read back; any other path
    is passed directly to ``scipy.io.wavfile.read``.

    Args:
        audio_file: Path of the audio file to load.

    Returns:
        A ``(sample_rate, audio_data)`` tuple. ``audio_data`` is a 1-D
        ``float32`` array; multi-channel input is averaged across channels
        and the result is divided by its peak absolute value. Silent or
        empty audio is returned as-is (all zeros / empty) instead of being
        divided by zero.
    """
    print(f"Caricamento del file audio: {audio_file}")
    if audio_file.lower().endswith('.mp3'):
        audio = AudioSegment.from_mp3(audio_file)
        try:
            audio.export("temp_audio.wav", format="wav")
            sample_rate, audio_data = wavfile.read("temp_audio.wav")
        finally:
            # Remove the temporary file even when export/read fails.
            if os.path.exists("temp_audio.wav"):
                os.remove("temp_audio.wav")
    else:
        sample_rate, audio_data = wavfile.read(audio_file)

    # Down-mix multi-channel audio to mono.
    if audio_data.ndim > 1:
        audio_data = audio_data.mean(axis=1)

    audio_data = audio_data.astype(np.float32)

    # Peak-normalise, but never divide by zero: a silent track would
    # otherwise turn into NaNs that poison every later computation.
    peak = np.max(np.abs(audio_data)) if audio_data.size else 0.0
    if peak > 0:
        audio_data /= peak

    print(f"File audio caricato. Sample rate: {sample_rate}, Durata: {len(audio_data)/sample_rate:.2f} secondi")
    return sample_rate, audio_data

def compute_frequency_energies(audio_data, sample_rate, fps, chunk_duration=0.1):
    """Compute per-video-frame bass/mid/high energies of an audio signal.

    The signal is cut into consecutive chunks of ``chunk_duration`` seconds.
    For each chunk the magnitude spectrum (``np.fft.rfft``) is summed over
    three bands: the lowest 10% of the bins (bass), 10%-50% (mid) and the
    upper 50% (high). Each band is then min-max normalised over all chunks
    and linearly interpolated to one value per video frame.

    Args:
        audio_data: 1-D array of audio samples.
        sample_rate: Samples per second of ``audio_data``.
        fps: Video frame rate; the output has
            ``int(len(audio_data) / sample_rate * fps)`` columns.
        chunk_duration: Length in seconds of each analysis chunk.

    Returns:
        A ``(3, num_frames)`` float array with rows bass, mid, high. When the
        audio is shorter than a single chunk the array is all zeros.

    Raises:
        ValueError: If ``chunk_duration * sample_rate`` is below one sample.
    """
    print("Calcolo delle energie delle frequenze...")
    chunk_samples = int(chunk_duration * sample_rate)
    if chunk_samples <= 0:
        raise ValueError("chunk_duration * sample_rate must be at least one sample")

    num_chunks = len(audio_data) // chunk_samples
    num_frames = int(len(audio_data) / sample_rate * fps)

    if num_chunks == 0:
        # Not even one full chunk to analyse: the min/max reductions and
        # np.interp below would fail on empty input, so report "no energy".
        print("Calcolo delle energie completato")
        return np.zeros((3, num_frames))

    energies = np.zeros((3, num_chunks))

    # rfft of a real chunk of n samples yields n // 2 + 1 bins, so the band
    # boundaries are the same for every chunk and can be computed once.
    num_bins = chunk_samples // 2 + 1
    bass_end = int(num_bins * 0.1)
    mid_end = int(num_bins * 0.5)

    for i in tqdm(range(num_chunks), desc="Analisi audio", unit="chunk"):
        chunk = audio_data[i*chunk_samples:(i+1)*chunk_samples]
        fft = np.abs(np.fft.rfft(chunk))
        energies[0, i] = np.sum(fft[:bass_end])  # Bass
        energies[1, i] = np.sum(fft[bass_end:mid_end])  # Mid
        energies[2, i] = np.sum(fft[mid_end:])  # High

    # Min-max normalise each band independently; the epsilon keeps a
    # constant band from dividing by zero.
    band_min = np.min(energies, axis=1, keepdims=True)
    band_max = np.max(energies, axis=1, keepdims=True)
    energies = (energies - band_min) / (band_max - band_min + 1e-8)

    print("Interpolazione delle energie...")
    x_original = np.linspace(0, 1, num_chunks)
    x_interpolated = np.linspace(0, 1, num_frames)
    energies_interpolated = np.array([np.interp(x_interpolated, x_original, energy) for energy in energies])

    print("Calcolo delle energie completato")
    return energies_interpolated

def apply_effects_gpu(img_tensor, bass, mid, high):
    """Apply audio-driven visual effects to one image and return a frame.

    The tensor is moved to CUDA when available (CPU otherwise) and then:

    * brightness, saturation and contrast are boosted by bass, mid and high;
    * the R, G and B channels are scaled by bass, mid and high respectively;
    * ``mid > 0.6`` adds a Gaussian blur with an odd kernel size;
    * ``high > 0.7`` rotates the image by ``high * 15`` degrees;
    * ``high > 0.85`` inverts the colours;
    * ``bass > 0.75`` zooms in slightly (resize followed by centre crop).

    Args:
        img_tensor: Image tensor with the three colour channels on dim 0
            and values in [0, 1], as produced by ``transforms.ToTensor()``
            in ``step2_process_frames``.
        bass: Bass energy for this frame (the pipeline passes values
            normalised to [0, 1]).
        mid: Mid-band energy for this frame.
        high: High-band energy for this frame.

    Returns:
        The processed frame as an ``(H, W, 3)`` ``uint8`` NumPy array.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    img_tensor = img_tensor.to(device)

    brightness_factor = 1 + bass * 1.5
    img_tensor = transforms.functional.adjust_brightness(img_tensor, brightness_factor)

    saturation_factor = 1 + mid * 2.0
    img_tensor = transforms.functional.adjust_saturation(img_tensor, saturation_factor)

    contrast_factor = 1 + high * 1.2
    img_tensor = transforms.functional.adjust_contrast(img_tensor, contrast_factor)

    r, g, b = img_tensor.unbind(0)
    r = r * (1 + bass * 0.4)
    g = g * (1 + mid * 0.4)
    b = b * (1 + high * 0.4)
    # The channel gains can push values above 1.0. Clamp right away so the
    # inversion below stays within [0, 1] and the final uint8 conversion
    # cannot wrap around.
    img_tensor = torch.stack([r, g, b]).clamp(0.0, 1.0)

    if mid > 0.6:
        blur_radius = int(mid * 8)
        # gaussian_blur requires an odd kernel size.
        blur_radius = blur_radius if blur_radius % 2 == 1 else blur_radius + 1
        img_tensor = transforms.functional.gaussian_blur(img_tensor.unsqueeze(0), (blur_radius, blur_radius)).squeeze(0)

    if high > 0.7:
        rotation_angle = high * 15
        img_tensor = transforms.functional.rotate(img_tensor, rotation_angle)

    if high > 0.85:
        img_tensor = 1 - img_tensor

    if bass > 0.75:
        zoom_factor = 1 + (bass - 0.75) * 0.3
        orig_size = list(img_tensor.shape[-2:])
        img_tensor = transforms.functional.resize(img_tensor, [int(s * zoom_factor) for s in orig_size])
        img_tensor = transforms.functional.center_crop(img_tensor, orig_size)

    # Final safety clamp: casting out-of-range floats to uint8 is not
    # well-defined and shows up as wrapped (speckled) pixels.
    img_tensor = img_tensor.clamp(0.0, 1.0)
    return (img_tensor.cpu().numpy() * 255).astype(np.uint8).transpose(1, 2, 0)

def process_frame_gpu(img_tensor, energies, frame_num):
    """Render one frame using the band energies stored for ``frame_num``.

    Column ``frame_num`` of ``energies`` is unpacked into the bass, mid and
    high levels, which are forwarded to ``apply_effects_gpu`` together with
    ``img_tensor``.
    """
    frame_levels = energies[:, frame_num]
    bass_level, mid_level, high_level = frame_levels
    return apply_effects_gpu(img_tensor, bass_level, mid_level, high_level)

# Step 1: data preparation and energy computation
def step1_prepare_data(image_folder, audio_file, fps, test_duration, output_size):
    """Collect the inputs of the pipeline and store them in 'step1_data.pkl'.

    Lists the PNG/JPEG files of ``image_folder`` in sorted order, loads the
    audio, selects a window of at most ``test_duration`` seconds centred in
    the track, computes the per-frame band energies of that window and
    pickles everything the later steps need.

    Args:
        image_folder: Directory that contains the source images.
        audio_file: Path of the audio track.
        fps: Frame rate of the video to produce.
        test_duration: Maximum length in seconds of the audio window.
        output_size: Size passed on to the image resize in step 2.
    """
    print("Step 1: Preparazione dei dati e calcolo delle energie")

    image_extensions = ('.png', '.jpg', '.jpeg')
    image_files = sorted(
        name for name in os.listdir(image_folder)
        if name.lower().endswith(image_extensions)
    )
    print(f"Trovati {len(image_files)} file immagine")

    sample_rate, audio_data = load_audio(audio_file)
    audio_duration = len(audio_data) / sample_rate

    # Centre the analysed window in the track, clipped to the track bounds.
    start_time = max(0, (audio_duration - test_duration) / 2)
    end_time = min(audio_duration, start_time + test_duration)

    first_sample = int(start_time * sample_rate)
    last_sample = int(end_time * sample_rate)
    energies = compute_frequency_energies(audio_data[first_sample:last_sample], sample_rate, fps)

    payload = dict(
        image_files=image_files,
        energies=energies,
        audio_file=audio_file,
        start_time=start_time,
        end_time=end_time,
        output_size=output_size,
        fps=fps,
    )

    with open('step1_data.pkl', 'wb') as out:
        pickle.dump(payload, out)

    print("Step 1 completato. Dati salvati in 'step1_data.pkl'")

# Step 2: frame processing
def step2_process_frames(image_folder, batch_size=10, image_duration=1, start_frame=0):
    """Render every video frame and checkpoint them to 'frames_<n>.pkl' files.

    Reads 'step1_data.pkl', then for each frame picks the source image that
    should be on screen, applies the audio-driven effects and appends the
    result to an in-memory list. The list is pickled to
    ``frames_<frames done>.pkl`` and emptied whenever it holds at least 1000
    frames, and once more after the last frame.

    Args:
        image_folder: Directory that contains the images listed in step 1.
        batch_size: Number of frames rendered between two clean-up passes
            (CUDA cache release and garbage collection).
        image_duration: Seconds each image stays on screen before the next
            one is shown; images are cycled when they run out.
        start_frame: Index of the first frame to render.

    Raises:
        ValueError: If step 1 found no image files.
    """
    print("Step 2: Elaborazione dei frame")

    # NOTE: pickle is only safe here because the file is written by
    # step1_prepare_data; never point this at an untrusted file.
    with open('step1_data.pkl', 'rb') as f:
        data = pickle.load(f)

    image_files = data['image_files']
    energies = data['energies']
    output_size = data['output_size']
    fps = data['fps']

    if not image_files:
        # Fail with a clear message instead of a ZeroDivisionError from the
        # modulo below.
        raise ValueError(f"No image files (.png/.jpg/.jpeg) found in {image_folder!r}")

    transform = transforms.Compose([
        transforms.Resize(output_size),
        transforms.ToTensor(),
    ])

    total_frames = energies.shape[1]
    print(f"Numero totale di frame da processare: {total_frames}")

    # Whole number of frames per image (at least one) so the index
    # arithmetic stays integral even for fractional durations.
    frames_per_image = max(1, int(round(image_duration * fps)))
    checkpoint_size = 1000

    frames = []
    current_index = None
    img_tensor = None

    for i in tqdm(range(start_frame, total_frames, batch_size), desc="Elaborazione batch", unit="batch"):
        batch_frames = min(batch_size, total_frames - i)

        for j in range(batch_frames):
            frame_num = i + j
            # Pick the image per frame (not per batch) so that the on-screen
            # time of an image does not depend on batch_size.
            image_index = (frame_num // frames_per_image) % len(image_files)
            if image_index != current_index:
                img_path = os.path.join(image_folder, image_files[image_index])
                # The context manager closes the underlying file promptly.
                with Image.open(img_path) as img:
                    img_tensor = transform(img.convert('RGB'))
                current_index = image_index

            frames.append(process_frame_gpu(img_tensor, energies, frame_num))

        torch.cuda.empty_cache()
        gc.collect()

        frames_done = i + batch_frames
        # Flush on accumulated size rather than on "frames_done % 1000 == 0",
        # which never triggers when batch_size does not divide 1000 and lets
        # the list grow until memory runs out.
        if len(frames) >= checkpoint_size or frames_done == total_frames:
            with open(f'frames_{frames_done}.pkl', 'wb') as f:
                pickle.dump(frames, f)
            frames = []
            print(f"Salvati {frames_done} frame")

    print("Step 2 completato. Frame elaborati e salvati.")

# Step 3: final video creation
def step3_create_video(output_file):
    """Assemble the checkpointed frames and the audio into the final video.

    Reads 'step1_data.pkl' and every ``frames_<n>.pkl`` file of the current
    directory (in ascending numeric order of ``<n>``), builds an image
    sequence clip, attaches the audio window chosen in step 1 and encodes
    the result with libx264. On success the intermediate pickle files are
    deleted.

    Args:
        output_file: Requested output path; the last ``.mp4`` in it is
            replaced by ``_with_audio.mp4`` to form the actual file name.

    Returns:
        The path of the written video file.

    Raises:
        ValueError: If no processed frames are available.
    """
    print("Step 3: Creazione del video finale")

    # NOTE: pickle is only safe here because these files are written by the
    # previous steps; never load untrusted pickles.
    with open('step1_data.pkl', 'rb') as f:
        data = pickle.load(f)

    fps = data['fps']
    audio_file = data['audio_file']
    start_time = data['start_time']
    end_time = data['end_time']

    # Order the checkpoints by their numeric frame count. A plain string
    # sort would put 'frames_10000.pkl' before 'frames_2000.pkl' and
    # scramble any video longer than 9999 frames.
    prefix, suffix = 'frames_', '.pkl'
    indexed_files = []
    for name in os.listdir('.'):
        if name.startswith(prefix) and name.endswith(suffix):
            frame_count = name[len(prefix):-len(suffix)]
            if frame_count.isdigit():
                indexed_files.append((int(frame_count), name))
    frame_files = [name for _, name in sorted(indexed_files)]

    # NOTE: every frame is held in memory at once before encoding.
    frames = []
    for file in frame_files:
        with open(file, 'rb') as f:
            frames.extend(pickle.load(f))

    if not frames:
        raise ValueError("No processed frames found; run step2_process_frames first")

    print("Creazione del video...")
    clip = ImageSequenceClip(frames, fps=fps)
    audio = None
    final_video = None
    try:
        print("Aggiunta dell'audio al video...")
        audio = AudioFileClip(audio_file).subclip(start_time, end_time)
        final_video = clip.set_audio(audio)

        # Replace only the last '.mp4' so a directory name that happens to
        # contain '.mp4' is left untouched.
        final_output = '_with_audio.mp4'.join(output_file.rsplit('.mp4', 1))
        print(f"Scrittura del file video finale: {final_output}")
        final_video.write_videofile(final_output, fps=fps, codec='libx264')

        print(f"Video creato con successo: {final_output}")
    finally:
        # Release readers/handles even when encoding fails.
        for resource in (clip, audio, final_video):
            if resource is not None:
                resource.close()

    # Clean up the temporary files (only reached after a successful write,
    # so a failed run can be retried without redoing step 2).
    for file in frame_files:
        os.remove(file)
    os.remove('step1_data.pkl')

    return final_output

# Main entry point that runs every step in order
def run_all_steps(image_folder, audio_file, output_file, fps=30, batch_size=10, test_duration=1644, image_duration=5, output_size=(1024, 1024)):
    """Run steps 1-3 back to back and return the path of the final video.

    Step 1 prepares the data and the band energies, step 2 renders the
    frames and step 3 encodes the video with its audio.
    """
    step1_prepare_data(
        image_folder=image_folder,
        audio_file=audio_file,
        fps=fps,
        test_duration=test_duration,
        output_size=output_size,
    )
    step2_process_frames(
        image_folder=image_folder,
        batch_size=batch_size,
        image_duration=image_duration,
    )
    return step3_create_video(output_file=output_file)

# Example usage: input and output locations on the Colab machine
output_file = "/content/output_video.mp4"
audio_file = "/content/drive/MyDrive/RECORDER 0002 [2024-08-25 155530].wav"
image_folder = "/content/drive/MyDrive/imma_vifrp"

# Run the whole pipeline with every option spelled out by keyword.
final_video = run_all_steps(
    image_folder=image_folder,
    audio_file=audio_file,
    output_file=output_file,
    fps=5,
    batch_size=10,
    test_duration=1644,
    image_duration=5,
    output_size=(1024, 1024),
)

# Download the resulting video through the Colab file helper
files.download(final_video)

Step 1: Preparazione dei dati e calcolo delle energie
Trovati 221 file immagine
Caricamento del file audio: /content/drive/MyDrive/RECORDER 0002 [2024-08-25 155530].wav
File audio caricato. Sample rate: 48000, Durata: 1644.51 secondi
Calcolo delle energie delle frequenze...


Analisi audio:   0%|          | 0/16440 [00:00<?, ?chunk/s]

Interpolazione delle energie...
Calcolo delle energie completato
Step 1 completato. Dati salvati in 'step1_data.pkl'
Step 2: Elaborazione dei frame
Numero totale di frame da processare: 8220


Elaborazione batch:   0%|          | 0/822 [00:00<?, ?batch/s]

Salvati 1000 frame
Salvati 2000 frame
Salvati 3000 frame
Salvati 4000 frame
Salvati 5000 frame
Salvati 6000 frame
Salvati 7000 frame
Salvati 8000 frame
Salvati 8220 frame
Step 2 completato. Frame elaborati e salvati.
Step 3: Creazione del video finale
Creazione del video...
Aggiunta dell'audio al video...
Scrittura del file video finale: /content/output_video_with_audio.mp4
Moviepy - Building video /content/output_video_with_audio.mp4.
MoviePy - Writing audio in output_video_with_audioTEMP_MPY_wvf_snd.mp3




MoviePy - Done.
Moviepy - Writing video /content/output_video_with_audio.mp4





Moviepy - Done !
Moviepy - video ready /content/output_video_with_audio.mp4
Video creato con successo: /content/output_video_with_audio.mp4


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>