In [None]:
!pip install -U git+https://github.com/facebookresearch/audiocraft.git
!pip install torchaudio

In [None]:
import torchaudio
import torch
from audiocraft.models import MusicGen
from audiocraft.data.audio import audio_write
import os
import random

# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")

# Load the melody-conditioned MusicGen checkpoint. The device is passed explicitly
# so the model is placed on exactly the device reported above, instead of relying
# on the library's own default device selection.
print("Loading MusicGen Melody...")
model = MusicGen.get_pretrained('facebook/musicgen-melody', device=device)
# Default length, in seconds, of every clip generated with this model.
model.set_generation_params(duration=15)
print("Model Loaded!")

In [None]:
# Candidate style prompts used to "confuse" the model (hard positives): one is
# chosen at random for each cover, so the output keeps the source melody while
# the instrumentation/genre described by the prompt changes.
STYLES = [
    "acoustic guitar cover, calm, folk",
    "heavy metal version, distorted guitars, loud drums",
    "8-bit chiptune version, video game music",
    "jazz piano cover, smooth, relaxing",
    "techno remix, electronic, synthesizer, fast tempo",
    "orchestral version, violins, epic",
    "reggae style, slow beat, bass"
]

def generate_ai_cover(input_path, output_path, duration=15):
    """
    Generate a restyled "cover" of an audio file with MusicGen Melody.

    The file at ``input_path`` is loaded, trimmed to its first ``duration``
    seconds, and passed as the melody conditioning signal together with a text
    prompt picked at random from ``STYLES``. The generated audio is written via
    ``audio_write`` using ``output_path`` as the file stem.

    Args:
        input_path: Path of the audio file to read with ``torchaudio.load``.
        output_path: Destination path WITHOUT extension; the rest of this
            notebook treats the written file as ``<output_path>.wav``.
        duration: Length in seconds. Used both to trim the melody input and as
            the length of the generated clip.

    Returns:
        bool: True when the cover was generated and saved; False when any step
        raised. The error is printed rather than re-raised so that a batch run
        can continue with the next file.

    Side effects:
        Calls ``model.set_generation_params(duration=duration)`` on the global
        model, so the model's generation settings reflect the last call.
    """
    try:
        # 1. Load the audio.
        melody_waveform, sr = torchaudio.load(input_path)

        # The melody is handed to MusicGen as a batched tensor, so a 2-D
        # waveform gets a leading batch axis. No channel down-mixing is done
        # here; the waveform keeps whatever channels the file had.
        if melody_waveform.dim() == 2:
            melody_waveform = melody_waveform.unsqueeze(0)

        # Keep only the first `duration` seconds so memory use stays bounded.
        max_frames = int(duration * sr)
        if melody_waveform.shape[-1] > max_frames:
            melody_waveform = melody_waveform[..., :max_frames]

        # 2. Pick a random style prompt.
        prompt = random.choice(STYLES)
        print(f"   üé® Style: {prompt}")

        # 3. Generation.
        # Make the generated length follow `duration`. Previously the argument
        # only trimmed the melody while the output length stayed at whatever
        # the model had been configured with globally.
        model.set_generation_params(duration=duration)

        # `melody_wavs` is the conditioning input that supplies the melody.
        wav = model.generate_with_chroma(
            descriptions=[prompt],
            melody_wavs=melody_waveform,
            melody_sample_rate=sr,
            progress=True
        )

        # 4. Save.
        # One description was passed, so wav[0] is the single generated item
        # with the batch axis removed; it is moved to the CPU before writing.
        audio_write(output_path, wav[0].cpu(), model.sample_rate, strategy="loudness", loudness_headroom_db=16)
        print(f"   üíæ Saved to: {output_path}.wav")
        return True

    except Exception as e:
        # Best-effort: report the failure and let the caller move on.
        print(f"‚ùå Error generating cover for {input_path}: {e}")
        return False

In [None]:
# Root folder that is walked recursively for source .wav files.
# NOTE(review): this looks like a Google Drive path inside Colab; the drive must
# already be mounted for the scan to find anything — confirm before running.
INPUT_ROOT_DIR = "/content/drive/MyDrive/Plagiarism-Detection-System/data/audio_files"

# Upper bound on the number of covers successfully generated in a single run.
MAX_FILES_TO_GENERATE = 50

def batch_generate_hard_positives(input_root_dir=None, max_files=None):
    """
    Generate one AI cover ("hard positive") for each source .wav under a folder.

    The tree is walked recursively. For every source file ``<dir>/<name>.wav``
    the cover is written to ``<dir>/ai_versions/<name>_hard_positive.wav``.
    Files whose cover already exists are skipped, so the function can be
    re-run to resume an interrupted batch.

    Args:
        input_root_dir: Folder to scan. Defaults to ``INPUT_ROOT_DIR``.
        max_files: Stop after this many covers have been generated
            successfully in this call. Defaults to ``MAX_FILES_TO_GENERATE``.
            Skipped and failed files do not count towards the limit.

    Returns:
        None. Progress is reported through ``print``.
    """
    if input_root_dir is None:
        input_root_dir = INPUT_ROOT_DIR
    if max_files is None:
        max_files = MAX_FILES_TO_GENERATE

    output_dir_name = "ai_versions"
    output_suffix = "_hard_positive"
    count = 0

    print(f"Scanning {input_root_dir} for wav files...")

    for root, dirs, files in os.walk(input_root_dir):
        # Prune the output folders in place so os.walk never descends into
        # them; otherwise a re-run would treat previously generated covers as
        # new inputs and produce covers of covers. Sorting keeps the visiting
        # order (and therefore which files fall inside the limit) deterministic.
        dirs[:] = sorted(d for d in dirs if d != output_dir_name)

        for file in sorted(files):
            if not file.endswith(".wav") or "ai_cover" in file:
                continue

            file_name_no_ext = os.path.splitext(file)[0]
            # Extra guard: never use one of our own outputs as an input, even
            # if it was moved out of its ai_versions folder.
            if file_name_no_ext.endswith(output_suffix):
                continue

            if count >= max_files:
                print("Reached limit.")
                return

            input_path = os.path.join(root, file)

            save_dir = os.path.join(root, output_dir_name)
            os.makedirs(save_dir, exist_ok=True)

            # Extension-less stem; the written file is expected at stem + ".wav".
            output_filename = f"{file_name_no_ext}{output_suffix}"
            output_path = os.path.join(save_dir, output_filename)

            if os.path.exists(output_path + ".wav"):
                print(f"‚è© Skipping existing: {output_filename}")
                continue

            print(f"\nüéµ Processing ({count+1}/{max_files}): {file}")

            success = generate_ai_cover(input_path, output_path, duration=15)
            if success:
                count += 1

# Run the batch over INPUT_ROOT_DIR, generating at most MAX_FILES_TO_GENERATE covers.
batch_generate_hard_positives()