<a href="https://colab.research.google.com/github/devloperAnu/Sample_to_target/blob/main/sample_to_target.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Cell 1: Install Dependencies
!pip install so-vits-svc-fork
!pip install pydub
!apt-get install -y ffmpeg
print("Dependencies installed successfully!")

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
ffmpeg is already the newest version (7:4.4.2-0ubuntu0.22.04.1).
0 upgraded, 0 newly installed, 0 to remove and 34 not upgraded.
Dependencies installed successfully!


# Drive Setup and Audio Pitch Conversion

In [None]:
# Cell 2: Mount Google Drive
from google.colab import drive

# Attach the user's Drive under /content/drive, then confirm on stdout.
drive.mount(mountpoint='/content/drive')
print("Google Drive mounted successfully!")

Mounted at /content/drive
Google Drive mounted successfully!


In [None]:
# Cell 3: Create Directory Structure
import os

# Everything lives under one project folder on the mounted Drive.
base_path = '/content/drive/MyDrive/so-vits-svc-fork'

# NOTE(review): 'Audio_file.mp3' and 'target.mp3' are created as *directories*
# below even though they look like file names — confirm this is intended.
voice_sample_dir, input_audio_dir, output_dir = (
    os.path.join(base_path, leaf)
    for leaf in ('Audio_file.mp3', 'output', 'target.mp3')
)

# Create each folder, tolerating ones that already exist.
for folder in (voice_sample_dir, input_audio_dir, output_dir):
    os.makedirs(folder, exist_ok=True)

print(f"Directories created:\nVoice Samples: {voice_sample_dir}\nInput Audio: {input_audio_dir}\nOutput: {output_dir}")

Directories created:
Voice Samples: /content/drive/MyDrive/so-vits-svc-fork/Audio_file.mp3
Input Audio: /content/drive/MyDrive/so-vits-svc-fork/output
Output: /content/drive/MyDrive/so-vits-svc-fork/target.mp3


In [None]:
# Install Python libraries for audio processing
!pip install librosa pydub soundfile numpy

# Install ffmpeg for pydub to process MP3 files
!apt-get install -y ffmpeg

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
ffmpeg is already the newest version (7:4.4.2-0ubuntu0.22.04.1).
0 upgraded, 0 newly installed, 0 to remove and 34 not upgraded.


In [None]:
# Import libraries
import librosa
import soundfile as sf
from pydub import AudioSegment
import numpy as np
import os
from google.colab import files

In [None]:
# Ask for the two recordings and block until the browser upload finishes
print("Please upload sample.mp3 and target.mp3")
uploaded = files.upload()

# Only an empty upload is rejected; a single file is accepted
if not uploaded:
    raise ValueError("Please upload at least one audio file.")

# The first uploaded name is the sample. The second name, when present, is the
# target; with a single upload the same file serves as both sample and target.
uploaded_names = list(uploaded.keys())
sample_file = uploaded_names[0]
if len(uploaded) > 1:
    target_file = uploaded_names[1]
else:
    target_file = uploaded_names[0]

Please upload sample.mp3 and target.mp3


Saving sample1T.mp3 to sample1T (2).mp3
Saving target.mp3 to target (3).mp3


In [None]:
# Define file paths
sample_file = "sample.mp3"
target_file = "target.mp3"
output_file = "output.mp3"
sample_wav = "sample.wav"
target_wav = "target.wav"
converted_wav = "converted.wav"

# The fixed names above are only valid if the uploaded audio is actually stored
# under them. Uploaded files keep whatever name they had (and may be renamed on
# collision, e.g. "target (3).mp3"), so copy the uploaded bytes to the fixed
# paths. Without this, later cells would read stale or missing files.
if not uploaded:
    raise ValueError("Please upload at least one audio file.")

# Same selection rule as the upload step: first upload is the sample, second
# (if any) is the target, otherwise the single upload is used for both.
# Re-derived from `uploaded` so this cell can be re-run safely.
_uploaded_names = list(uploaded)
_sample_upload = _uploaded_names[0]
_target_upload = _uploaded_names[1] if len(_uploaded_names) > 1 else _uploaded_names[0]

for _upload_name, _fixed_path in ((_sample_upload, sample_file), (_target_upload, target_file)):
    # `uploaded` maps each uploaded file name to its raw bytes.
    with open(_fixed_path, "wb") as _fh:
        _fh.write(uploaded[_upload_name])

# Function to convert MP3 to WAV
def mp3_to_wav(mp3_path, wav_path):
    """Decode the MP3 at ``mp3_path`` and export it as a WAV file at ``wav_path``."""
    AudioSegment.from_mp3(mp3_path).export(wav_path, format="wav")

# Convert both files to WAV (sample first, then target)
for mp3_source, wav_destination in ((sample_file, sample_wav), (target_file, target_wav)):
    mp3_to_wav(mp3_source, wav_destination)

In [None]:
# Function to analyze audio pitch
def analyze_audio(file_path):
    """Estimate an average pitch for the audio file at ``file_path``.

    The file is loaded with ``sr=None`` and passed through ``librosa.piptrack``.
    The pitch is the mean of every tracked frequency whose magnitude is
    positive, or 0 when no bin has positive magnitude.

    Returns:
        dict with ``"pitch"`` (the mean frequency, or 0) and ``"sr"`` (the
        sampling rate returned by ``librosa.load``).
    """
    samples, sample_rate = librosa.load(file_path, sr=None)
    pitch_grid, magnitude_grid = librosa.piptrack(y=samples, sr=sample_rate)

    # Keep only the bins where piptrack reported some energy.
    active = magnitude_grid > 0
    if np.any(active):
        mean_pitch = np.mean(pitch_grid[active])
    else:
        mean_pitch = 0
    return {"pitch": mean_pitch, "sr": sample_rate}

# Run the pitch analysis on the sample recording and report the estimate;
# `sample_features` is reused when the target's pitch shift is computed.
sample_features = analyze_audio(file_path=sample_wav)
print(f"Sample audio pitch: {sample_features['pitch']} Hz")

Sample audio pitch: 802.2341918945312 Hz


In [None]:
# Load target audio at its native sampling rate. The sample is analyzed with
# sr=None as well, so both pitch estimates come from un-resampled audio and the
# converted file keeps the target's original rate instead of librosa's default.
y_target, sr_target = librosa.load(target_wav, sr=None)

# Analyze target audio pitch: mean of all non-zero frequencies tracked by
# piptrack, or 0 when nothing was tracked.
pitches_target, _ = librosa.piptrack(y=y_target, sr=sr_target)
target_pitch = np.mean(pitches_target[pitches_target > 0]) if np.any(pitches_target > 0) else 0
print(f"Target audio pitch: {target_pitch} Hz")

# Calculate pitch shift (in semitones): 12 * log2(sample / target).
# Both pitches must be positive: a zero sample pitch would give log2(0) = -inf,
# and a zero target pitch would divide by zero. In either case fall back to
# "no shift" rather than passing an invalid value to pitch_shift.
sample_pitch = sample_features['pitch']
if sample_pitch > 0 and target_pitch > 0:
    n_steps = 12 * np.log2(sample_pitch / target_pitch)
else:
    n_steps = 0
print(f"Pitch shift required: {n_steps} semitones")

# Apply pitch shift to target audio
y_shifted = librosa.effects.pitch_shift(y_target, sr=sr_target, n_steps=n_steps)

# Save the modified audio at the same rate it was loaded with
sf.write(converted_wav, y_shifted, sr_target)

Target audio pitch: 1126.0858154296875 Hz
Pitch shift required: -5.870657444000244 semitones


In [None]:
# Function to convert WAV to MP3
def wav_to_mp3(wav_path, mp3_path):
    """Read the WAV file at ``wav_path`` and export it as an MP3 at ``mp3_path``."""
    AudioSegment.from_wav(wav_path).export(mp3_path, format="mp3")

# Encode the pitch-shifted WAV as the final MP3
wav_to_mp3(converted_wav, output_file)

# Delete the intermediate WAV files (sample, target, converted) in that order
for temp_wav in (sample_wav, target_wav, converted_wav):
    os.remove(temp_wav)

In [None]:
# Announce the transfer, then hand the converted MP3 to the browser
print("Downloading output.mp3")
files.download(filename=output_file)

Downloading output.mp3


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>