# Install Required Libraries
Install the necessary libraries using the environment.yml file.

In [None]:
# Install the necessary libraries using the environment.yml file
!pip install pyyaml
import yaml

# Load the environment.yml file
with open('environment.yml', 'r') as file:
    environment = yaml.safe_load(file)

# Install dependencies
for dependency in environment['dependencies']:
    if isinstance(dependency, str):
        !conda install -y {dependency}
    elif isinstance(dependency, dict) and 'pip' in dependency:
        for pip_dependency in dependency['pip']:
            !pip install {pip_dependency}

# Import Required Libraries
Import the necessary libraries, including argparse, whisper, spleeter, and tempfile.

In [None]:
# Import Required Libraries
import argparse
import math
import os
import tempfile

import whisper
from spleeter.separator import Separator

# Define Helper Functions
Define the helper functions for separating vocals, getting segments, and aligning lyrics.

In [None]:
# Define Helper Functions

def separated_vocals(filename):
    """Split an audio file into stems with Spleeter and return the output directory.

    The ``spleeter:2stems`` model writes its output below a newly created
    temporary directory.

    Args:
        filename: Path of the audio file to separate.

    Returns:
        The ``tempfile.TemporaryDirectory`` object holding the separated
        files. Its ``.name`` attribute is the directory path. The caller
        should keep a reference to the object for as long as the files are
        needed, since the directory is removed when the object is cleaned up.
    """
    stem_splitter = Separator('spleeter:2stems')
    output_dir = tempfile.TemporaryDirectory()
    stem_splitter.separate_to_file(filename, output_dir.name)
    return output_dir

def jaccard_similarity(sent1, sent2):
    """Return the Jaccard similarity of the tokens of two strings.

    Each string is split on whitespace into a set of tokens. The result is
    ``len(intersection) / len(union)`` of the two sets. Tokens are compared
    exactly (case and punctuation are significant).

    Args:
        sent1: First text.
        sent2: Second text.

    Returns:
        A float in ``[0.0, 1.0]``. When neither string contains any token the
        union is empty and ``0.0`` is returned instead of dividing by zero.
    """
    tokens1 = set(sent1.split())
    tokens2 = set(sent2.split())

    union_tokens = tokens1 | tokens2
    if not union_tokens:
        # Two blank strings give no evidence of a match.
        return 0.0

    return len(tokens1 & tokens2) / len(union_tokens)

def get_segments(vocal_filename, model_size="medium"):
    """Transcribe an audio file with Whisper and return the transcript segments.

    Prints the number of segments found as a quick sanity check.

    Args:
        vocal_filename: Path of the audio file to transcribe.
        model_size: Name of the Whisper model passed to ``whisper.load_model``.

    Returns:
        The ``'segments'`` entry of the transcription result.
    """
    transcription = whisper.load_model(model_size).transcribe(vocal_filename)
    segments = transcription['segments']
    print(f"Segments: {len(segments)}")
    return segments

def _format_lrc_timestamp(seconds):
    """Format a time in seconds as an LRC ``[mm:ss.xx]`` tag."""
    # Work in whole hundredths so rounding carries into seconds and minutes.
    total_hundredths = int(round(seconds * 100))
    minutes, remainder = divmod(total_hundredths, 6000)
    whole_seconds, hundredths = divmod(remainder, 100)
    return f"[{minutes:02d}:{whole_seconds:02d}.{hundredths:02d}]"


def sync_segments(lyrics, segments):
    """Attach an LRC timestamp to every lyric line using transcribed segments.

    Segments are processed in order. For each segment, every leading run of
    the still-unassigned lyric lines (from one line up to all of them) is
    joined with spaces and scored against the segment text with
    ``jaccard_similarity``. The shortest run with the highest score is stamped
    with the segment's start time and removed from the unassigned lines. At
    least one line is consumed per segment, even when nothing scores above 0.

    Args:
        lyrics: Full lyrics text, one lyric line per text line.
        segments: Sequence of mappings providing a ``'start'`` time in seconds
            and the transcribed ``'text'``.

    Returns:
        A list of newline-terminated ``[mm:ss.xx] <line>`` strings in lyric
        order. Lines left over after the last segment get the last segment's
        start time, or ``00:00.00`` when ``segments`` is empty.
    """
    lyrics_synced = []
    # splitlines() avoids a spurious empty line for a trailing newline and
    # strips "\r" from Windows line endings.
    lyrics_unsynced = lyrics.splitlines()

    for segment in segments:
        if not lyrics_unsynced:
            break

        best_count = 1
        # A single remaining line leaves nothing to choose between.
        if len(lyrics_unsynced) > 1:
            best_similarity = 0.0
            # Upper bound is inclusive so the segment may claim all remaining lines.
            for count in range(1, len(lyrics_unsynced) + 1):
                candidate_text = ' '.join(lyrics_unsynced[:count])
                similarity = jaccard_similarity(candidate_text, segment['text'])
                if similarity > best_similarity:
                    best_similarity = similarity
                    best_count = count

        timestamp = _format_lrc_timestamp(segment['start'])
        lyrics_synced.extend(
            f"{timestamp} {line}\n" for line in lyrics_unsynced[:best_count]
        )
        lyrics_unsynced = lyrics_unsynced[best_count:]

    if lyrics_unsynced:
        # More lyric lines than the segments could absorb: pin them to the
        # last known time instead of dropping them.
        fallback_start = segments[-1]['start'] if segments else 0.0
        timestamp = _format_lrc_timestamp(fallback_start)
        lyrics_synced.extend(f"{timestamp} {line}\n" for line in lyrics_unsynced)

    return lyrics_synced

# Load Audio and Lyrics Files
Load the audio and lyrics files from the specified paths.

In [None]:
# Load Audio and Lyrics Files

# Define paths to the audio and lyrics files
audio_file_path = 'path/to/your/audio/file.mp3'
lyrics_file_path = 'path/to/your/lyrics/file.txt'

# Load the lyrics file.
# The encoding is given explicitly because the platform default (e.g. cp1252
# on Windows) garbles or rejects lyrics containing non-ASCII characters.
with open(lyrics_file_path, 'r', encoding='utf-8') as file:
    lyrics = file.read()

# Print the first few lines of the lyrics to verify
print(lyrics.split('\n')[:10])

# Separate Vocals from Music
Use the spleeter library to separate the vocals from the music.

In [None]:
# Separate Vocals from Music

# Separate the vocals from the music using the `separated_vocals` function.
# Keep `temp_dir` referenced: the separated files live in that temporary
# directory and disappear when it is cleaned up.
temp_dir = separated_vocals(audio_file_path)

# Define the path to the separated vocals file.
# The stems are written to a sub-folder named after the input file without its
# extension. basename/splitext strip only the directory and the final
# extension, so names with several dots ("my.song.mp3" -> "my.song") and
# platform-specific path separators resolve to the right folder.
track_name = os.path.splitext(os.path.basename(audio_file_path))[0]
vocal_file_name = os.path.join(temp_dir.name, track_name, 'vocals.wav')

# Print the path to the separated vocals file to verify
print(vocal_file_name)

# Get Segments from Vocals
Use the whisper library to get segments from the separated vocals.

In [None]:
# Get Segments from Vocals

# Transcribe the separated vocal track with `get_segments`, which loads a
# Whisper model (its default size, "medium") and returns the transcript segments
segments = get_segments(vocal_file_name)

# Print the first few segments to verify
print(segments[:10])

# Align Lyrics with Segments
Align the lyrics with the segments using the defined helper functions.

In [None]:
# Align Lyrics with Segments

# Align the lyrics with the segments using the defined helper functions.
# The result is a list of newline-terminated, timestamped lyric lines.
aligned_lyrics = sync_segments(lyrics, segments)

# Print the first few lines of the aligned lyrics to verify
print(aligned_lyrics[:10])

# Saving is done once, in the "Save Aligned Lyrics to File" cell that follows.

# Save Aligned Lyrics to File
Write the timestamped lyric lines to the output path as an LRC file (one `[mm:ss.xx] text` line per lyric line).

In [None]:
# Save Aligned Lyrics to File

# Define the output file path
output_file_path = 'path/to/your/output/file.lrc'

# Save the aligned lyrics to the output file.
# Each entry already ends with a newline, so writelines() emits one line per
# entry. UTF-8 is set explicitly so non-ASCII lyrics are written the same way
# on every platform instead of depending on the locale's default encoding.
with open(output_file_path, 'w', encoding='utf-8') as file:
    file.writelines(aligned_lyrics)

# Print a message to confirm that the aligned lyrics have been saved
print(f"Aligned lyrics have been saved to {output_file_path}")