In [4]:
import os
import torch
from speechbrain.pretrained import SpeakerRecognition
from speechbrain.pretrained import EncoderClassifier
from speechbrain.pretrained import Pretrained
import sqlite3
import tempfile
import re

# Load the speaker recognition model
sr_model = SpeakerRecognition.from_hparams(source="speechbrain/spkrec-ecapa-voxceleb", savedir="pretrained_models/spkrec-ecapa-voxceleb")

# Load the encoder classifier model
ec_model = EncoderClassifier.from_hparams(source="speechbrain/spkrec-ecapa-voxceleb", savedir="pretrained_models/spkrec-ecapa-voxceleb")

p_model = Pretrained.from_hparams(source="speechbrain/spkrec-ecapa-voxceleb", savedir="pretrained_models/spkrec-ecapa-voxceleb")

# Extract the embeddings of the reference audio file
reference_file = "/Users/harshbhatia/Documents/CBD/Charlie_CBD/nexusbot/data/training/standard.wav"
waveform_x = p_model.load_audio(path=reference_file)
batch_x = waveform_x.unsqueeze(0)
emb_ref = ec_model.encode_batch(batch_x, normalize=True)

similarity = torch.nn.CosineSimilarity(dim=-1, eps=1e-6)

folder_path = "/Users/harshbhatia/Documents/CBD/Charlie_CBD/nexusbot/tests/segment_data"
metadata_db_path = "/Users/harshbhatia/Documents/CBD/Charlie_CBD/nexusbot/tests/metadata.db"


In [48]:
temp_dir = tempfile.gettempdir()

count_fail = 0
from natsort import natsorted
# Connect to the SQLite database
conn = sqlite3.connect(metadata_db_path)
c = conn.cursor()

# Create the table for storing metadata
c.execute('''CREATE TABLE IF NOT EXISTS speaker_files
             (id INTEGER PRIMARY KEY AUTOINCREMENT, file_path TEXT, is_speaker INTEGER)''')

# Loop through all the files in the folder and check for similarity with reference audio file
for file in natsorted(os.listdir(folder_path), key=lambda x: (x.split("_")[0], int(x.split("_")[1].split(".")[0][7:]))):
    if file.endswith('.wav'):
        waveform_y = p_model.load_audio(path=os.path.join(folder_path, file), savedir=temp_dir)
        batch_y = waveform_y.unsqueeze(0)
        emb_y = ec_model.encode_batch(batch_y, normalize=True)
        score = similarity(emb_y, emb_ref)
        is_speaker = 1 if score > 0.25 else 0
        if is_speaker == 0:
            count_fail += 1
        print(f"File: {file} Score: {score} Passes Verification: {is_speaker}")
        
        # Insert the metadata into the database
        c.execute("INSERT INTO speaker_files (file_path, is_speaker) VALUES (?, ?)", (os.path.join(folder_path, file), is_speaker))
        
        os.remove(os.path.join(temp_dir, file))

# Commit the changes to the database and close the connection
conn.commit()
conn.close()


File: video1_segment1.wav Score: tensor([[0.7505]]) Passes Verification: 1
File: video1_segment2.wav Score: tensor([[0.8099]]) Passes Verification: 1
File: video1_segment3.wav Score: tensor([[0.7106]]) Passes Verification: 1
File: video1_segment4.wav Score: tensor([[0.7204]]) Passes Verification: 1
File: video1_segment5.wav Score: tensor([[0.7986]]) Passes Verification: 1
File: video1_segment6.wav Score: tensor([[0.7662]]) Passes Verification: 1
File: video1_segment7.wav Score: tensor([[0.6875]]) Passes Verification: 1
File: video1_segment8.wav Score: tensor([[0.7853]]) Passes Verification: 1
File: video1_segment9.wav Score: tensor([[0.8199]]) Passes Verification: 1
File: video1_segment10.wav Score: tensor([[0.8010]]) Passes Verification: 1
File: video1_segment11.wav Score: tensor([[0.7689]]) Passes Verification: 1
File: video1_segment12.wav Score: tensor([[0.7507]]) Passes Verification: 1
File: video1_segment13.wav Score: tensor([[0.7314]]) Passes Verification: 1
File: video1_segment1

In [38]:
print(file)

video2_segment165.txt


In [49]:
import sqlite3

metadata_db_path = "metadata.db"

# Connect to the SQLite database
conn = sqlite3.connect(metadata_db_path)
c = conn.cursor()

# Query the database and print the results
for row in c.execute("SELECT * FROM speaker_files"):
    print(row)

# Close the connection
conn.close()


(1, '/Users/harshbhatia/Documents/CBD/Charlie_CBD/nexusbot/tests/segment_data/video1_segment1.wav', 1)
(2, '/Users/harshbhatia/Documents/CBD/Charlie_CBD/nexusbot/tests/segment_data/video1_segment2.wav', 1)
(3, '/Users/harshbhatia/Documents/CBD/Charlie_CBD/nexusbot/tests/segment_data/video1_segment3.wav', 1)
(4, '/Users/harshbhatia/Documents/CBD/Charlie_CBD/nexusbot/tests/segment_data/video1_segment4.wav', 1)
(5, '/Users/harshbhatia/Documents/CBD/Charlie_CBD/nexusbot/tests/segment_data/video1_segment5.wav', 1)
(6, '/Users/harshbhatia/Documents/CBD/Charlie_CBD/nexusbot/tests/segment_data/video1_segment6.wav', 1)
(7, '/Users/harshbhatia/Documents/CBD/Charlie_CBD/nexusbot/tests/segment_data/video1_segment7.wav', 1)
(8, '/Users/harshbhatia/Documents/CBD/Charlie_CBD/nexusbot/tests/segment_data/video1_segment8.wav', 1)
(9, '/Users/harshbhatia/Documents/CBD/Charlie_CBD/nexusbot/tests/segment_data/video1_segment9.wav', 1)
(10, '/Users/harshbhatia/Documents/CBD/Charlie_CBD/nexusbot/tests/segment

In [51]:
import sqlite3

def create_transcripts(metadata_db_file, transcripts_dir):
    # Connect to the metadata database file
    conn = sqlite3.connect(metadata_db_file)
    c = conn.cursor()
    
    # Select all the rows from the speaker_files table
    c.execute("SELECT * FROM speaker_files")
    rows = c.fetchall()
    
    # Loop through each row in the table
    current_speaker = None
    speaker_prefix = ""
    speaker_text = ""
    current_video = None
    for row in rows:
        # Get the file path and the is_speaker value
        file_path = row[1]
        is_speaker = row[2]
        
        # Split the file path into parts
        parts = file_path.split("/")
        
        # Get the file name
        file_name = parts[-1]
        
        # Split the file name into parts
        parts = file_name.split(".")
        
        # Get the base file name (without the extension)
        base_file_name = parts[0]
        
        # Split the base file name into parts
        parts = base_file_name.split("_")
        
        # Get the video number and segment number
        video_number = int(parts[0].replace("video", ""))
        segment_number = int(parts[1].replace("segment", ""))
        
        # Create the text file path
        text_file_path = "/".join(file_path.split("/")[:-1]) + "/" + base_file_name + ".txt"
        
        # Read the text file
        with open(text_file_path, "r") as f:
            text = f.read()
        
        # If the current speaker has changed, write the previous speaker's text to the transcript file
        if current_speaker is not None and current_speaker != is_speaker:
            transcript_file_path = f"{transcripts_dir}/video{current_video}_dialogue_transcript.txt"
            with open(transcript_file_path, "a") as f:
                f.write(speaker_prefix + speaker_text + "\n")
            
            # Reset the speaker text
            speaker_text = ""
        
        # Add the speaker and text to the transcript
        if is_speaker:
            speaker_prefix = "[Speaker]: "
        else:
            speaker_prefix = "[Context]: "
        
        # Update the current speaker and speaker text
        current_speaker = is_speaker
        speaker_text
        speaker_text += text + " "
        current_video = video_number
    
    # Write the last speaker's text to the transcript file
    transcript_file_path = f"{transcripts_dir}/video{current_video}_dialogue_transcript.txt"
    with open(transcript_file_path, "a") as f:
        f.write(speaker_prefix + speaker_text + "\n")
    
    # Close the connection to the database
    conn.close()

# Example usage
metadata_db_file = "metadata.db"
transcripts_dir = "/Users/harshbhatia/Documents/CBD/Charlie_CBD/nexusbot/data/dialogue_transcripts"
create_transcripts(metadata_db_file, transcripts_dir)


In [8]:
import os
import torch
from speechbrain.pretrained import SpeakerRecognition
from speechbrain.pretrained import EncoderClassifier
from speechbrain.pretrained import Pretrained
import sqlite3
import tempfile
import re
from natsort import natsorted

def load_models():
    # Load the speaker recognition model
    sr_model = SpeakerRecognition.from_hparams(source="speechbrain/spkrec-ecapa-voxceleb", savedir="pretrained_models/spkrec-ecapa-voxceleb")

    # Load the encoder classifier model
    ec_model = EncoderClassifier.from_hparams(source="speechbrain/spkrec-ecapa-voxceleb", savedir="pretrained_models/spkrec-ecapa-voxceleb")

    p_model = Pretrained.from_hparams(source="speechbrain/spkrec-ecapa-voxceleb", savedir="pretrained_models/spkrec-ecapa-voxceleb")
    return sr_model, ec_model, p_model

def extract_embeddings(p_model, reference_file):
    # Extract the embeddings of the reference audio file
    waveform_x = p_model.load_audio(path=reference_file)
    batch_x = waveform_x.unsqueeze(0)
    emb_ref = ec_model.encode_batch(batch_x, normalize=True)
    return emb_ref

def compare_embeddings(emb_ref, emb_y):
    similarity = torch.nn.CosineSimilarity(dim=-1, eps=1e-6)
    score = similarity(emb_y, emb_ref)
    is_speaker = 1 if score > 0.25 else 0
    return is_speaker

def create_metadata_table(metadata_db_path):
    # Connect to the SQLite database
    conn = sqlite3.connect(metadata_db_path)
    c = conn.cursor()

    # Create the table for storing metadata
    c.execute('''CREATE TABLE IF NOT EXISTS speaker_files
                 (id INTEGER PRIMARY KEY AUTOINCREMENT, file_path TEXT, is_speaker INTEGER)''')
    return conn, c

def check_similarity(folder_path, metadata_db_path, temp_dir, emb_ref, p_model, ec_model):
    count_fail = 0
    # Loop through all the files in the folder and check for similarity with reference audio file
    for file in natsorted(os.listdir(folder_path), key=lambda x: (x.split("_")[0], int(x.split("_")[1].split(".")[0][7:]))):
        if file.endswith('.wav'):
            waveform_y = p_model.load_audio(path=os.path.join(folder_path, file), savedir=temp_dir)
            batch_y = waveform_y.unsqueeze(0)
            emb_y = ec_model.encode_batch(batch_y, normalize=True)
            is_speaker = compare_embeddings(emb_ref, emb_y)
            if is_speaker == 0:
                count_fail += 1
            print(f"File: {file} Passes Verification: {is_speaker}")
            
            # Insert the metadata into the database
            c.execute("INSERT INTO speaker_files (file_path, is_speaker) VALUES (?, ?)", (os.path.join(folder_path, file), is_speaker))
            
            os.remove(os.path.join(temp_dir, file))

In [9]:
# Load the models
sr_model, ec_model, p_model = load_models()

# Extract the embeddings of the reference audio file
reference_file = "/Users/harshbhatia/Documents/CBD/Charlie_CBD/nexusbot/data/training/standard.wav"
emb_ref = extract_embeddings(p_model, reference_file)

In [3]:
# Create the metadata table
metadata_db_path = "/Users/harshbhatia/Documents/CBD/Charlie_CBD/nexusbot/tests/metadata.db"
conn, c = create_metadata_table(metadata_db_path)

In [5]:

# Check for similarity
folder_path = "/Users/harshbhatia/Documents/CBD/Charlie_CBD/nexusbot/tests/segment_data"
temp_dir = tempfile.gettempdir()
check_similarity(folder_path, metadata_db_path, temp_dir, emb_ref, p_model, ec_model)



File: video1_segment1.wav Passes Verification: 1
File: video1_segment2.wav Passes Verification: 1
File: video1_segment3.wav Passes Verification: 1
File: video1_segment4.wav Passes Verification: 1
File: video1_segment5.wav Passes Verification: 1
File: video1_segment6.wav Passes Verification: 1
File: video1_segment7.wav Passes Verification: 1
File: video1_segment8.wav Passes Verification: 1
File: video1_segment9.wav Passes Verification: 1
File: video1_segment10.wav Passes Verification: 1
File: video1_segment11.wav Passes Verification: 1
File: video1_segment12.wav Passes Verification: 1
File: video1_segment13.wav Passes Verification: 1
File: video1_segment14.wav Passes Verification: 1
File: video1_segment15.wav Passes Verification: 1
File: video1_segment16.wav Passes Verification: 1
File: video1_segment17.wav Passes Verification: 1
File: video1_segment18.wav Passes Verification: 1
File: video1_segment19.wav Passes Verification: 1
File: video1_segment20.wav Passes Verification: 1
File: vid

In [6]:
# Commit the changes to the database and close the connection
conn.commit()
conn.close()

In [7]:
import sqlite3

metadata_db_path = "metadata.db"

# Connect to the SQLite database
conn = sqlite3.connect(metadata_db_path)
c = conn.cursor()

# Query the database and print the results
for row in c.execute("SELECT * FROM speaker_files"):
    print(row)

# Close the connection
conn.close()


(1, '/Users/harshbhatia/Documents/CBD/Charlie_CBD/nexusbot/tests/segment_data/video1_segment1.wav', 1)
(2, '/Users/harshbhatia/Documents/CBD/Charlie_CBD/nexusbot/tests/segment_data/video1_segment2.wav', 1)
(3, '/Users/harshbhatia/Documents/CBD/Charlie_CBD/nexusbot/tests/segment_data/video1_segment3.wav', 1)
(4, '/Users/harshbhatia/Documents/CBD/Charlie_CBD/nexusbot/tests/segment_data/video1_segment4.wav', 1)
(5, '/Users/harshbhatia/Documents/CBD/Charlie_CBD/nexusbot/tests/segment_data/video1_segment5.wav', 1)
(6, '/Users/harshbhatia/Documents/CBD/Charlie_CBD/nexusbot/tests/segment_data/video1_segment6.wav', 1)
(7, '/Users/harshbhatia/Documents/CBD/Charlie_CBD/nexusbot/tests/segment_data/video1_segment7.wav', 1)
(8, '/Users/harshbhatia/Documents/CBD/Charlie_CBD/nexusbot/tests/segment_data/video1_segment8.wav', 1)
(9, '/Users/harshbhatia/Documents/CBD/Charlie_CBD/nexusbot/tests/segment_data/video1_segment9.wav', 1)
(10, '/Users/harshbhatia/Documents/CBD/Charlie_CBD/nexusbot/tests/segment

In [None]:
import os
import torch
from speechbrain.pretrained import SpeakerRecognition
from speechbrain.pretrained import EncoderClassifier
from speechbrain.pretrained import Pretrained
import sqlite3
import tempfile
import re
from natsort import natsorted

def load_models():
    # Load the speaker recognition model
    sr_model = SpeakerRecognition.from_hparams(source="speechbrain/spkrec-ecapa-voxceleb", savedir="pretrained_models/spkrec-ecapa-voxceleb")

    # Load the encoder classifier model
    ec_model = EncoderClassifier.from_hparams(source="speechbrain/spkrec-ecapa-voxceleb", savedir="pretrained_models/spkrec-ecapa-voxceleb")

    p_model = Pretrained.from_hparams(source="speechbrain/spkrec-ecapa-voxceleb", savedir="pretrained_models/spkrec-ecapa-voxceleb")
    return sr_model, ec_model, p_model

def extract_embeddings(p_model, ec_model, reference_file):
    # Extract the embeddings of the reference audio file
    waveform_x = p_model.load_audio(path=reference_file)
    batch_x = waveform_x.unsqueeze(0)
    emb_ref = ec_model.encode_batch(batch_x, normalize=True)
    return emb_ref

def compare_embeddings(emb_ref, emb_y):
    similarity = torch.nn.CosineSimilarity(dim=-1, eps=1e-6)
    score = similarity(emb_y, emb_ref)
    is_speaker = 1 if score > 0.25 else 0
    return is_speaker

def create_metadata_table(metadata_db_path):
    # Connect to the SQLite database
    conn = sqlite3.connect(metadata_db_path)
    c = conn.cursor()

    # Create the table for storing metadata
    c.execute('''CREATE TABLE IF NOT EXISTS speaker_files
                 (id INTEGER PRIMARY KEY AUTOINCREMENT, file_path TEXT, is_speaker INTEGER)''')
    return conn, c

def check_similarity(folder_path, c, temp_dir, emb_ref, p_model, ec_model):
    count_fail = 0
    # Loop through all the files in the folder and check for similarity with reference audio file
    for file in natsorted(os.listdir(folder_path), key=lambda x: (x.split("_")[0], int(x.split("_")[1].split(".")[0][7:]))):
        if file.endswith('.wav'):
            waveform_y = p_model.load_audio(path=os.path.join(folder_path, file), savedir=temp_dir)
            batch_y = waveform_y.unsqueeze(0)
            emb_y = ec_model.encode_batch(batch_y, normalize=True)
            is_speaker = compare_embeddings(emb_ref, emb_y)
            if is_speaker == 0:
                count_fail += 1
            print(f"File: {file} Passes Verification: {is_speaker}")
            
            # Insert the metadata into the database
            c.execute("INSERT INTO speaker_files (file_path, is_speaker) VALUES (?, ?)", (os.path.join(folder_path, file), is_speaker))
            
            os.remove(os.path.join(temp_dir, file))


# Usage

# # Load the models
# sr_model, ec_model, p_model = load_models()

# # Extract the embeddings of the reference audio file
# reference_file = "/Users/harshbhatia/Documents/CBD/Charlie_CBD/nexusbot/data/training/standard.wav"
# emb_ref = extract_embeddings(p_model, ec_model, reference_file)

# # Create the metadata table
# metadata_db_path = "/Users/harshbhatia/Documents/CBD/Charlie_CBD/nexusbot/tests/metadata.db"
# conn, c = create_metadata_table(metadata_db_path)

# # Check for similarity
# folder_path = "/Users/harshbhatia/Documents/CBD/Charlie_CBD/nexusbot/tests/segment_data"
# temp_dir = tempfile.gettempdir()
# check_similarity(folder_path, c, temp_dir, emb_ref, p_model, ec_model)

# # Commit the changes to the database and close the connection
# conn.commit()
# conn.close()