In [None]:
import os
import shutil
import concurrent.futures
import torch
from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
import librosa
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from spellchecker import SpellChecker
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import string
from moviepy.editor import VideoFileClip

In [None]:
# Directory that is scanned for source videos; the driver cell only picks up
# entries whose name ends in '.mp4'.
# NOTE(review): absolute, machine-specific Windows path -- adjust before
# running this notebook on another machine.
video_directory = r'D:\SRM\3rd_Year\6th_Semester\Research\Video_Retriveal_System\Data\NPTEL\Video'

# Parent directory for generated artefacts: the driver cell writes each
# video's .wav file and transcription into '<parent>/audio_KBVRS/<video name>/'.
parent_audio_directory = r'D:\SRM\3rd_Year\6th_Semester\Research\Video_Retriveal_System\Data\NPTEL'

# Reference (query) text that every transcription is compared against when
# the cosine similarity is computed.
input_text = "linear regression"  

# Load the pre-trained Wav2Vec2 processor and CTC model once, at module level,
# so they can be shared by all transcription calls (which feed them 16 kHz audio).
# NOTE(review): presumably this fetches the checkpoint from the Hugging Face
# Hub when it is not cached locally -- confirm network access is available.
wav2vec2_processor = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-base-960h")
wav2vec2_model = Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-base-960h")

In [None]:
# Function to convert video to audio
def convert_video_to_audio(input_video_path, output_audio_path):
    """Extract the audio track of a video file and write it to disk.

    Args:
        input_video_path: Path of the video file to open.
        output_audio_path: Destination path of the audio file; it is passed
            unchanged to the audio clip's ``write_audiofile``.

    Raises:
        ValueError: If the opened video exposes no audio track
            (``video_clip.audio`` is ``None``).
    """
    video_clip = VideoFileClip(input_video_path)
    try:
        audio_clip = video_clip.audio
        if audio_clip is None:
            # Fail with an explicit message instead of an AttributeError on None.
            raise ValueError(f"No audio track found in video: {input_video_path}")
        audio_clip.write_audiofile(output_audio_path)
    finally:
        # Release the clip even when extraction or writing fails.
        video_clip.close()

In [None]:
# Function to transcribe audio chunks
def transcribe_chunk(chunk, wav2vec2_processor, wav2vec2_model):
    """Transcribe one chunk of audio with a Wav2Vec2 CTC model.

    Args:
        chunk: Audio samples of the chunk; they are handed to the processor
            with ``sampling_rate=16000``.
        wav2vec2_processor: Processor used both to build the model input and
            to decode the predicted token ids.
        wav2vec2_model: Model whose output exposes ``.logits``.

    Returns:
        The result of ``wav2vec2_processor.batch_decode`` for the greedy
        (arg-max) token ids.
    """
    input_values = wav2vec2_processor(
        chunk, sampling_rate=16000, return_tensors="pt", padding=True
    ).input_values

    # Pure inference: disable autograd so no graph is recorded for the
    # forward pass (saves memory and time on every chunk).
    with torch.no_grad():
        logits = wav2vec2_model(input_values).logits

    predicted_ids = torch.argmax(logits, dim=-1)
    return wav2vec2_processor.batch_decode(predicted_ids)

In [None]:
# Function to transcribe audio in parallel
def transcribe_audio_parallel(audio_file_path, wav2vec2_processor, wav2vec2_model, chunk_size=5):
    """Transcribe an audio file chunk by chunk using a thread pool.

    The audio is loaded at 16 kHz, cut into consecutive chunks of
    ``chunk_size`` seconds (the last one may be shorter) and every chunk is
    submitted to ``transcribe_chunk``.

    Args:
        audio_file_path: Path of the audio file to load.
        wav2vec2_processor: Forwarded to ``transcribe_chunk``.
        wav2vec2_model: Forwarded to ``transcribe_chunk``.
        chunk_size: Chunk length in seconds.

    Returns:
        A flat list with the transcriptions of all chunks, in the order the
        chunks occur in the audio. Chunks whose transcription raised are
        reported on stdout and left out.
    """
    sampling_rate = 16000
    audio_input, _ = librosa.load(audio_file_path, sr=sampling_rate)

    samples_per_chunk = chunk_size * sampling_rate
    chunks = [audio_input[start:start + samples_per_chunk]
              for start in range(0, len(audio_input), samples_per_chunk)]
    transcriptions = []

    with concurrent.futures.ThreadPoolExecutor() as executor:
        futures = [executor.submit(transcribe_chunk, chunk, wav2vec2_processor, wav2vec2_model)
                   for chunk in chunks]

        # Collect results in submission order. Iterating with as_completed()
        # would append the pieces in whatever order the workers finish and
        # scramble the transcript.
        for future in futures:
            try:
                transcriptions.extend(future.result())
            except Exception as e:
                # Best effort: a failing chunk must not abort the whole file.
                print(f"Error processing chunk: {e}")

    return transcriptions

In [None]:
# Function to optimize transcribed text
def optimize_text(text):
    """Remove English stopwords and punctuation tokens from a text.

    The text is tokenized with ``word_tokenize``; a token is dropped when its
    lower-cased form is an NLTK English stopword or a single character from
    ``string.punctuation``. The surviving tokens keep their original casing.

    NOTE(review): presumably requires the NLTK tokenizer and stopword data to
    be downloaded beforehand -- confirm for the target environment.

    Args:
        text: The text to filter.

    Returns:
        The remaining tokens joined by single spaces.
    """
    words = word_tokenize(text)
    english_stopwords = set(stopwords.words('english'))
    punctuation = set(string.punctuation)
    all_stopwords = english_stopwords.union(punctuation)
    filtered_words = [word for word in words if word.lower() not in all_stopwords]
    return ' '.join(filtered_words)

In [None]:
# Function to correct spelling errors
def correct_spelling(text):
    """Replace every whitespace-separated word by its spelling correction.

    Words for which the spell checker returns ``None`` are left out of the
    result.

    Args:
        text: The text to correct.

    Returns:
        The corrected words joined by single spaces.
    """
    spell = SpellChecker()
    corrected_words = []
    for word in text.split():
        # Look the word up once; the correction is reused for both the
        # None-filter and the output.
        correction = spell.correction(word)
        if correction is not None:
            corrected_words.append(correction)
    return ' '.join(corrected_words)

In [None]:
# Function to calculate cosine similarity
def calculate_cosine_similarity(transcribed_audio_text, input_text):
    """Score how similar a transcription is to the reference text.

    A ``CountVectorizer`` is fitted on both strings, both are transformed
    with it, and the cosine similarity between the two resulting rows is
    returned.

    Args:
        transcribed_audio_text: Text obtained from the audio.
        input_text: Reference text to compare against.

    Returns:
        The entry of the pairwise similarity matrix that relates the
        transcription (row 0) to the reference text (column 1).
    """
    documents = [transcribed_audio_text, input_text]

    bag_of_words = CountVectorizer()
    bag_of_words.fit(documents)
    count_matrix = bag_of_words.transform(documents)

    pairwise_scores = cosine_similarity(count_matrix)
    return pairwise_scores[0][1]

In [None]:
import os
import shutil
import concurrent.futures
import torch
from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
import librosa
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from spellchecker import SpellChecker
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import string
from moviepy.editor import VideoFileClip

# NOTE: this cell repeats the imports, configuration and function definitions
# of the earlier cells; running it rebinds the same names.

# Directory that is scanned for source videos; the driver cell only picks up
# entries whose name ends in '.mp4'.
# NOTE(review): absolute, machine-specific Windows path -- adjust before
# running this notebook on another machine.
video_directory = r'D:\SRM\3rd_Year\6th_Semester\Research\Video_Retriveal_System\Data\NPTEL\Video'

# Parent directory for generated artefacts: the driver cell writes each
# video's .wav file and transcription into '<parent>/audio_KBVRS/<video name>/'.
parent_audio_directory = r'D:\SRM\3rd_Year\6th_Semester\Research\Video_Retriveal_System\Data\NPTEL'

# Reference (query) text that every transcription is compared against when
# the cosine similarity is computed.
input_text = "linear regression"  

# Load the pre-trained Wav2Vec2 processor and CTC model once, at module level,
# so they can be shared by all transcription calls (which feed them 16 kHz audio).
# NOTE(review): presumably this fetches the checkpoint from the Hugging Face
# Hub when it is not cached locally -- confirm network access is available.
wav2vec2_processor = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-base-960h")
wav2vec2_model = Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-base-960h")

# Function to convert video to audio
def convert_video_to_audio(input_video_path, output_audio_path):
    """Extract the audio track of a video file and write it to disk.

    Args:
        input_video_path: Path of the video file to open.
        output_audio_path: Destination path of the audio file; it is passed
            unchanged to the audio clip's ``write_audiofile``.

    Raises:
        ValueError: If the opened video exposes no audio track
            (``video_clip.audio`` is ``None``).
    """
    video_clip = VideoFileClip(input_video_path)
    try:
        audio_clip = video_clip.audio
        if audio_clip is None:
            # Fail with an explicit message instead of an AttributeError on None.
            raise ValueError(f"No audio track found in video: {input_video_path}")
        audio_clip.write_audiofile(output_audio_path)
    finally:
        # Release the clip even when extraction or writing fails.
        video_clip.close()

# Function to transcribe audio chunks
def transcribe_chunk(chunk, wav2vec2_processor, wav2vec2_model):
    """Transcribe one chunk of audio with a Wav2Vec2 CTC model.

    Args:
        chunk: Audio samples of the chunk; they are handed to the processor
            with ``sampling_rate=16000``.
        wav2vec2_processor: Processor used both to build the model input and
            to decode the predicted token ids.
        wav2vec2_model: Model whose output exposes ``.logits``.

    Returns:
        The result of ``wav2vec2_processor.batch_decode`` for the greedy
        (arg-max) token ids.
    """
    input_values = wav2vec2_processor(
        chunk, sampling_rate=16000, return_tensors="pt", padding=True
    ).input_values

    # Pure inference: disable autograd so no graph is recorded for the
    # forward pass (saves memory and time on every chunk).
    with torch.no_grad():
        logits = wav2vec2_model(input_values).logits

    predicted_ids = torch.argmax(logits, dim=-1)
    return wav2vec2_processor.batch_decode(predicted_ids)

# Function to transcribe audio in parallel
def transcribe_audio_parallel(audio_file_path, wav2vec2_processor, wav2vec2_model, chunk_size=5):
    """Transcribe an audio file chunk by chunk using a thread pool.

    The audio is loaded at 16 kHz, cut into consecutive chunks of
    ``chunk_size`` seconds (the last one may be shorter) and every chunk is
    submitted to ``transcribe_chunk``.

    Args:
        audio_file_path: Path of the audio file to load.
        wav2vec2_processor: Forwarded to ``transcribe_chunk``.
        wav2vec2_model: Forwarded to ``transcribe_chunk``.
        chunk_size: Chunk length in seconds.

    Returns:
        A flat list with the transcriptions of all chunks, in the order the
        chunks occur in the audio. Chunks whose transcription raised are
        reported on stdout and left out.
    """
    sampling_rate = 16000
    audio_input, _ = librosa.load(audio_file_path, sr=sampling_rate)

    samples_per_chunk = chunk_size * sampling_rate
    chunks = [audio_input[start:start + samples_per_chunk]
              for start in range(0, len(audio_input), samples_per_chunk)]
    transcriptions = []

    with concurrent.futures.ThreadPoolExecutor() as executor:
        futures = [executor.submit(transcribe_chunk, chunk, wav2vec2_processor, wav2vec2_model)
                   for chunk in chunks]

        # Collect results in submission order. Iterating with as_completed()
        # would append the pieces in whatever order the workers finish and
        # scramble the transcript.
        for future in futures:
            try:
                transcriptions.extend(future.result())
            except Exception as e:
                # Best effort: a failing chunk must not abort the whole file.
                print(f"Error processing chunk: {e}")

    return transcriptions

# Function to optimize transcribed text
def optimize_text(text):
    """Remove English stopwords and punctuation tokens from a text.

    The text is tokenized with ``word_tokenize``; a token is dropped when its
    lower-cased form is an NLTK English stopword or a single character from
    ``string.punctuation``. The surviving tokens keep their original casing.

    NOTE(review): presumably requires the NLTK tokenizer and stopword data to
    be downloaded beforehand -- confirm for the target environment.

    Args:
        text: The text to filter.

    Returns:
        The remaining tokens joined by single spaces.
    """
    words = word_tokenize(text)
    english_stopwords = set(stopwords.words('english'))
    punctuation = set(string.punctuation)
    all_stopwords = english_stopwords.union(punctuation)
    filtered_words = [word for word in words if word.lower() not in all_stopwords]
    return ' '.join(filtered_words)

# Function to correct spelling errors
def correct_spelling(text):
    """Replace every whitespace-separated word by its spelling correction.

    Words for which the spell checker returns ``None`` are left out of the
    result.

    Args:
        text: The text to correct.

    Returns:
        The corrected words joined by single spaces.
    """
    spell = SpellChecker()
    corrected_words = []
    for word in text.split():
        # Look the word up once; the correction is reused for both the
        # None-filter and the output.
        correction = spell.correction(word)
        if correction is not None:
            corrected_words.append(correction)
    return ' '.join(corrected_words)

# Function to calculate cosine similarity
def calculate_cosine_similarity(transcribed_audio_text, input_text):
    """Score how similar a transcription is to the reference text.

    A ``CountVectorizer`` is fitted on both strings, both are transformed
    with it, and the cosine similarity between the two resulting rows is
    returned.

    Args:
        transcribed_audio_text: Text obtained from the audio.
        input_text: Reference text to compare against.

    Returns:
        The entry of the pairwise similarity matrix that relates the
        transcription (row 0) to the reference text (column 1).
    """
    documents = [transcribed_audio_text, input_text]

    bag_of_words = CountVectorizer()
    bag_of_words.fit(documents)
    count_matrix = bag_of_words.transform(documents)

    pairwise_scores = cosine_similarity(count_matrix)
    return pairwise_scores[0][1]




In [None]:
if __name__ == "__main__":
    # Stage 1: extract the audio track of every .mp4 into its own folder,
    # '<parent_audio_directory>/audio_KBVRS/<video name>/<video name>.wav'.
    for video_file in os.listdir(video_directory):
        if not video_file.endswith('.mp4'):
            continue

        video_name = os.path.splitext(video_file)[0]
        video_path = os.path.join(video_directory, video_file)
        audio_directory = os.path.join(parent_audio_directory, 'audio_KBVRS', video_name)
        audio_path = os.path.join(audio_directory, video_name + '.wav')

        # Key the check on the .wav file rather than on its folder: if an
        # earlier conversion failed after the folder had been created, the
        # audio is extracted again instead of being silently skipped.
        if not os.path.exists(audio_path):
            os.makedirs(audio_directory, exist_ok=True)
            convert_video_to_audio(video_path, audio_path)

    # Stage 2: transcribe each extracted audio file and score the transcript
    # against the reference text.
    for video_file in os.listdir(video_directory):
        if not video_file.endswith('.mp4'):
            continue

        video_name = os.path.splitext(video_file)[0]
        audio_directory = os.path.join(parent_audio_directory, 'audio_KBVRS', video_name)
        audio_path = os.path.join(audio_directory, video_name + '.wav')
        transcription_file = os.path.join(audio_directory, video_name + '_transcription.txt')

        # An existing transcription file marks the video as already processed.
        if os.path.exists(transcription_file):
            print(f"Transcription file already exists for {video_file}. Skipping transcription.")
            continue

        transcriptions = transcribe_audio_parallel(audio_path, wav2vec2_processor, wav2vec2_model)
        # Separate the per-chunk transcriptions with a space; joining them
        # with "" fuses the last word of one chunk with the first word of
        # the next.
        transcriptions_text = " ".join(transcriptions)

        # Persist the raw transcription next to the audio file.
        with open(transcription_file, "w", encoding="utf-8") as file:
            file.write(transcriptions_text)

        # Clean the text before comparing it with the reference text.
        optimized_text = optimize_text(transcriptions_text)
        corrected_text = correct_spelling(optimized_text)

        cosine_sim = calculate_cosine_similarity(corrected_text, input_text)

        print(f"Video: {video_file}, Cosine Similarity: {cosine_sim}")

        # Hide the per-video output folder.
        if os.name == 'nt':  # Windows
            # Imported here because ctypes.windll only exists on Windows and
            # the module is not imported at the top of the notebook.
            import ctypes
            ctypes.windll.kernel32.SetFileAttributesW(audio_directory, 2)  # 2 = FILE_ATTRIBUTE_HIDDEN
        else:  # Unix-like
            # NOTE(review): renaming moves the folder away from
            # `audio_directory`, so on a later run neither existence check
            # above finds it and the video is converted and transcribed again.
            os.rename(audio_directory, os.path.join(os.path.dirname(audio_directory), '.' + os.path.basename(audio_directory)))

