In [None]:
!pip install moviepy
from moviepy.editor import VideoFileClip

def video_to_audio(video_file, output_audio_file):
    video_clip = VideoFileClip(video_file)
    audio_clip = video_clip.audio
    audio_clip.write_audiofile(output_audio_file)

# Example usage
video_to_audio("videoplayback.mp4", "output_audio.mp3")

MoviePy - Writing audio in output_audio.mp3


                                                                   

MoviePy - Done.




In [None]:
!pip install SpeechRecognition pydub
!pip install pyttsx3
import speech_recognition as sr
from pydub import AudioSegment

def convert_audio_to_text(audio_file, output_text_file):
    # Load your audio file
    audio = AudioSegment.from_file('/content/output_audio.mp3')

    # Export audio to wav format (required by SpeechRecognition)
    wav_file = "temp_audio.wav"
    audio.export(wav_file, format="wav")

    # Initialize recognizer
    recognizer = sr.Recognizer()

    # Load the audio file
    with sr.AudioFile(wav_file) as source:
        audio_data = recognizer.record(source)
        # Recognize (convert from speech to text)
        try:
            text = recognizer.recognize_google(audio_data)
            print("Transcription: " + text)
        except sr.UnknownValueError:
            print("Google Speech Recognition could not understand audio")
            text = ""
        except sr.RequestError as e:
            print(f"Could not request results from Google Speech Recognition service; {e}")
            text = ""

    # Save the text to a file
    with open(output_text_file, "w") as file:
        file.write(text)

# Example usage
convert_audio_to_text("input_audio.mp3", "output_text.txt")


Collecting SpeechRecognition
  Downloading SpeechRecognition-3.10.4-py2.py3-none-any.whl (32.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m32.8/32.8 MB[0m [31m44.8 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting pydub
  Downloading pydub-0.25.1-py2.py3-none-any.whl (32 kB)
Installing collected packages: pydub, SpeechRecognition
Successfully installed SpeechRecognition-3.10.4 pydub-0.25.1
Collecting pyttsx3
  Downloading pyttsx3-2.90-py3-none-any.whl (39 kB)
Installing collected packages: pyttsx3
Successfully installed pyttsx3-2.90
Transcription: hello my name is Kate thank you for clicking on to the video to get to know me a little bit better I guess what I wanted to say to you today was one of the reasons that drove me to apply to your organization rather than some others I really liked on your web page that you have some great core values which I feel are aligned with myself and not only that but behind us core values you're doing a lot of work in helping the

In [1]:
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from transformers import BertTokenizer, BertModel

# Load the preprocessed data
data = pd.read_csv("dataaa2.csv")
print(data.isnull().sum())

data.dropna(inplace=True)
print(data.isnull().sum())

# Encode position labels
position_mapping = {
    "HOD": 0,
    "Associate Professor": 1,
    "Assistant Professor": 2,
    "Reject": 3
}
data["Position"] = data["Position"].map(position_mapping)

# Create a custom dataset class for our data
class InterviewDataset(torch.utils.data.Dataset):
    def __init__(self, data, tokenizer):
        self.data = data
        self.tokenizer = tokenizer

    def __getitem__(self, idx):
        text = self.data.iloc[idx, 0]
        label = self.data.iloc[idx, 1]
        position_label = self.data.iloc[idx, 2]

        encoding = self.tokenizer.encode_plus(
            text,
            max_length=512,
            padding="max_length",
            truncation=True,
            return_attention_mask=True,
            return_tensors="pt"
        )

        return {
            "input_ids": encoding["input_ids"].flatten(),
            "attention_mask": encoding["attention_mask"].flatten(),
            "label": torch.tensor(label, dtype=torch.long),
            "position_label": torch.tensor(position_label, dtype=torch.long)
        }

    def __len__(self):
        return len(self.data)

# Create a data loader for our dataset
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
dataset = InterviewDataset(data, tokenizer)
data_loader = torch.utils.data.DataLoader(dataset, batch_size=32, shuffle=True)

# Define the machine learning model
class InterviewModel(nn.Module):
    def __init__(self):
        super(InterviewModel, self).__init__()
        self.bert = BertModel.from_pretrained("bert-base-uncased")
        self.dropout = nn.Dropout(0.1)
        self.selection_classifier = nn.Linear(self.bert.config.hidden_size, 2)
        self.position_classifier = nn.Linear(self.bert.config.hidden_size, 4)  # Update the number of classes to 4

    def forward(self, input_ids, attention_mask):
        outputs = self.bert(input_ids, attention_mask=attention_mask)
        pooled_output = outputs.pooler_output
        pooled_output = self.dropout(pooled_output)

        selection_outputs = self.selection_classifier(pooled_output)
        position_outputs = self.position_classifier(pooled_output)

        return selection_outputs, position_outputs

model = InterviewModel()

# Train the model
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-5)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

for epoch in range(10):
    model.train()  # Ensure the model is in training mode
    epoch_loss = 0  # Initialize epoch loss
    for batch in data_loader:
        input_ids = batch["input_ids"].to(device)
        attention_mask = batch["attention_mask"].to(device)
        labels = batch["label"].to(device)
        position_labels = batch["position_label"].to(device)

        optimizer.zero_grad()

        selection_outputs, position_outputs = model(input_ids, attention_mask)
        selection_loss = criterion(selection_outputs, labels)
        position_loss = criterion(position_outputs, position_labels)
        loss = selection_loss + position_loss

        loss.backward()
        optimizer.step()

    print(f"Epoch {epoch+1}, Loss: {loss.item()}")




Text        0
Selected    1
Position    1
dtype: int64
Text        0
Selected    0
Position    0
dtype: int64


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]



config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Epoch 1, Loss: 2.1757874488830566
Epoch 2, Loss: 2.0786213874816895
Epoch 3, Loss: 2.122929573059082
Epoch 4, Loss: 1.790969729423523
Epoch 5, Loss: 1.5014233589172363
Epoch 6, Loss: 1.950945258140564
Epoch 7, Loss: 1.824937105178833
Epoch 8, Loss: 1.9664533138275146
Epoch 9, Loss: 1.156768798828125
Epoch 10, Loss: 1.3882646560668945


In [2]:
import pandas as pd
data = {
    "Text": [
        "The candidate has extensive experience in research and leadership.",

    ],
    "Label": [0],
    "Position": ["Reject"]
}


test_data = pd.DataFrame(data)


test_data.to_csv("test_data.csv", index=False)


In [3]:
# Evaluate the model
model.eval()
test_data = pd.read_csv("test_data.csv")
test_data["Position"] = test_data["Position"].map(position_mapping)
test_dataset = InterviewDataset(test_data, tokenizer)
test_data_loader = torch.utils.data.DataLoader(test_dataset, batch_size=32, shuffle=False)

selection_labels = ["Not Selected", "Selected"]
position_labels = ["HOD", "Associate Professor", "Assistant Professor", "Reject"]

with torch.no_grad():
    for batch in test_data_loader:
        input_ids = batch["input_ids"].to(device)
        attention_mask = batch["attention_mask"].to(device)

        selection_outputs, position_outputs = model(input_ids, attention_mask)
        _, predicted_selection = torch.max(selection_outputs, 1)
        _, predicted_position = torch.max(position_outputs, 1)

        for pred_sel, pred_pos in zip(predicted_selection, predicted_position):
            print(f"Selection: {selection_labels[pred_sel]}, Position: {position_labels[pred_pos]}")

Selection: Selected, Position: Associate Professor


In [12]:
def predict(text):
    model.eval()
    encoding = tokenizer.encode_plus(
        text,
        max_length=512,
        padding="max_length",
        truncation=True,
        return_attention_mask=True,
        return_tensors="pt"
    )

    input_ids = encoding["input_ids"].to(device)
    attention_mask = encoding["attention_mask"].to(device)

    with torch.no_grad():
        selection_outputs, position_outputs = model(input_ids, attention_mask)
        _, predicted_selection = torch.max(selection_outputs, 1)
        _, predicted_position = torch.max(position_outputs, 1)

    return selection_labels[predicted_selection.item()], position_labels[predicted_position.item()]

# Take input from user
user_input = input("Enter the interview text: ")
selection, position = predict(user_input)

print(f"Selection: {selection}, Position: {position}")


Enter the interview text: I have a strong background in software engineering, and have developed several software systems for industry clients
Selection: Selected, Position: HOD
