In [1]:
import speech_recognition as sr
import language_tool_python

supervised_learning = "Supervised learning is a type of machine learning where the algorithm is trained on a labeled dataset, meaning that the input data is paired with corresponding output labels. The goal of supervised learning is to learn a mapping function from the input variables to the output variables, based on the labeled examples provided during training. In other words, the algorithm learns to make predictions or decisions by generalizing patterns from the labeled training data.Examples of supervised learning include image classification, spam email detection, handwriting recognition, speech recognition, predicting stock prices, language translation, medical diagnosis, customer churn prediction, autonomous vehicles, and credit scoring."
unsupervised_learning = "Unsupervised learning is a machine learning paradigm where the algorithm is trained on unlabeled data, and the objective is to discover inherent patterns or structures within the data without explicit guidance. In unsupervised learning, the algorithm explores the data's inherent structure, often through clustering or dimensionality reduction techniques. Examples of unsupervised learning include clustering similar documents in a large corpus, anomaly detection to identify unusual patterns, dimensionality reduction for feature extraction, topic modeling to discover themes in text data, and generative modeling for creating new data instances based on the learned patterns"
reinforcement_learning = "Reinforcement learning is a type of machine learning where an agent learns to make decisions by interacting with an environment. The agent receives feedback in the form of rewards or penalties based on its actions. The goal is for the agent to learn a policy that maximizes the cumulative reward over time."
overfitting = "Overfitting occurs when a machine learning model learns the training data too well, including its noise and outliers, leading to poor generalization on new, unseen data. It often results from models being too complex relative to the amount of training data. To address overfitting, techniques such as regularization, reducing model complexity, increasing the size of the training dataset, and using cross-validation to evaluate model performance on different subsets of data can be employed"
cross_validation = "Cross-validation is a technique used to assess a model's performance by partitioning the dataset into multiple subsets. The model is trained on some subsets and tested on others, allowing for a more robust evaluation. Common methods include k-fold cross-validation, where the data is divided into k subsets, and the model is trained and tested k times, each time using a different subset for testing."
bias_variance_tradeoff = "The bias-variance tradeoff is a fundamental concept in machine learning that deals with finding the right level of model complexity. High bias (underfitting) occurs when a model is too simple and cannot capture the underlying patterns in the data. High variance (overfitting) occurs when a model is too complex and fits the training data too closely, failing to generalize well to new data. Balancing bias and variance is crucial for building models that perform well on diverse datasets. Regularization techniques and model selection are commonly used to manage the bias-variance tradeoff"


r = sr.Recognizer()


def speech_to_text(audio_file):
  """
  Opens and listens to an audio file and translates it to text
  Args: audio file
  Returns: text of transcribed audio file
  """
  with sr.AudioFile(audio_file) as source:
    audio_text = r.listen(source)
    try:
        text = r.recognize_google(audio_text)
        print('Converting audio transcripts into text ...')
        # print(text)
    except:
         print('Sorry.. run again...')

    return text



import spacy

def extract_keywords(text):
    nlp = spacy.load("en_core_web_sm")
    doc = nlp(text)
    keywords = [token.text for token in doc if token.is_alpha]
    return keywords



def check_semantic(text):
    nlp = spacy.load("en_core_web_sm")
    doc = nlp(text)

    # Example: Check for specific semantic elements like named entities
    entities = [ent.text for ent in doc.ents]

    # You can also use dependency parsing to analyze sentence structure
    # For example, check if certain words are connected in a specific way
    root_verb = [token.text for token in doc if token.dep_ == 'ROOT' and token.pos_ == 'VERB']

    # Example custom scoring function: Assign a score based on the number of entities
    # and the presence of a root verb
    semantic_score = calculate_semantic_score(entities, root_verb)

    return semantic_score



def calculate_semantic_score(entities, root_verb):
    # Example: Assign a higher score if entities and a root verb are present
    score = 0
    if entities:
        score += 0.5
    if root_verb:
        score += 0.5
    return score



def check_syntax(text):
    tool = language_tool_python.LanguageTool('en-US')
    matches = tool.check(text)

    # Count the number of matches
    num_matches = len(matches)

    # Assign a score based on the number of matches
    # You can customize this scoring logic based on your requirements
    score = (1 - (num_matches*0.035))  # For example, inverse of the number of matches

    return score, matches



def score_answer(chosen_number, keywords):
    correct_answers = [
        "Supervised learning is a type of machine learning where the algorithm is trained on a labeled dataset, meaning that the input data is paired with corresponding output labels. The goal of supervised learning is to learn a mapping function from the input variables to the output variables, based on the labeled examples provided during training. In other words, the algorithm learns to make predictions or decisions by generalizing patterns from the labeled training data.Examples of supervised learning include image classification, spam email detection, handwriting recognition, speech recognition, predicting stock prices, language translation, medical diagnosis, customer churn prediction, autonomous vehicles, and credit scoring.",
        "Unsupervised learning is a machine learning paradigm where the algorithm is trained on unlabeled data, and the objective is to discover inherent patterns or structures within the data without explicit guidance. In unsupervised learning, the algorithm explores the data's inherent structure, often through clustering or dimensionality reduction techniques. Examples of unsupervised learning include clustering similar documents in a large corpus, anomaly detection to identify unusual patterns, dimensionality reduction for feature extraction, topic modeling to discover themes in text data, and generative modeling for creating new data instances based on the learned patterns",
        "Reinforcement learning is a type of machine learning where an agent learns to make decisions by interacting with an environment. The agent receives feedback in the form of rewards or penalties based on its actions. The goal is for the agent to learn a policy that maximizes the cumulative reward over time.",
        "Overfitting occurs when a machine learning model learns the training data too well, including its noise and outliers, leading to poor generalization on new, unseen data. It often results from models being too complex relative to the amount of training data. To address overfitting, techniques such as regularization, reducing model complexity, increasing the size of the training dataset, and using cross-validation to evaluate model performance on different subsets of data can be employed",
        "Cross-validation is a technique used to assess a model's performance by partitioning the dataset into multiple subsets. The model is trained on some subsets and tested on others, allowing for a more robust evaluation. Common methods include k-fold cross-validation, where the data is divided into k subsets, and the model is trained and tested k times, each time using a different subset for testing.",
        "The bias-variance tradeoff is a fundamental concept in machine learning that deals with finding the right level of model complexity. High bias (underfitting) occurs when a model is too simple and cannot capture the underlying patterns in the data. High variance (overfitting) occurs when a model is too complex and fits the training data too closely, failing to generalize well to new data. Balancing bias and variance is crucial for building models that perform well on diverse datasets. Regularization techniques and model selection are commonly used to manage the bias-variance tradeoff"
    ]

    correct_answer = correct_answers[chosen_number - 1]  # Adjust for 0-based index

    keyword_match = set(keywords) & set(correct_answer.split())
    keyword_score = len(keyword_match) / len(correct_answer.split()) * 100
    return keyword_score


def calculate_scores(semantic, syntax, answer):
    total_score = 0.2 * semantic + 0.2 * syntax + 0.6 * answer
    return total_score



def evaluate_student_answer(audio_path, chosen_number):
    # Assuming correct_answer_keywords is a list of keywords related to the correct answer
    correct_answer_keywords = get_correct_answer_keywords(chosen_number)
    # Rest of the code remains the same
    text = speech_to_text(audio_path)
    keywords = extract_keywords(text)
    syntax_matches = check_syntax(text)
    semantic_score = check_semantic(text)
    answer_score = score_answer(chosen_number, keywords)  # Fix here
    syntax_score = 100 - (len(syntax_matches) * 3.5)  # Adjust as needed
    final_score = calculate_scores(semantic_score, syntax_score, answer_score)

    return final_score

def get_correct_answer_keywords(chosen_number):
    # Assuming chosen_number is between 1 and 6
    chosen_index = chosen_number - 1
    correct_answers = [
        "Supervised learning is a type of machine learning where the algorithm is trained on a labeled dataset, meaning that the input data is paired with corresponding output labels. The goal of supervised learning is to learn a mapping function from the input variables to the output variables, based on the labeled examples provided during training. In other words, the algorithm learns to make predictions or decisions by generalizing patterns from the labeled training data.Examples of supervised learning include image classification, spam email detection, handwriting recognition, speech recognition, predicting stock prices, language translation, medical diagnosis, customer churn prediction, autonomous vehicles, and credit scoring.",
        "Unsupervised learning is a machine learning paradigm where the algorithm is trained on unlabeled data, and the objective is to discover inherent patterns or structures within the data without explicit guidance. In unsupervised learning, the algorithm explores the data's inherent structure, often through clustering or dimensionality reduction techniques. Examples of unsupervised learning include clustering similar documents in a large corpus, anomaly detection to identify unusual patterns, dimensionality reduction for feature extraction, topic modeling to discover themes in text data, and generative modeling for creating new data instances based on the learned patterns",
        "Reinforcement learning is a type of machine learning where an agent learns to make decisions by interacting with an environment. The agent receives feedback in the form of rewards or penalties based on its actions. The goal is for the agent to learn a policy that maximizes the cumulative reward over time.",
        "Overfitting occurs when a machine learning model learns the training data too well, including its noise and outliers, leading to poor generalization on new, unseen data. It often results from models being too complex relative to the amount of training data. To address overfitting, techniques such as regularization, reducing model complexity, increasing the size of the training dataset, and using cross-validation to evaluate model performance on different subsets of data can be employed",
        "Cross-validation is a technique used to assess a model's performance by partitioning the dataset into multiple subsets. The model is trained on some subsets and tested on others, allowing for a more robust evaluation. Common methods include k-fold cross-validation, where the data is divided into k subsets, and the model is trained and tested k times, each time using a different subset for testing.",
        "The bias-variance tradeoff is a fundamental concept in machine learning that deals with finding the right level of model complexity. High bias (underfitting) occurs when a model is too simple and cannot capture the underlying patterns in the data. High variance (overfitting) occurs when a model is too complex and fits the training data too closely, failing to generalize well to new data. Balancing bias and variance is crucial for building models that perform well on diverse datasets. Regularization techniques and model selection are commonly used to manage the bias-variance tradeoff"
    ]

    return extract_keywords(correct_answers[chosen_index])

# Example usage
chosen_number = 1  # Assuming the student picked 1
paths = r"D:\AI_interview\recorded\answer_1.wav"
score = evaluate_student_answer(paths, chosen_number)
print(f"Final Score: {score}")


Converting audio transcripts into text ...


Downloading LanguageTool 5.7: 100%|█████████████████████████████████████████████████| 225M/225M [00:37<00:00, 6.01MB/s]
Unzipping C:\Users\haris\AppData\Local\Temp\tmpwrlumrvo.zip to C:\Users\haris\.cache\language_tool_python.
Downloaded https://www.languagetool.org/download/LanguageTool-5.7.zip to C:\Users\haris\.cache\language_tool_python.


Final Score: 32.71764705882353
