<a href="https://colab.research.google.com/github/akshra09/Grammer-Scoring-Engine/blob/main/Grammer_scoring_Engine_Akshra_Verma.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [9]:
#Using kaggle to host
from google.colab import files
files.upload()

Saving kaggle.json to kaggle.json


{'kaggle.json': b'{"username":"akshraverma09","key":"bf042976131eaf510a8b28bef95e41b2"}'}

In [10]:
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

In [11]:
!kaggle datasets download tapakah68/emotions-on-audio-dataset

Dataset URL: https://www.kaggle.com/datasets/tapakah68/emotions-on-audio-dataset
License(s): Attribution-NonCommercial-NoDerivatives 4.0 International (CC BY-NC-ND 4.0)


In [12]:
!unzip emotions-on-audio-dataset.zip -d emotions_audio_data


Archive:  emotions-on-audio-dataset.zip
  inflating: emotions_audio_data/files/00026029e0--64991b6eef1fe70609d48edc/euphoric.wav  
  inflating: emotions_audio_data/files/00026029e0--64991b6eef1fe70609d48edc/joyfully.wav  
  inflating: emotions_audio_data/files/00026029e0--64991b6eef1fe70609d48edc/sad.wav  
  inflating: emotions_audio_data/files/00026029e0--64991b6eef1fe70609d48edc/surprised.wav  
  inflating: emotions_audio_data/files/00026029e0--64991b72e0daf97163c09c66/euphoric.wav  
  inflating: emotions_audio_data/files/00026029e0--64991b72e0daf97163c09c66/joyfully.wav  
  inflating: emotions_audio_data/files/00026029e0--64991b72e0daf97163c09c66/sad.wav  
  inflating: emotions_audio_data/files/00026029e0--64991b72e0daf97163c09c66/surprised.wav  
  inflating: emotions_audio_data/files/00026029e0--64991b7fd94c0d5726dec353/euphoric.wav  
  inflating: emotions_audio_data/files/00026029e0--64991b7fd94c0d5726dec353/joyfully.wav  
  inflating: emotions_audio_data/files/00026029e0--64991b7

In [13]:
#installing important libraries
!pip install -q openai-whisper
!pip install -q language-tool-python
!pip install -q gradio

In [14]:

!pip install -q gradio

In [15]:
# Import necessary libraries
import whisper # For speech-to-text transcription
import language_tool_python # For grammar checking
import gradio as gr # For building the web-based UI
import matplotlib.pyplot as plt # For plotting the grammar score chart
import tempfile # For temporary file handling
import os

# Fix matplotlib backend for Colab
import matplotlib
matplotlib.use('Agg')

# Score history
score_history = []

# LanguageTool supports different regional language codes.
# We map Whisper language codes to LanguageTool-compatible codes
lt_languages = {
    'en': 'en-US',
    'es': 'es',       # Spanish
    'fr': 'fr',       # French
    'de': 'de',       # German
    'it': 'it',       # Italian
    'pt': 'pt-PT',    # Portuguese
    # Add more if needed
}

In [16]:
# Define a class to handle transcription and grammar scoring
class GrammarScorer:
    def __init__(self):
        print("Loading Whisper model...")
        self.model = whisper.load_model("base")   # Load the "base" model of Whisper (lightweight but good)
        print("Whisper model loaded.")

    def transcribe(self, audio_path):
        print(f"Transcribing: {audio_path}")
        result = self.model.transcribe(audio_path)  # Transcribe the audio
        text = result["text"] # Extract the transcribed text
        lang = result["language"]  # Extract the detected language
        print(f"Detected language: {lang}, Text: {text}")
        return text, lang

    def grammar_score(self, text, lang_code):
      # Use LanguageTool code (fallback to 'en-US' if unknown)
        lt_code = lt_languages.get(lang_code, 'en-US')
        tool = language_tool_python.LanguageTool(lt_code)

        matches = tool.check(text)  # Check grammar issues in the text
        num_errors = len(matches)
        num_words = len(text.split())

        # Calculate grammar score: max 10, deducted by error rate
        error_rate = num_errors / max(num_words, 1)
        score = round(max(0, 10 - error_rate * 10), 2)

        # Collect human-readable feedback from grammar matches
        feedback = [f"• {m.message} (\"{text[m.offset:m.offset + m.errorLength]}\")" for m in matches]

        print(f"Errors: {num_errors}, Words: {num_words}, Score: {score}")
        return score, feedback


    def process_audio(self, audio_path):
        text, lang = self.transcribe(audio_path) # Get transcription and language
        score, feedback = self.grammar_score(text, lang) # Get score and feedback
        score_history.append(score)
        return text, score, feedback, lang

In [17]:
# Function to plot grammar score history chart
def plot_score_chart():
    if not score_history:
        return None

    plt.figure(figsize=(5, 2))
    plt.plot(score_history, marker='o', color='blue')
    plt.title("Grammar Score History")
    plt.xlabel("Attempt")
    plt.ylabel("Score")
    plt.ylim(0, 10)
    plt.grid(True)

     # Save plot as temporary image file
    tmpfile = tempfile.NamedTemporaryFile(delete=False, suffix='.png')
    plt.savefig(tmpfile.name)
    plt.close()
    return tmpfile.name

In [18]:
# Initialize grammar scorer object
scorer = GrammarScorer()

Loading Whisper model...
Whisper model loaded.


In [19]:
# Main function that gets called when user clicks 'Analyze'
def analyze(audio):
    text, score, feedback, lang = scorer.process_audio(audio)
    chart_path = plot_score_chart()
    return text, score, "\n".join(feedback if feedback else ["No issues found."]), f"Detected: {lang.upper()}", chart_path




In [20]:
# 🌐 Build the Gradio Web Interface

with gr.Blocks() as app:
    gr.Markdown("# 🗣️ Grammar Scoring Engine (Multilingual)")
    gr.Markdown("Upload or record your voice. We'll transcribe, detect grammar issues, and give a score + feedback!")

     # Audio input section (record or upload)
    with gr.Row():
        audio_input = gr.Audio(type="filepath", label="🎤 Record or Upload Audio")

    # Transcription and score output
    with gr.Row():
        transcript = gr.Textbox(label="📝 Transcription")
        score = gr.Number(label="✅ Grammar Score")

    # Feedback and additional outputs
    eedback_box = gr.Textbox(label="📋 Feedback")
    lang_box = gr.Textbox(label="🌍 Detected Language")
    chart_output = gr.Image(label="📈 Score History")

    # Button to trigger analysis
    analyze_btn = gr.Button("Analyze")
    analyze_btn.click(fn=analyze, inputs=[audio_input], outputs=[transcript, score, feedback_box, lang_box, chart_output])

# Launch the app (share=True generates a public link for testing)
app.launch(share=True)

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://2ebbf646a8933387c6.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


