<a href="https://www.kaggle.com/code/anishakumari202263/grammar-scoring-engine-from-voice-samples?scriptVersionId=281249970" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Lazily build the full path of every file found beneath the Kaggle input
# directory, then print the paths one per line in walk order.
input_paths = (
    os.path.join(root, name)
    for root, _subdirs, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_paths:
    print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [1]:
# ----------------------------
# 1. Install Dependencies
# ----------------------------
!pip install openai-whisper
!pip install language_tool_python

# ----------------------------
# 2. Imports
# ----------------------------
import os
import whisper
import language_tool_python
import pandas as pd
from IPython.display import Audio

# ----------------------------
# 3. Initialize Models
# ----------------------------
# Load Whisper model for speech-to-text
print("Loading Whisper model...")
# Speech-to-text model; on Kaggle the 'tiny' or 'base' checkpoints are recommended.
model = whisper.load_model(name="base")

print("Initializing LanguageTool...")
# Grammar checker configured for US English.
tool = language_tool_python.LanguageTool(language='en-US')

# ----------------------------
# 4. Define Folder Paths
# ----------------------------
# Input: directory holding the audio samples (adjust to the attached dataset).
audio_folder = "/kaggle/input/voice_samples/"
# Output: CSV that receives one row of results per processed audio file.
output_csv = "/kaggle/working/grammar_scores.csv"

# ----------------------------
# 5. Helper Function: Process Single Audio File
# ----------------------------
def process_audio_file(file_path):
    """
    Transcribe one audio file with Whisper and score it with LanguageTool.

    Input: Path to audio file (handed to ``model.transcribe``)
    Output: Dict with keys ``filename``, ``original_text``, ``corrected_text``,
            ``total_errors`` and ``grammar_score``

    The score is ``100 - (matches / words * 100)``, floored at 0 and rounded
    to two decimals. A transcript with no words uses a word count of 1, so it
    scores 100 whenever the checker reports no matches.
    """
    # Transcribe audio; strip surrounding whitespace so it is neither stored
    # in the results nor handed to the grammar checker.
    result = model.transcribe(file_path)
    text = result['text'].strip()

    # Grammar checking: run the checker once and reuse its matches for the
    # correction. tool.correct(text) would check the same text a second time.
    matches = tool.check(text)
    corrected_text = language_tool_python.utils.correct(text, matches)

    # Simple scoring: one penalty per reported match, relative to word count
    total_errors = len(matches)
    word_count = max(1, len(text.split()))  # avoid division by zero
    grammar_score = max(0, 100 - (total_errors / word_count * 100))

    return {
        "filename": os.path.basename(file_path),
        "original_text": text,
        "corrected_text": corrected_text,
        "total_errors": total_errors,
        "grammar_score": round(grammar_score, 2)
    }

# ----------------------------
# 6. Process All Audio Files
# ----------------------------
results = []

# os.listdir() returns names in arbitrary order, so sort them to keep the row
# order of the results (and of the CSV written from them) stable between runs.
for file_name in sorted(os.listdir(audio_folder)):
    # Only these audio extensions are processed; the match is case-insensitive.
    if file_name.lower().endswith((".wav", ".mp3", ".m4a")):
        file_path = os.path.join(audio_folder, file_name)
        print(f"Processing {file_name}...")
        result = process_audio_file(file_path)
        results.append(result)

# ----------------------------
# 7. Save Results to CSV
# ----------------------------
# One row per processed file, written without the index column.
df = pd.DataFrame(data=results)
df.to_csv(path_or_buf=output_csv, index=False)
print(f"Results saved to {output_csv}")

# ----------------------------
# 8. Display Results
# ----------------------------
# Last expression of the cell: renders the first five rows.
df.head(n=5)


Collecting openai-whisper
  Downloading openai_whisper-20250625.tar.gz (803 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m803.2/803.2 kB[0m [31m12.3 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch->openai-whisper)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch->openai-whisper)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch->openai-whisper)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch->openai-whisper)
  Downloading nvidi

100%|████████████████████████████████████████| 139M/139M [00:01<00:00, 115MiB/s]


Initializing LanguageTool...


SystemError: Detected java 11.0. LanguageTool requires Java >= 17 for version latest.

In [4]:
import os

# List all datasets in /kaggle/input
print("Datasets in /kaggle/input:")
# Names of the entries directly under the input root.
dataset_names = os.listdir("/kaggle/input")
print(dataset_names)


Datasets in /kaggle/input:
['audio-file', 'audio-recording-sample']


In [5]:
import os

# Check first dataset
audio_folder1 = "/kaggle/input/audio-file"
first_listing = os.listdir(audio_folder1)
print("Files in audio-file dataset:", first_listing)

# Same inspection for the second attached dataset
audio_folder2 = "/kaggle/input/audio-recording-sample"
second_listing = os.listdir(audio_folder2)
print("Files in audio-recording-sample dataset:", second_listing)


Files in audio-file dataset: ['WhatsApp Audio 2025-11-22 at 18.01.39_5153fc32.dat.unknown']
Files in audio-recording-sample dataset: ['Audio recordin sample.unknown']


In [6]:
# ----------------------------
# 1. Install Dependencies
# ----------------------------
!pip install --quiet openai-whisper
!pip install --quiet textblob
!pip install --quiet pandas

# ----------------------------
# 2. Imports
# ----------------------------
import os
import whisper
import pandas as pd
from textblob import TextBlob
from IPython.display import Audio, display, HTML

# ----------------------------
# 3. Initialize Whisper Model
# ----------------------------
print("Loading Whisper model...")
model = whisper.load_model("tiny")  # Fast for Kaggle

# ----------------------------
# 4. Define Audio Folder Paths
# ----------------------------
audio_folder = "/kaggle/input/kaggle-audio"
output_csv = "/kaggle/working/grammar_scores.csv"

# Check audio files: keep only supported extensions (case-insensitive match).
# os.listdir() order is arbitrary, so sort for a reproducible processing order.
audio_files = sorted(
    f for f in os.listdir(audio_folder)
    if f.lower().endswith((".wav", ".mp3", ".m4a"))
)
if not audio_files:
    # A single formatted message: passing the folder as a second positional
    # argument would make the exception text render as a tuple.
    # FileNotFoundError is still an Exception, so broad handlers keep working.
    raise FileNotFoundError(f"No audio files found in folder: {audio_folder}")

print("Audio files found:", audio_files)

# ----------------------------
# 5. Helper Function: Process Single Audio File
# ----------------------------
def process_audio_file(file_path):
    """
    Transcribe an audio file, run TextBlob correction on the transcript and
    score the transcript by how many of its words the correction left alone.

    Returns a dict with ``filename``, ``original_text``, ``corrected_text``,
    ``total_errors`` and ``grammar_score`` (0-100, two decimals).

    NOTE(review): the score only counts whitespace-separated words that
    ``TextBlob.correct()`` rewrote; that method is documented as a spelling
    corrector, so confirm this is the intended "grammar" metric.
    """
    # Speech-to-text, without surrounding whitespace
    transcript = model.transcribe(file_path)['text'].strip()

    # Corrected version of the transcript as a plain string
    fixed = str(TextBlob(transcript).correct())

    # Walk both word sequences in parallel and count positions that differ
    source_tokens = transcript.split()
    fixed_tokens = fixed.split()
    mismatches = 0
    for before, after in zip(source_tokens, fixed_tokens):
        if before != after:
            mismatches += 1

    # An empty transcript is treated as one word to avoid dividing by zero
    denominator = len(source_tokens) or 1
    penalty = mismatches / denominator * 100
    score = max(0, 100 - penalty)

    summary = {}
    summary["filename"] = os.path.basename(file_path)
    summary["original_text"] = transcript
    summary["corrected_text"] = fixed
    summary["total_errors"] = mismatches
    summary["grammar_score"] = round(score, 2)
    return summary

# ----------------------------
# 6. Process All Audio Files
# ----------------------------
# Collect one result dict per discovered audio file, announcing each file
# before it is transcribed.
results = []
for file_name in audio_files:
    print(f"Processing {file_name}...")
    results.append(process_audio_file(os.path.join(audio_folder, file_name)))

# ----------------------------
# 7. Save Results to CSV
# ----------------------------
# One row per processed file, written without the index column.
df = pd.DataFrame(data=results)
df.to_csv(path_or_buf=output_csv, index=False)
print(f"Results saved to {output_csv}")

# ----------------------------
# 8. Display Interactive Table with Audio
# ----------------------------
# Standard-library helper, imported here because only this table needs it.
from html import escape

# Build the table as a list of fragments and join once at the end instead of
# growing one string with repeated "+=".
table_parts = ["<table border='1' style='border-collapse:collapse'><tr><th>Audio</th><th>Original Text</th><th>Corrected Text</th><th>Total Errors</th><th>Grammar Score</th></tr>"]

for row in df.itertuples(index=False):
    audio_path = os.path.join(audio_folder, row.filename)
    table_parts.append(
        "<tr>"
        # The audio player markup is produced by IPython and inserted as-is.
        f"<td>{Audio(audio_path, embed=True)._repr_html_()}</td>"
        # Transcripts are free text: escape them so characters such as "<" or
        # "&" are shown literally instead of being parsed as markup.
        f"<td>{escape(str(row.original_text))}</td>"
        f"<td>{escape(str(row.corrected_text))}</td>"
        f"<td>{row.total_errors}</td>"
        f"<td>{row.grammar_score}</td>"
        "</tr>"
    )

table_parts.append("</table>")
table_html = "".join(table_parts)

display(HTML(table_html))


Loading Whisper model...
Audio files found: ['Kaggle Audio.mp3']
Processing Kaggle Audio.mp3...




Results saved to /kaggle/working/grammar_scores.csv


Audio,Original Text,Corrected Text,Total Errors,Grammar Score
Your browser does not support the audio element.,"Hello, my name is Anisha Kumari. I completed my BTX in the central University of Rio now. I actually work on Asinjin Company right now. I am looking for a new job as a data and this. My hobbies are playing that written novel books, exploring new places. I love to do gardening whenever I feel low.","Hello, my name is Anisya Kumari. I completed my BTX in the central University of Rio now. I actually work on Asinjin Company right now. I am looking for a new job as a data and this. By hobbies are playing that written novel books, exploring new places. I love to do gardening whenever I feel low.",2,96.49


Notes:

The FP16 warning is expected on CPU: Whisper requests FP16 by default and automatically falls back to FP32 when FP16 is not supported, so the warning can be ignored.

The table shows:

Audio	Original Text	Corrected Text	Total Errors	Grammar Score
Kaggle Audio.mp3	"Hello, my name is Anisha..."	"Hello, my name is Anisya..."	2	96.49