In [None]:
# Libraries
import os

import librosa
import numpy as np
import pandas as pd
import soundfile as sf
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR

In [None]:
# Read the label spreadsheet into a DataFrame
labels = pd.read_excel('label.xlsx')

# Preview the first five rows as a quick sanity check
print(labels.head(n=5))

In [None]:
def extract_features(audio_path, n_mfcc=13, sr=None):
    """Summarise one audio file as a single fixed-length feature vector.

    The file is decoded with ``librosa.load`` and three frame-level
    representations are computed from the signal: MFCCs, an STFT chromagram
    and spectral contrast. Each representation is averaged along axis 1
    and the three mean vectors are concatenated in that order.

    Parameters
    ----------
    audio_path : str or path-like
        Audio file to load.
    n_mfcc : int, default 13
        Number of MFCC coefficients requested from ``librosa.feature.mfcc``.
    sr : number or None, default None
        Target sampling rate handed to ``librosa.load``. ``None`` keeps the
        file's native rate; pass a fixed value to resample every file to a
        common rate so features are comparable across recordings.

    Returns
    -------
    numpy.ndarray
        1-D vector: ``n_mfcc`` MFCC means, followed by one mean per chroma
        row and one mean per spectral-contrast row returned by librosa.
    """
    signal, sample_rate = librosa.load(audio_path, sr=sr)

    mfccs = librosa.feature.mfcc(y=signal, sr=sample_rate, n_mfcc=n_mfcc)
    chroma = librosa.feature.chroma_stft(y=signal, sr=sample_rate)
    spectral_contrast = librosa.feature.spectral_contrast(y=signal, sr=sample_rate)

    # Collapse each (rows, frames) matrix to one mean per row so clips of
    # different durations all map to vectors of the same length.
    per_feature_means = [
        np.mean(matrix, axis=1)
        for matrix in (mfccs, chroma, spectral_contrast)
    ]
    return np.hstack(per_feature_means)

# Folder that holds the audio files named in the label sheet
audio_dir = 'dataset/'

# Accumulators: one entry per recording that is actually found on disk
feature_rows = []
pronunciation_scores = []
fluency_scores = []

# Walk the label sheet; rows whose audio file is missing are reported and skipped
for _, row in labels.iterrows():
    audio_path = os.path.join(audio_dir, row['audio'])

    if not os.path.exists(audio_path):
        print(f'File {audio_path} does not exist.')
        continue

    feature_rows.append(extract_features(audio_path))
    pronunciation_scores.append(row['pronunciation'])
    fluency_scores.append(row['fluency'])

# Freeze the accumulators into the arrays the modelling cells consume
X = np.array(feature_rows)
y_pronunciation = np.array(pronunciation_scores)
y_fluency = np.array(fluency_scores)

print(f'Feature matrix shape: {X.shape}')
print(f'Pronunciation scores shape: {y_pronunciation.shape}')
print(f'Fluency scores shape: {y_fluency.shape}')


In [None]:
# Split once: train_test_split applies the same shuffle to every array it is
# given, so both score vectors stay aligned with the same feature rows.
# (Previously the split was run twice and X_train/X_test were overwritten,
# relying on the repeated random_state to keep the targets consistent.)
(X_train, X_test,
 y_pron_train, y_pron_test,
 y_flu_train, y_flu_test) = train_test_split(
    X, y_pronunciation, y_fluency, test_size=0.2, random_state=42)

# SVMs are not scale invariant, so standardise the features before the RBF
# kernel sees them. Wrapping the scaler in a pipeline fits it on the training
# rows only and re-applies it automatically inside every .predict() call.

# Pronunciation-score model
model_pron = make_pipeline(StandardScaler(), SVR(kernel='rbf'))
model_pron.fit(X_train, y_pron_train)

# Fluency-score model
model_flu = make_pipeline(StandardScaler(), SVR(kernel='rbf'))
model_flu.fit(X_train, y_flu_train)

# Predict on the held-out rows
y_pron_pred = model_pron.predict(X_test)
y_flu_pred = model_flu.predict(X_test)

# Mean squared error per target (lower is better)
mse_pron = mean_squared_error(y_pron_test, y_pron_pred)
mse_flu = mean_squared_error(y_flu_test, y_flu_pred)

print(f'Pronunciation MSE: {mse_pron}')
print(f'Fluency MSE: {mse_flu}')

In [None]:
# Score one unseen recording with both trained models
new_audio_features = extract_features('don1.mp3')

# predict() takes a 2-D (n_samples, n_features) input, so present the single
# feature vector as a one-row matrix and reuse it for both models
single_sample = new_audio_features.reshape(1, -1)

pron_score_pred = model_pron.predict(single_sample)
fluency_score_pred = model_flu.predict(single_sample)

# Each prediction array holds exactly one value for the one input row
print(f'Predicted Pronunciation Score: {pron_score_pred[0]}')
print(f'Predicted Fluency Score: {fluency_score_pred[0]}')