# IELTS ASSESSMENT

#### Step 1: Converting the audio format (required)
Function: `check_and_convert_audio(audio_path)`

- The audio format should be one of the supported formats: wav, mp3, opus, ogg, amr.
- The audio should be in mono channel ("channel": 1), not stereo.
- The audio should have a sampling rate of 16000 Hz.
- The audio should have a bitrate of at least 96 kbps to ensure optimal API performance.

Requirements: 
- pip install pydub

~ *Make sure you have pydub and ffmpeg installed. You can install pydub using pip, and ffmpeg needs to be installed separately and available in your system's PATH.*

In [6]:
import os
from pydub import AudioSegment

# Audio file extensions (lower-case, without the dot) that are treated as
# supported; check_and_convert_audio compares a file's extension against this list.
SUPPORTED_FORMATS = ["wav", "mp3", "opus", "ogg", "amr"]

def convert_to_wav(input_file, output_file, input_format=None):
    """Decode an audio file with pydub and write it back out as WAV.

    Args:
        input_file: Path of the audio file to read.
        output_file: Path the WAV data is written to. Passing the same path
            as ``input_file`` replaces the source file's contents.
        input_format: Optional format name forwarded to
            ``AudioSegment.from_file`` (for example ``"m4a"``). When left as
            ``None`` no format is forced and detection is left to
            pydub/ffmpeg, so inputs other than .m4a can be decoded too.
    """
    # The whole clip is decoded into memory here, before anything is written.
    audio = AudioSegment.from_file(input_file, format=input_format)

    # export() returns the file object it wrote to; close it so the handle
    # is not left open for the rest of the kernel session.
    audio.export(output_file, format="wav").close()

def check_and_convert_audio(audio_path):
    """Make sure an audio file is mono, 16 kHz WAV-ready and return its path.

    The file is loaded with pydub and checked for channel count, sampling
    rate and container. If anything has to change, a new file named
    ``converted_<original stem>.wav`` is written to the current working
    directory; the input file itself is never modified.

    Args:
        audio_path: Path of the audio file to check.

    Returns:
        ``audio_path`` unchanged when the file is already a mono 16 kHz
        ``.wav``; otherwise the path of the newly written WAV file.
    """
    # splitext copes with extension-less files and dots in directory names,
    # which a plain split('.') would misread as an extension.
    file_ext = os.path.splitext(audio_path)[1].lstrip(".").lower()

    # No format is forced here, so containers outside SUPPORTED_FORMATS are
    # decoded straight from the source instead of being rewritten in place.
    audio = AudioSegment.from_file(audio_path)

    # Flag to track whether a converted copy has to be written
    needs_conversion = False

    if file_ext not in SUPPORTED_FORMATS:
        print(f"Unsupported audio format '{file_ext}'. Converting to wav.")
        needs_conversion = True

    # Check if mono
    if audio.channels != 1:
        print("Audio is stereo. Converting to mono.")
        audio = audio.set_channels(1)
        needs_conversion = True

    # Check sampling rate
    if audio.frame_rate != 16000:
        print(f"Audio sampling rate is {audio.frame_rate} Hz. Converting to 16000 Hz.")
        audio = audio.set_frame_rate(16000)
        needs_conversion = True

    # The bitrate is not inspected, so every non-wav input is re-exported
    # as WAV rather than trusted to meet the threshold.
    if file_ext != "wav":
        print("Converting audio to have at least 96 kbps bitrate.")
        needs_conversion = True

    if not needs_conversion:
        print("Audio meets all criteria. No conversion needed.")
        return audio_path

    # Strip only the final extension so dotted names such as
    # "take.2.mp3" keep their full stem ("converted_take.2.wav").
    stem = os.path.splitext(os.path.basename(audio_path))[0]
    output_path = f"converted_{stem}.wav"

    # export() returns the file object it wrote to; close it explicitly.
    # NOTE(review): the bitrate hint is kept from the original call; it
    # probably has no effect on a plain PCM WAV export - confirm with pydub docs.
    audio.export(output_path, format="wav", bitrate="96k").close()
    print(f"Audio converted and saved as {output_path}")
    return output_path

![alt text](<IELTS.png>)

### Step 2 : Extracting the results 

Requirements: 
- hashlib (part of the Python standard library; no installation needed)
- pip install requests

Run the following script to retrieve the assessment result as JSON. It expects a local `config` module that defines `appKey` and `secretKey`.

In [7]:
#_*_encoding:utf-8_*_
import time
import hashlib
import requests
import json
import config    
    
def get_IELTS_assessment_report(input_audio, question_ielts, timeout=120):
    """Submit a spoken answer to the SpeechSuper API and return the raw reply.

    The audio is first passed through ``check_and_convert_audio``; the
    resulting file is then posted together with the signed ``connect`` and
    ``start`` commands.

    Args:
        input_audio: Path of the audio file containing the spoken answer.
        question_ielts: The question text, sent as ``question_prompt``.
        timeout: Seconds to wait for the HTTP request before ``requests``
            raises a timeout error. Pass ``None`` to wait indefinitely.

    Returns:
        The response body as a string (callers parse it with ``json.loads``).
    """
    appKey = config.appKey
    secretKey = config.secretKey

    baseURL = "https://api.speechsuper.com/"
    coreType = "speak.eval.pro"  # Change the coreType according to your needs.
    userId = "guest"
    audioSampleRate = 16000

    # Transform the input audio as per the required parameters.
    audioPath = check_and_convert_audio(input_audio)
    audioType = audioPath.split('.')[-1].lower()

    # Taken after the conversion so the signed timestamp is as fresh as
    # possible when the request is actually sent.
    timestamp = str(int(time.time()))

    url = baseURL + coreType

    # Both signatures are SHA-1 hex digests over the concatenated credentials.
    connectStr = (appKey + timestamp + secretKey).encode("utf-8")
    connectSig = hashlib.sha1(connectStr).hexdigest()
    startStr = (appKey + timestamp + userId + secretKey).encode("utf-8")
    startSig = hashlib.sha1(startStr).hexdigest()

    params = {
        "connect": {
            "cmd": "connect",
            "param": {
                "sdk": {
                    "version": 16777472,
                    "source": 9,
                    "protocol": 2
                },
                "app": {
                    "applicationId": appKey,
                    "sig": connectSig,
                    "timestamp": timestamp
                }
            }
        },
        "start": {
            "cmd": "start",
            "param": {
                "app": {
                    "userId": userId,
                    "applicationId": appKey,
                    "timestamp": timestamp,
                    "sig": startSig
                },
                "audio": {
                    "audioType": audioType,
                    "channel": 1,
                    "sampleBytes": 2,
                    "sampleRate": audioSampleRate
                },
                "request": {
                    # Reuse coreType so the URL and the request body cannot drift apart.
                    "coreType": coreType,
                    "question_prompt": question_ielts,
                    "test_type": "ielts",
                    "task_type": "ielts_part1",
                    "model": "non_native",
                    "penalize_offtopic": 1,
                    "phoneme_output": 1
                }
            }
        }
    }

    data = {'text': json.dumps(params)}
    headers = {"Request-Index": "0"}

    # The context manager closes the audio file even if the request fails.
    with open(audioPath, 'rb') as audio_file:
        res = requests.post(
            url,
            data=data,
            headers=headers,
            files={"audio": audio_file},
            timeout=timeout,
        )

    # Drop any characters that cannot be encoded as UTF-8 and return the body
    # as text; parsing is left to the caller.
    return res.text.encode('utf-8', 'ignore').decode('utf-8')

In [8]:
# Question text for the assessment.
q = "What are your hobbies?"
# File name of the audio recording to be assessed.
audio = "converted_What are your hobbies.wav"

In [9]:
# The recording goes in as input_audio and the question text as question_ielts.
result = get_IELTS_assessment_report(input_audio=audio, question_ielts=q)

Audio sampling rate is 48000 Hz. Converting to 16000 Hz.
Audio converted and saved as converted_converted_advertisement.wav


In [10]:
json.dumps(json.loads(result))
# json.loads parses the JSON-formatted string `result` into Python objects
# (a dictionary or a list, depending on the JSON structure).
# json.dumps serializes a Python object back into a JSON-formatted string.
# Chained as json.dumps(json.loads(result)), the two calls normalize the JSON string.

'{"dtLastResponse": "2024-07-24 11:18:50:316", "applicationId": "1718172209000328", "recordId": "66a0728313f0a21f00014f24", "eof": 1, "result": {"overall": 7, "rhythm": 88, "speed": 106, "fluency_stats": {"liaison_cnt": 10, "loss_of_plosion_cnt": 5, "pause_cnt": 42}, "kernel_version": "1.4.8", "pronunciation": 7, "resource_version": "1.1.6", "numeric_duration": 94.871, "equivalent_scores": {"toefl_scores": {"overall": 100, "fluency": 97, "vocab": 112, "grammar": 97, "pronunciation": 100}, "pte_scores": {"overall": 82, "fluency": 79, "vocab": 86, "grammar": 79, "pronunciation": 82}, "duolingo_scores": {"overall": 145, "fluency": 140, "vocab": 150, "grammar": 140, "pronunciation": 145}, "percentage_scores": {"overall": 82, "fluency": 80, "vocab": 87, "grammar": 80, "pronunciation": 82}, "ket_scores": {"overall": 150, "fluency": 150, "vocab": 150, "grammar": 150, "pronunciation": 150}, "pet_scores": {"overall": 170, "fluency": 170, "vocab": 170, "grammar": 170, "pronunciation": 170}, "cef

In [11]:
# Persist the parsed assessment as pretty-printed JSON (4-space indent).
with open("results.json", "w") as report_file:
    report_file.write(json.dumps(json.loads(result), indent=4))