# In [15]:
import requests
import json
from typing import List, Dict, Any
import statistics
from pathlib import Path
from collections import Counter
from scipy.stats import pearsonr

class MoodleReadabilityAPI:
    """Small client for the ``block_readabilityscore_process_text`` web service.

    Every text is sent in its own POST request to
    ``<base_url>/webservice/rest/server.php`` and the decoded JSON replies are
    returned in the same order as the inputs.
    """

    def __init__(self, base_url: str, token: str, timeout: float = 30.0):
        """Store the connection settings and derive the REST endpoint.

        Args:
            base_url: Root URL of the Moodle site. A trailing slash is
                tolerated when building the endpoint.
            token: Web-service token sent as ``wstoken`` with every request.
            timeout: Seconds ``requests`` waits on each call before raising,
                so an unresponsive server cannot hang the run forever.
        """
        self.base_url = base_url
        self.token = token
        self.timeout = timeout
        # Strip trailing slashes so "http://host/" does not yield "//webservice".
        self.endpoint = f"{base_url.rstrip('/')}/webservice/rest/server.php"

    def calculate_readability(self, texts: List[str], urls: List[str]) -> List[Dict[str, Any]]:
        """Request a readability result for each ``(text, url)`` pair.

        Args:
            texts: Texts to score, sent as ``selectedtext``.
            urls: Page URLs sent as ``pageurl``; must be as long as ``texts``.

        Returns:
            The decoded JSON response for each pair, in input order.

        Raises:
            ValueError: If ``texts`` and ``urls`` differ in length.
            requests.exceptions.RequestException: On connection problems,
                timeouts or non-2xx HTTP statuses.
            RuntimeError: If the service answers with a Moodle error payload.
        """
        if len(texts) != len(urls):
            raise ValueError("The number of texts and URLs must match")

        results = []
        for text, url in zip(texts, urls):
            params = {
                'wstoken': self.token,
                'wsfunction': 'block_readabilityscore_process_text',
                'moodlewsrestformat': 'json',
                'selectedtext': text,
                'pageurl': url,
            }

            response = requests.post(self.endpoint, data=params, timeout=self.timeout)
            response.raise_for_status()
            result = response.json()

            # Moodle reports web-service failures (e.g. an invalid token) as a
            # JSON object carrying an "exception" key. Fail here with the
            # server's message instead of handing an error payload to callers.
            if isinstance(result, dict) and 'exception' in result:
                detail = result.get('message') or result.get('errorcode') or result['exception']
                raise RuntimeError(f"Moodle web service error for {url}: {detail}")

            results.append(result)

        return results

def read_corpus(base_dir: str) -> List[Dict[str, Any]]:
    """Load the level-separated texts stored under ``base_dir``.

    Looks in ``<base_dir>/Texts-SeparatedByReadingLevel`` for the ``Ele-Txt``,
    ``Int-Txt`` and ``Adv-Txt`` sub-directories and reads every ``*.txt`` file
    in each one. Missing level directories are skipped.

    Args:
        base_dir: Directory containing ``Texts-SeparatedByReadingLevel``.

    Returns:
        One dict per file with the keys ``'text'`` (stripped file content),
        ``'level'`` (``'elementary'``, ``'intermediate'`` or ``'advanced'``)
        and ``'file'`` (the file name). Entries are grouped by level in that
        order and sorted by path within each level.

    Raises:
        FileNotFoundError: If the corpus directory does not exist.
    """
    corpus_dir = Path(base_dir) / "Texts-SeparatedByReadingLevel"
    if not corpus_dir.is_dir():
        raise FileNotFoundError(f"Corpus directory not found: {corpus_dir}")

    level_dirs = {'Ele-Txt': 'elementary', 'Int-Txt': 'intermediate', 'Adv-Txt': 'advanced'}

    corpus = []
    for dir_name, level in level_dirs.items():
        level_dir = corpus_dir / dir_name
        if not level_dir.is_dir():
            continue

        # glob() yields entries in filesystem order, which varies between
        # machines; sorting keeps the corpus (and every report built from it)
        # in a reproducible order.
        for file_path in sorted(level_dir.glob('*.txt')):
            corpus.append({
                'text': file_path.read_text(encoding='utf-8').strip(),
                'level': level,
                'file': file_path.name,
            })

    return corpus

def classify_score(score: float) -> str:
    """Map a readability score onto a reading-level label.

    Scores up to and including 9 are ``'elementary'``, scores up to and
    including 12 are ``'intermediate'``, and everything else is ``'advanced'``.
    """
    # Inclusive upper bounds, checked from the lowest level upwards.
    thresholds = ((9, 'elementary'), (12, 'intermediate'))
    for upper_bound, label in thresholds:
        if score <= upper_bound:
            return label
    return 'advanced'

def validate_accuracy(api: MoodleReadabilityAPI, corpus: List[Dict[str, Any]]) -> Dict[str, Any]:
    """Compare the plugin's readability scores with the corpus' labelled levels.

    Every corpus text is scored through ``api``, each score is turned into a
    predicted level with ``classify_score`` and the predictions are compared
    with the level stored on the corpus item.

    Args:
        api: Client whose ``calculate_readability(texts, urls)`` returns one
            dict per text containing a ``'readabilityscore'`` entry.
        corpus: Items with ``'text'``, ``'level'`` and ``'file'`` keys, where
            ``'level'`` is ``'elementary'``, ``'intermediate'`` or
            ``'advanced'``.

    Returns:
        A dict with:
            ``'correlation'``: Pearson correlation between the scores and the
                actual levels (encoded 1, 2, 3).
            ``'overall_accuracy'``: Fraction of items whose predicted level
                equals the actual level.
            ``'level_accuracy'``: The same fraction per actual level (0 when
                the corpus holds no item of that level).
            ``'level_confusion'``: Per actual level, a ``Counter`` of predicted
                levels.
            ``'detailed_results'``: One dict per item with the file name, the
                actual level, the predicted level and the score.

    Raises:
        ValueError: If ``corpus`` is empty or contains an unknown level label;
            also propagated from ``pearsonr`` for inputs it rejects.
        KeyError: If a service response lacks ``'readabilityscore'``.
    """
    if not corpus:
        raise ValueError("Cannot validate accuracy on an empty corpus")

    levels = ('elementary', 'intermediate', 'advanced')
    level_map = {level: rank for rank, level in enumerate(levels, start=1)}

    corpus_levels = [item['level'] for item in corpus]
    unknown_levels = sorted({level for level in corpus_levels if level not in level_map})
    if unknown_levels:
        raise ValueError(f"Unknown corpus level(s): {unknown_levels}")

    texts = [item['text'] for item in corpus]
    urls = [f"http://example.com/{item['file']}" for item in corpus]
    results = api.calculate_readability(texts, urls)

    plugin_scores = [result['readabilityscore'] for result in results]
    # Classify each score once and reuse the prediction everywhere below.
    predicted_levels = [classify_score(score) for score in plugin_scores]

    # Correlate the scores with the ACTUAL corpus levels. Correlating them
    # with levels derived from the scores themselves would only measure how
    # the thresholds bucket the scores, not how well they track difficulty.
    actual_ranks = [level_map[level] for level in corpus_levels]
    correlation, _ = pearsonr(plugin_scores, actual_ranks)

    # Rows are actual levels, columns are predicted levels.
    level_confusion = {level: Counter() for level in levels}
    for actual, predicted in zip(corpus_levels, predicted_levels):
        level_confusion[actual][predicted] += 1

    correct_classifications = sum(
        1 for actual, predicted in zip(corpus_levels, predicted_levels) if actual == predicted
    )
    accuracy = correct_classifications / len(corpus)

    level_accuracy = {}
    for level in levels:
        level_total = sum(level_confusion[level].values())
        # The diagonal entry of the confusion matrix is the number of hits.
        level_accuracy[level] = level_confusion[level][level] / level_total if level_total else 0

    return {
        'correlation': correlation,
        'overall_accuracy': accuracy,
        'level_accuracy': level_accuracy,
        'level_confusion': level_confusion,
        'detailed_results': [
            {
                'file': item['file'],
                'actual_level': item['level'],
                'predicted_level': predicted,
                'gunning_fog_index': score,
            }
            for item, predicted, score in zip(corpus, predicted_levels, plugin_scores)
        ],
    }

if __name__ == "__main__":
    # Script entry point: score the whole corpus through the Moodle plugin and
    # print an accuracy report.
    import os  # local import: only the entry point reads the environment

    # The site URL, token and corpus location can be overridden through the
    # environment, so the script runs elsewhere without editing the source.
    # The previous hard-coded values remain as defaults.
    # NOTE(review): a real-looking web-service token is committed here as the
    # fallback; consider rotating it and supplying it only via MOODLE_TOKEN.
    api = MoodleReadabilityAPI(
        os.environ.get("MOODLE_BASE_URL", "http://192.168.178.108:8000"),
        os.environ.get("MOODLE_TOKEN", "bd1fed61a067bfb27e00ae09c23960a8"),
    )
    base_dir = os.environ.get(
        "ONESTOP_CORPUS_DIR",
        "/home/anrichp/Documents/Development/OneStopEnglishCorpus",
    )

    try:
        corpus = read_corpus(base_dir)

        accuracy_results = validate_accuracy(api, corpus)

        print("\nAccuracy Validation Results:")
        print(f"Overall Accuracy: {accuracy_results['overall_accuracy']:.2%}")
        print(f"Correlation: {accuracy_results['correlation']:.2f}")
        print("\nAccuracy by Level:")
        for level, acc in accuracy_results['level_accuracy'].items():
            print(f"  {level.capitalize()}: {acc:.2%}")

        print("\nConfusion Matrix:")
        for actual_level, predictions in accuracy_results['level_confusion'].items():
            print(f"  {actual_level.capitalize()}:")
            for predicted_level, count in predictions.items():
                print(f"    Predicted as {predicted_level}: {count}")

        print("\nDetailed Results:")
        for result in accuracy_results['detailed_results']:
            print(f"File: {result['file']}")
            print(f"  Actual Level: {result['actual_level']}")
            print(f"  Predicted Level: {result['predicted_level']}")
            print(f"  Gunning Fog Index: {result['gunning_fog_index']:.2f}")
            print()

    except FileNotFoundError as e:
        # Raised by read_corpus when the corpus directory is missing.
        print(f"Error: {e}")
    except Exception as e:
        # Top-level boundary: report network/service/validation failures
        # instead of dumping a traceback.
        print(f"An error occurred: {e}")


# Notebook cell output: SyntaxError: unmatched ']' (45306072.py, line 71)