<a href="https://colab.research.google.com/github/deemalvidarshana/Sinhala-spell-checker/blob/main/spellchecker%20website/Sinhala_Grammar_Check_using_ML_Model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive; the dataset path used when loading the
# data points below this mount point.
drive.mount('/content/drive')

**Import the necessary libraries**

In [None]:
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from nltk.tokenize import word_tokenize
import nltk
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import cross_val_score
from sklearn.metrics import confusion_matrix, classification_report

# Download NLTK tokenizer resources; presumably these back the word_tokenize and
# sent_tokenize calls used later in this notebook (confirm against the installed
# NLTK version, which decides whether 'punkt' or 'punkt_tab' is looked up).
nltk.download('punkt')
nltk.download('punkt_tab')

**Step 1: Load and preprocess data**

In [None]:
# Labelled corpus on the mounted Drive. Each non-empty line is expected to be
# "<integer label> <sentence>"; later cells treat label 1 as a correct sentence.
file_path = '/content/drive/MyDrive/AI/grammar10.txt'

with open(file_path, 'r', encoding='utf-16') as file:
    # Iterate the file lazily and skip blank lines: a blank line would split to
    # [''] and the integer conversion of the empty label below would raise
    # ValueError.
    data = [
        stripped.split(" ", 1)
        for stripped in (line.strip() for line in file)
        if stripped
    ]

data_frame = pd.DataFrame(data, columns=["category", "text"])
data_frame["category"] = data_frame["category"].astype(int)
# A line holding only a label yields a missing text value; store it as ''.
data_frame['text'] = data_frame['text'].fillna('')

text_data = data_frame['text']
labels = data_frame['category']

**Step 2: Split data**

In [None]:
# Hold out 20% of the sentences for testing; the fixed seed keeps the split
# identical across runs.
(
    X_train_data,
    X_test_data,
    y_train_labels,
    y_test_labels,
) = train_test_split(
    text_data,
    labels,
    test_size=0.2,
    random_state=42,
)

**Step 3: Vectorization**

In [None]:
# Bag-of-words features using NLTK's word tokenizer; token_pattern=None because
# a custom tokenizer is supplied instead of the regex pattern.
vectorizer_instance = CountVectorizer(
    tokenizer=word_tokenize,
    token_pattern=None,
)

# Learn the vocabulary from the training split only, then reuse it for the test split.
X_train_vec = vectorizer_instance.fit_transform(X_train_data)
X_test_vec = vectorizer_instance.transform(X_test_data)

**Step 4: Train the model**

In [None]:
# Random forest with 100 trees; the fixed seed makes training repeatable.
classifier_model = RandomForestClassifier(
    n_estimators=100,
    random_state=42,
)
classifier_model.fit(X_train_vec, y_train_labels)

**Step 5: Evaluate model accuracy**

In [None]:
# Score the fitted forest on the held-out split.
predictions = classifier_model.predict(X_test_vec)
model_accuracy = accuracy_score(
    y_true=y_test_labels,
    y_pred=predictions,
)
print(f"Accuracy: {model_accuracy:.2f}")

**Step 6: Define improved grammar check function**

In [None]:
def _best_jaccard_match(sentence, candidates):
    """Return the candidate sharing the most words with ``sentence``, or None.

    Similarity is the Jaccard index of the whitespace-split word sets. The
    first candidate with the strictly highest non-zero score wins; None is
    returned when no candidate shares a word with ``sentence``.
    """
    sentence_words = set(sentence.split())  # built once, not once per candidate
    best_match = None
    highest_similarity = 0

    for candidate in candidates:
        candidate_words = set(candidate.split())
        union = sentence_words | candidate_words
        if not union:
            # Both sides are empty: similarity is 0 and can never win.
            continue
        similarity_score = len(sentence_words & candidate_words) / len(union)
        if similarity_score > highest_similarity:
            highest_similarity = similarity_score
            best_match = candidate

    return best_match


def grammar_check(input_text, vectorizer, model, dataframe):
    """
    Check the grammar of a given input text and return the corrected text.

    The text is split into sentences with ``nltk.sent_tokenize``. Each sentence
    is vectorized and classified; a prediction of 1 keeps the sentence as is.
    Any other prediction replaces the sentence with the most similar
    category-1 text from ``dataframe`` (word-level Jaccard similarity), or
    keeps the sentence when nothing overlaps.

    Args:
        input_text: Text to check; may contain several sentences.
        vectorizer: Fitted vectorizer exposing ``transform``.
        model: Fitted classifier exposing ``predict``.
        dataframe: DataFrame with ``category`` and ``text`` columns.

    Returns:
        A ``(corrected_text, all_correct)`` tuple: the sentences joined by a
        single space, and True only if every sentence was predicted as 1.
    """
    sentences = nltk.sent_tokenize(input_text)
    corrected_sentences = []
    all_correct = True
    # Reference sentences are extracted lazily and only once, instead of
    # re-filtering the dataframe for every incorrect sentence.
    correct_texts = None

    for sentence in sentences:
        vectorized_text = vectorizer.transform([sentence])
        result = model.predict(vectorized_text)[0]

        if result == 1:
            corrected_sentences.append(sentence)  # Keep the original sentence if correct
            continue

        all_correct = False
        if correct_texts is None:
            correct_texts = dataframe.loc[dataframe['category'] == 1, 'text'].tolist()

        best_match = _best_jaccard_match(sentence, correct_texts)
        # Fall back to the original sentence when no reference overlaps.
        corrected_sentences.append(best_match if best_match else sentence)

    corrected_text = " ".join(corrected_sentences)
    return corrected_text, all_correct

**Step 7: Cross-validation**

In [None]:
# 5-fold cross-validated accuracy of the baseline forest on the training split.
cross_val_scores = cross_val_score(
    classifier_model,
    X_train_vec,
    y_train_labels,
    cv=5,
    scoring='accuracy',
)
print(f"Cross-Validation Mean Accuracy: {cross_val_scores.mean():.2f}")

**Step 8: Hyperparameter tuning**

In [None]:
# Search space for the forest: 3 x 3 x 3 x 3 = 81 combinations, each
# evaluated with 3-fold cross-validation on the training split.
param_grid_values = {
    'n_estimators': [100, 200, 300],
    'max_depth': [10, 20, None],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4],
}

grid_search_model = GridSearchCV(
    RandomForestClassifier(random_state=42),
    param_grid_values,
    cv=3,
    n_jobs=-1,   # use every available core
    verbose=2,
)
grid_search_model.fit(X_train_vec, y_train_labels)

# NOTE(review): the evaluation and UI code in this notebook use
# `classifier_model`, not this tuned estimator.
best_classifier_model = grid_search_model.best_estimator_

**Step 9: Confusion matrix and classification report**

In [None]:
# Per-class breakdown of the baseline model's predictions on the held-out split.
conf_matrix = confusion_matrix(
    y_true=y_test_labels,
    y_pred=predictions,
)
print(conf_matrix)

class_report = classification_report(y_test_labels, predictions)
print(class_report)

**Step 10: UI section**

In [None]:
import ipywidgets as widgets
from IPython.display import display, HTML
import time
import nltk

# Same 'punkt' download as in the import cell near the top of the notebook.
nltk.download('punkt')

def sinhala_spell_checker_with_popup():
    """Build and display the interactive grammar-checker UI.

    The UI consists of a header, a text area, a check button, a loading
    spinner and a result/error popup. Clicking the button runs
    ``grammar_check`` on the entered text with the notebook-level
    ``vectorizer_instance``, ``classifier_model`` and ``data_frame``, then
    shows either a success popup or the corrected text.

    Returns:
        None. The widgets are rendered with ``display``.
    """
    import html  # stdlib; local import so this cell does not depend on an import-cell change

    # Single source of truth for the idle button label, so the label shown
    # after a check matches the one the button starts with.
    idle_button_label = "Check grammar and spelling"

    # Add custom CSS for enhanced styling
    display(HTML("""
        <style>
            @keyframes fadeIn {
                from { opacity: 0; transform: translateY(-20px); }
                to { opacity: 1; transform: translateY(0); }
            }

            @keyframes shimmer {
                0% { background-position: -1000px 0; }
                100% { background-position: 1000px 0; }
            }

            @keyframes pulse {
                0% { transform: scale(1); }
                50% { transform: scale(1.05); }
                100% { transform: scale(1); }
            }

            .gradient-border {
                position: relative;
                background: linear-gradient(60deg, #5C7CFA, #748FFC, #91A7FF, #748FFC, #5C7CFA);
                background-size: 200% auto;
                animation: gradient 3s linear infinite;
            }

            @keyframes gradient {
                0% { background-position: 0% 50%; }
                50% { background-position: 100% 50%; }
                100% { background-position: 0% 50%; }
            }
        </style>
    """))

    def check_spelling(change):
        """Button callback: validate the input, run the check, show a popup."""
        check_button.disabled = True
        check_button.description = "Checking..."
        loading_indicator.layout.display = 'flex'

        # Everything after the button is disabled runs inside try/finally so
        # the button is always re-enabled, including on the empty-input path.
        try:
            time.sleep(0.8)  # Simulate processing for smooth animation

            input_text = text_box.value.strip()
            if not input_text:
                show_error_popup("Please enter a sentence to check.")
                return

            # Reuse the notebook's grammar_check instead of duplicating its
            # sentence-splitting and best-match logic here.
            corrected_text, all_correct = grammar_check(
                input_text, vectorizer_instance, classifier_model, data_frame
            )
            show_result_popup(all_correct, corrected_text)

        except Exception as e:
            show_error_popup(f"An error occurred: {str(e)}")
        finally:
            reset_button()

    def show_result_popup(is_correct, corrected_text=""):
        """Fill the popup with the success message or the corrected text."""
        if is_correct:
            popup_title.value = """
                <div style='text-align: center; animation: fadeIn 0.5s ease-out;'>
                    <div style='color: #40C057; font-size: 24px; margin-bottom: 10px;'>✨ correct</div>
                </div>
            """
            popup_message.value = """
                <div style='text-align: center; animation: fadeIn 0.5s ease-out;'>
                    <div style='font-size: 64px; margin: 20px 0; animation: pulse 2s infinite;'>🎯</div>
                    <div style='font-size: 20px; color: #40C057; font-weight: bold;'>
                        All sentences are spelled and grammatically correct!
                    </div>
                </div>
            """
        else:
            # The corrected text can contain the user's own input verbatim;
            # escape it so characters such as '<' and '&' are shown as text
            # rather than interpreted as markup by the HTML widget.
            safe_text = html.escape(corrected_text)
            popup_title.value = """
                <div style='text-align: center; animation: fadeIn 0.5s ease-out;'>
                    <div style='color: #FF6B6B; font-size: 24px; margin-bottom: 10px;'>❌ Incorrect</div>
                </div>
            """
            popup_message.value = f"""
                <div style='text-align: center; animation: fadeIn 0.5s ease-out;'>
                    <div style='font-size: 64px; margin: 20px 0;'>🔍</div>
                    <div style='font-size: 20px; color: #FF6B6B; margin-bottom: 15px;'>
                        Corrected Sentence:
                    </div>
                    <div style='
                        margin: 20px;
                        padding: 20px;
                        background: linear-gradient(145deg, #f8f9fa, #e9ecef);
                        border-radius: 15px;
                        box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
                    '>
                        <div style='
                            color: #40C057;
                            padding: 15px;
                            background: white;
                            border-radius: 10px;
                            font-size: 18px;
                        '>{safe_text}</div>
                    </div>
                </div>
            """
        show_popup()

    def show_error_popup(message):
        """Fill the popup with an error message and show it."""
        # Messages may embed exception text; escape before placing in markup.
        safe_message = html.escape(message)
        popup_title.value = """
            <div style='text-align: center; animation: fadeIn 0.5s ease-out;'>
                <div style='color: #FF6B6B; font-size: 24px; margin-bottom: 10px;'>⚠️ දෝෂයක්</div>
            </div>
        """
        popup_message.value = f"""
            <div style='text-align: center; animation: fadeIn 0.5s ease-out;'>
                <div style='font-size: 18px; color: #FF6B6B; margin: 20px 0;'>{safe_message}</div>
            </div>
        """
        show_popup()

    def show_popup():
        """Make the popup box visible."""
        popup_box.layout.display = 'flex'
        # NOTE(review): opacity/transition do not appear among the documented
        # ipywidgets Layout attributes, so these assignments may have no
        # visual effect - confirm.
        popup_box.layout.opacity = '0'
        popup_box.layout.transition = 'all 0.3s ease-in'
        popup_box.layout.opacity = '1'

    def close_popup(change):
        """Close-button callback: hide the popup box after a short delay."""
        popup_box.layout.transition = 'all 0.3s ease-out'
        popup_box.layout.opacity = '0'
        time.sleep(0.3)
        popup_box.layout.display = 'none'

    def reset_button():
        """Re-enable the check button and hide the loading spinner."""
        check_button.disabled = False
        check_button.description = idle_button_label
        loading_indicator.layout.display = 'none'

    # Enhanced loading indicator (hidden until a check starts)
    loading_indicator = widgets.HTML(
        value="""
        <div style='display: flex; justify-content: center; align-items: center; margin: 15px 0;'>
            <div style='
                width: 50px;
                height: 50px;
                border: 5px solid #f3f3f3;
                border-top: 5px solid #5C7CFA;
                border-radius: 50%;
                animation: spin 1s linear infinite;
                box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
            '></div>
        </div>
        <style>
            @keyframes spin {
                0% { transform: rotate(0deg); }
                100% { transform: rotate(360deg); }
            }
        </style>
        """,
        layout=widgets.Layout(display='none')
    )

    # Enhanced text input
    # NOTE(review): border_radius, font_size and box_shadow (and similar
    # CSS-style keywords on the layouts below) do not appear among the
    # documented ipywidgets Layout attributes and may be ignored - confirm.
    text_box = widgets.Textarea(
        value="",
        placeholder="Type your sentence here...",
        layout=widgets.Layout(
            width='100%',
            height='150px',
            padding="20px",
            margin="15px 0",
            border_radius="15px",
            font_size="16px",
            box_shadow="0 4px 6px rgba(0, 0, 0, 0.1)"
        )
    )

    # Enhanced check button
    check_button = widgets.Button(
        description=idle_button_label,
        button_style="primary",
        icon="check-circle",
        layout=widgets.Layout(
            width="200px",
            height="50px",
            margin="20px auto",
            align_items='center',
            font_weight="bold"
        )
    )

    # Enhanced header
    header = widgets.HTML("""
        <div style='text-align: center; padding: 30px 0; animation: fadeIn 1s ease-out;'>
            <div style='
                display: inline-block;
                padding: 20px 40px;
                background: linear-gradient(135deg, #5C7CFA, #748FFC);
                border-radius: 20px;
                box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
            '>
                <h2 style='
                    color: white;
                    font-size: 32px;
                    margin: 0;
                    text-shadow: 2px 2px 4px rgba(0, 0, 0, 0.2);
                '>Sinhala Grammar Checker</h2>
            </div>
        </div>
    """)

    # Enhanced popup components
    popup_title = widgets.HTML(value="")
    popup_message = widgets.HTML(value="")
    close_button = widgets.Button(
        description="Close",
        button_style="danger",
        icon="times",
        layout=widgets.Layout(
            width="150px",
            height="40px",
            margin="20px 0 0 0",
            border_radius="20px"
        )
    )
    close_button.on_click(close_popup)

    # Enhanced popup box (hidden until a result or error is shown)
    popup_box = widgets.VBox(
        [popup_title, popup_message, close_button],
        layout=widgets.Layout(
            display='none',
            position='fixed',
            margin='0 auto',
            transform='translate(-50%, -50%)',
            padding='30px',
            border='none',
            border_radius='20px',
            background_color='white',
            box_shadow='0 10px 25px rgba(0, 0, 0, 0.2)',
            width='800px',
            align_items='center'
        )
    )

    # Main container with enhanced layout
    main_container = widgets.VBox(
        [header, text_box, check_button, loading_indicator, popup_box],
        layout=widgets.Layout(
            padding='20px',
            width='100%',
            max_width='800px',
            margin='0 auto',
            background_color='#f8f9fa',
            border_radius='25px',
            box_shadow='0 8px 16px rgba(0, 0, 0, 0.1)'
        )
    )

    check_button.on_click(check_spelling)
    display(main_container)

# Build and display the checker UI defined above.
sinhala_spell_checker_with_popup()