In [1]:
!pip install spacy
!python -m spacy download en_core_web_sm
!pip install nltk
!pip install textblob
!pip install textstat


Collecting en-core-web-sm==3.7.1
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.1/en_core_web_sm-3.7.1-py3-none-any.whl (12.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.8/12.8 MB[0m [31m100.2 MB/s[0m eta [36m0:00:00[0m
[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_sm')
[38;5;3m⚠ Restart to reload dependencies[0m
If you are in a Jupyter or Colab notebook, you may need to restart Python in
order to load all the package's dependencies. You can do this by selecting the
'Restart kernel' or 'Restart runtime' option.
Collecting textstat
  Downloading textstat-0.7.4-py3-none-any.whl.metadata (14 kB)
Collecting pyphen (from textstat)
  Downloading pyphen-0.17.0-py3-none-any.whl.metadata (3.2 kB)
Downloading textstat-0.7.4-py3-none-any.whl (105 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m105.1/105.1 kB[0m [31m3.0 MB/s[0m eta [36

In [13]:
import pandas as pd
import spacy
import nltk
from textstat import textstat
from nltk.tokenize import word_tokenize, sent_tokenize
from nltk.corpus import stopwords
from tqdm.notebook import tqdm
from concurrent.futures import ThreadPoolExecutor
import re
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from spacy.tokens import Doc
from textblob import TextBlob

# Fetch the NLTK resources this notebook relies on (sentence/word tokenisers
# and the English stop-word list).
for nltk_resource in ('punkt', 'punkt_tab', 'stopwords'):
    nltk.download(nltk_resource)
stop_words = set(stopwords.words('english'))

# Ask spaCy to use the GPU when one is available, then load the small English pipeline.
spacy.prefer_gpu()
nlp = spacy.load('en_core_web_sm')
# Append a sentencizer component; as the last expression of the cell, the
# component object it returns is echoed as the cell output.
nlp.add_pipe('sentencizer')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


<spacy.pipeline.sentencizer.Sentencizer at 0x7934dc87d1c0>

In [25]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, mean_absolute_error, cohen_kappa_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, SimpleRNN
from tensorflow.keras.callbacks import ModelCheckpoint
import joblib

# Read the pre-computed essay feature table.
dataset = pd.read_csv('Updated_Processed_Data.csv')

# Target column first; every remaining column except the raw text and the
# identifier is used as a model input.
# NOTE(review): extract_features emits a placeholder for an 'Unnamed: 0' column,
# which suggests a saved row index is still among these inputs - confirm, and
# drop it here and there together if so.
y = dataset['final_score']
X = dataset.drop(columns=['final_score', 'essay', 'essay_id', 'clean_essay'])

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Standardise using statistics learned from the training rows only.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Persist the fitted scaler so the scoring cells can apply the same transform.
joblib.dump(scaler, 'scaler.pkl')

# Evaluation function
def evaluate_model(y_true, y_pred, model_name):
    """Print and return error and agreement metrics for one model.

    Args:
        y_true: Reference scores.
        y_pred: Predicted scores.
        model_name: Label used in the printed heading.

    Returns:
        Tuple ``(mse, mae, qwk)``. ``qwk`` is Cohen's kappa with quadratic
        weights, computed after rounding the predictions with ``np.round``.
    """
    rounded_pred = np.round(y_pred)
    metrics = (
        mean_squared_error(y_true, y_pred),
        mean_absolute_error(y_true, y_pred),
        cohen_kappa_score(y_true, rounded_pred, weights='quadratic'),
    )
    mse, mae, qwk = metrics
    print(f"{model_name} Evaluation:")
    print(f"MSE: {mse}, MAE: {mae}, QWK: {qwk}\n")
    return metrics

# Linear Regression
# Fit on the scaled training split, then report metrics on the held-out split.
lr = LinearRegression().fit(X_train_scaled, y_train)
lr_pred = lr.predict(X_test_scaled)
evaluate_model(y_true=y_test, y_pred=lr_pred, model_name="Linear Regression")

# Persist the fitted regressor so the scoring cells can reload it.
joblib.dump(lr, 'linear_regression_model.pkl')

# ANN Model
# The input is declared with an explicit Input layer: passing input_dim to the
# first Dense layer is deprecated and emits a UserWarning during construction.
from tensorflow.keras.layers import Input

ann = Sequential([
    Input(shape=(X_train_scaled.shape[1],)),
    Dense(64, activation='relu'),
    Dense(32, activation='relu'),
    Dense(1, activation='linear')
])
ann.compile(optimizer='adam', loss='mse', metrics=['mae'])
# save_best_only keeps only the best epoch (by the monitored validation loss) on disk.
ann_checkpoint = ModelCheckpoint('ann_model.keras', save_best_only=True)
ann.fit(X_train_scaled, y_train, validation_split=0.2, epochs=50, batch_size=32, callbacks=[ann_checkpoint], verbose=1)
# After fit() the in-memory network holds the LAST epoch's weights, while the
# file holds the best checkpoint. Restore the checkpoint so the metrics below
# describe the model that the scoring cells actually load.
ann.load_weights('ann_model.keras')
ann_pred = ann.predict(X_test_scaled).flatten()
evaluate_model(y_test, ann_pred, "ANN")

# LSTM Model
# The input is declared with an explicit Input layer: passing input_shape to the
# recurrent layer is deprecated and emits a UserWarning during construction.
from tensorflow.keras.layers import Input

# Recurrent layers take (samples, timesteps, features); every essay is fed as a
# single timestep holding its whole feature vector.
X_train_lstm = X_train_scaled.reshape(X_train_scaled.shape[0], 1, X_train_scaled.shape[1])
X_test_lstm = X_test_scaled.reshape(X_test_scaled.shape[0], 1, X_test_scaled.shape[1])

lstm = Sequential([
    Input(shape=(1, X_train_scaled.shape[1])),
    LSTM(64, activation='tanh', return_sequences=False),
    Dense(32, activation='relu'),
    Dense(1, activation='linear')
])
lstm.compile(optimizer='adam', loss='mse', metrics=['mae'])
# save_best_only keeps only the best epoch (by the monitored validation loss) on disk.
lstm_checkpoint = ModelCheckpoint('lstm_model.keras', save_best_only=True)
lstm.fit(X_train_lstm, y_train, validation_split=0.2, epochs=50, batch_size=32, callbacks=[lstm_checkpoint], verbose=1)
# Evaluate the checkpointed weights (the ones later reloaded for scoring)
# rather than whatever the final epoch left in memory.
lstm.load_weights('lstm_model.keras')
lstm_pred = lstm.predict(X_test_lstm).flatten()
evaluate_model(y_test, lstm_pred, "LSTM")

# RNN Model
# The input is declared with an explicit Input layer: passing input_shape to the
# recurrent layer is deprecated and emits a UserWarning during construction.
from tensorflow.keras.layers import Input

# Reuses X_train_lstm / X_test_lstm, the (samples, 1, features) arrays built for the LSTM.
rnn = Sequential([
    Input(shape=(1, X_train_scaled.shape[1])),
    SimpleRNN(64, activation='tanh', return_sequences=False),
    Dense(32, activation='relu'),
    Dense(1, activation='linear')
])
rnn.compile(optimizer='adam', loss='mse', metrics=['mae'])
# save_best_only keeps only the best epoch (by the monitored validation loss) on disk.
rnn_checkpoint = ModelCheckpoint('rnn_model.keras', save_best_only=True)
rnn.fit(X_train_lstm, y_train, validation_split=0.2, epochs=50, batch_size=32, callbacks=[rnn_checkpoint], verbose=1)
# Evaluate the checkpointed weights (the ones later reloaded for scoring)
# rather than whatever the final epoch left in memory.
rnn.load_weights('rnn_model.keras')
rnn_pred = rnn.predict(X_test_lstm).flatten()
evaluate_model(y_test, rnn_pred, "RNN")

# Function to predict essay scores using trained models
def predict_essay_score(features):
    """Score one essay's feature vector with every saved model.

    The scaler and the four models are read from the files written by the
    training code ('scaler.pkl', 'linear_regression_model.pkl',
    'ann_model.keras', 'lstm_model.keras', 'rnn_model.keras').

    Args:
        features: Sequence of numeric feature values, in the same column order
            the scaler was fitted on.

    Returns:
        dict mapping 'Linear Regression', 'ANN', 'LSTM' and 'RNN' to that
        model's (unclipped) predicted score.

    Raises:
        ValueError: if the number of values does not match the columns the
            scaler was fitted on (raised while building the input frame or by
            the scaler itself).
    """
    # Local import: the surrounding cell only imports Sequential from keras.models.
    from tensorflow.keras.models import load_model

    # NOTE: joblib.load unpickles its input; only load artefacts written by this notebook.
    scaler = joblib.load('scaler.pkl')
    lr_model = joblib.load('linear_regression_model.pkl')

    # load_model restores architecture and weights together, so the layer stack
    # is not re-typed here where it could drift from the training definition.
    # compile=False: the models are only used for inference.
    ann_model = load_model('ann_model.keras', compile=False)
    lstm_model = load_model('lstm_model.keras', compile=False)
    rnn_model = load_model('rnn_model.keras', compile=False)

    # Scale features. When the scaler remembers column names (it was fitted on a
    # DataFrame), pass named columns: this avoids scikit-learn's feature-name
    # warning and rejects a vector of the wrong length early.
    feature_row = [list(features)]
    column_names = getattr(scaler, 'feature_names_in_', None)
    if column_names is not None:
        feature_row = pd.DataFrame(feature_row, columns=column_names)
    features_scaled = scaler.transform(feature_row)
    # The recurrent models take (batch, timesteps, features); one essay = one timestep.
    features_scaled_lstm = features_scaled.reshape(1, 1, -1)

    # Predict using each model
    predictions = {
        'Linear Regression': lr_model.predict(features_scaled)[0],
        'ANN': ann_model.predict(features_scaled)[0][0],
        'LSTM': lstm_model.predict(features_scaled_lstm)[0][0],
        'RNN': rnn_model.predict(features_scaled_lstm)[0][0]
    }
    return predictions

# Function to score example essays
def score_example_essays(example_features):
    """Print every model's prediction for each feature vector in ``example_features``.

    Args:
        example_features: Iterable of feature vectors; each is passed unchanged
            to ``predict_essay_score``.
    """
    print("Scoring example essays with all trained models:\n")
    for essay_number, features in enumerate(example_features, start=1):
        print(f"Example Essay {essay_number}:")
        for model_name, score in predict_essay_score(features).items():
            print(f"{model_name}: {score:.2f}")
        print("\n")

print("All models trained and saved.")


Linear Regression Evaluation:
MSE: 4.2607014105933905, MAE: 1.6412571645379062, QWK: 0.45173544773977703

Epoch 1/50


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m260/260[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 7ms/step - loss: 17.9077 - mae: 3.3985 - val_loss: 9.5457 - val_mae: 1.9171
Epoch 2/50
[1m260/260[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 5.6167 - mae: 1.7950 - val_loss: 5.4194 - val_mae: 1.6731
Epoch 3/50
[1m260/260[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 4.1285 - mae: 1.5687 - val_loss: 3.6695 - val_mae: 1.4659
Epoch 4/50
[1m260/260[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 3.3552 - mae: 1.4134 - val_loss: 3.3766 - val_mae: 1.3585
Epoch 5/50
[1m260/260[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 2.9720 - mae: 1.3245 - val_loss: 3.4043 - val_mae: 1.3223
Epoch 6/50
[1m260/260[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 2.7518 - mae: 1.2650 - val_loss: 3.6917 - val_mae: 1.2839
Epoch 7/50
[1m260/260[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 2.68

  super().__init__(**kwargs)


[1m260/260[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 8ms/step - loss: 22.2183 - mae: 3.9300 - val_loss: 4.4223 - val_mae: 1.6224
Epoch 2/50
[1m260/260[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - loss: 4.0428 - mae: 1.5408 - val_loss: 3.3165 - val_mae: 1.3917
Epoch 3/50
[1m260/260[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - loss: 3.1888 - mae: 1.3670 - val_loss: 2.9437 - val_mae: 1.3110
Epoch 4/50
[1m260/260[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - loss: 2.8477 - mae: 1.2921 - val_loss: 2.7791 - val_mae: 1.2665
Epoch 5/50
[1m260/260[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - loss: 2.7371 - mae: 1.2611 - val_loss: 2.6662 - val_mae: 1.2248
Epoch 6/50
[1m260/260[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - loss: 2.5176 - mae: 1.2097 - val_loss: 2.6634 - val_mae: 1.2216
Epoch 7/50
[1m260/260[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - loss: 2.49

  super().__init__(**kwargs)


[1m260/260[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 8ms/step - loss: 19.5457 - mae: 3.6158 - val_loss: 4.4431 - val_mae: 1.6327
Epoch 2/50
[1m260/260[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 3.8197 - mae: 1.5041 - val_loss: 3.2601 - val_mae: 1.3854
Epoch 3/50
[1m260/260[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 3.1817 - mae: 1.3690 - val_loss: 3.0688 - val_mae: 1.3484
Epoch 4/50
[1m260/260[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 3.0033 - mae: 1.3356 - val_loss: 2.8473 - val_mae: 1.2756
Epoch 5/50
[1m260/260[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 2.7547 - mae: 1.2685 - val_loss: 2.8032 - val_mae: 1.2642
Epoch 6/50
[1m260/260[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 2.6213 - mae: 1.2367 - val_loss: 2.7530 - val_mae: 1.2463
Epoch 7/50
[1m260/260[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 2.61

In [31]:
# Import necessary libraries
import spacy
import nltk
from textstat import textstat
from nltk.tokenize import word_tokenize, sent_tokenize
from textblob import TextBlob
import re
import numpy as np

# Load Spacy model
# Rebinds the module-level `nlp` to a freshly loaded en_core_web_sm pipeline;
# the feature functions defined below all call this object.
nlp = spacy.load('en_core_web_sm')

# Define feature extraction functions
def calculate_readability(text):
    """Return the Flesch Reading Ease score that textstat computes for ``text``."""
    reading_ease = textstat.flesch_reading_ease(text)
    return reading_ease

def calculate_punctuation_score(text):
    """Return the number of '!', '?', '.' and ';' characters per word.

    Words are the whitespace-separated pieces of ``text``. Returns 0 when the
    text contains no words.
    """
    word_total = len(text.split())
    if word_total == 0:
        return 0
    mark_total = 0
    for mark in "!?.;":
        mark_total += text.count(mark)
    return mark_total / word_total

def calculate_vocabulary_richness(text):
    """Return the share of distinct tokens among all NLTK word tokens of ``text``.

    Tokens are compared exactly as ``word_tokenize`` returns them (no
    lower-casing or filtering). Returns 0 when there are no tokens.
    """
    tokens = word_tokenize(text)
    if not tokens:
        return 0
    return len(set(tokens)) / len(tokens)

def calculate_complex_sentence_ratio(text):
    """Return the fraction of sentences with more than 10 non-punctuation tokens.

    Sentences come from the spaCy pipeline ``nlp``; a token counts unless its
    dependency label is 'punct'. Returns 0 when no sentence is found.
    """
    sentences = list(nlp(text).sents)
    if not sentences:
        return 0
    long_sentences = 0
    for sent in sentences:
        content_tokens = [token for token in sent if token.dep_ != 'punct']
        if len(content_tokens) > 10:
            long_sentences += 1
    return long_sentences / len(sentences)

def calculate_clause_density(text):
    """Return the clause-subtree token count per sentence.

    For every token whose dependency label is 'csubj', 'advcl', 'acl' or
    'relcl', the size of its subtree is added up; the total is divided by the
    number of sentences. Returns 0 when no sentence is found.
    """
    doc = nlp(text)
    sentence_total = len(list(doc.sents))
    if sentence_total <= 0:
        return 0
    clause_labels = ('csubj', 'advcl', 'acl', 'relcl')
    subtree_tokens = 0
    for token in doc:
        if token.dep_ in clause_labels:
            subtree_tokens += len(list(token.subtree))
    return subtree_tokens / sentence_total

def calculate_semantic_coherence(text):
    """Return the mean cosine similarity between consecutive sentence vectors.

    Sentences come from NLTK's ``sent_tokenize``; each one is embedded with
    ``nlp(sentence).vector``.

    Returns:
        0 when the text has fewer than two sentences, otherwise the average
        similarity over all adjacent sentence pairs. A pair in which either
        vector has zero length contributes 0.0 instead of dividing by zero
        (which would otherwise turn the whole feature into NaN).
    """
    sentences = sent_tokenize(text)
    if len(sentences) < 2:
        return 0
    embeddings = [nlp(sent).vector for sent in sentences]
    # Each norm is needed for up to two pairs; compute it once per sentence.
    norms = [np.linalg.norm(vector) for vector in embeddings]
    cosine_similarities = []
    for i in range(len(embeddings) - 1):
        denominator = norms[i] * norms[i + 1]
        if denominator == 0:
            cosine_similarities.append(0.0)
        else:
            cosine_similarities.append((embeddings[i] @ embeddings[i + 1]) / denominator)
    return sum(cosine_similarities) / len(cosine_similarities)

def calculate_sentiment_subjectivity(text):
    """Return TextBlob's subjectivity value for ``text``."""
    sentiment = TextBlob(text).sentiment
    return sentiment.subjectivity

def calculate_transitional_phrase_use(text):
    """Return the share of tokens that are one of five transition words.

    The lower-cased text is split with ``word_tokenize``; a token counts when it
    equals "however", "therefore", "moreover", "furthermore" or "nevertheless".
    Returns 0 when there are no tokens.
    """
    tokens = word_tokenize(text.lower())
    if len(tokens) == 0:
        return 0
    markers = {"however", "therefore", "moreover", "furthermore", "nevertheless"}
    hits = [token for token in tokens if token in markers]
    return len(hits) / len(tokens)

def calculate_figurative_language_use(text):
    """Return figurative-cue matches per whitespace-separated word.

    Cues are "like", "as if", "seems" and "metaphorically", searched in the
    lower-cased text. The pattern has no word boundaries, so a cue inside a
    longer word (e.g. "likely") is counted as well. Returns 0 for text without
    words.
    """
    word_total = len(text.split())
    if word_total <= 0:
        return 0
    cue_pattern = r"like|as if|seems|metaphorically"
    cue_hits = re.findall(cue_pattern, text.lower())
    return len(cue_hits) / word_total

def calculate_question_usage(text):
    """Return the number of '?' characters per sentence.

    Sentences are counted with NLTK's ``sent_tokenize``.

    Returns:
        0 for empty text, and also 0 when the tokenizer finds no sentence in
        non-empty text (e.g. whitespace only), which previously raised
        ZeroDivisionError.
    """
    if not text:
        return 0
    sentences = sent_tokenize(text)
    if not sentences:
        return 0
    return text.count('?') / len(sentences)

# Example essays
bad_essay = "Computers are good. They help people do things. I like computers. They are fun. Everyone uses them. They are nice."
great_essay = "In today's world, computers have revolutionized how we work and communicate. They enable efficient problem-solving and enhance productivity across diverse industries. With access to vast resources, individuals can learn and innovate at unprecedented levels. Despite some challenges, the benefits of technology far outweigh the drawbacks, and computers have undoubtedly become indispensable tools for progress."

def extract_features(essay, essay_set=1):
    """Build the 21-value feature vector for one essay.

    The essay is tokenised and parsed once, and the results are shared by the
    count features (previously it was run through spaCy four times and through
    ``word_tokenize`` three times for the same values).

    Args:
        essay: Essay text.
        essay_set: Value emitted for the 'essay_set' column. Defaults to 1, the
            value that used to be hard-coded.

    Returns:
        list of 21 numbers, in this order: 'Unnamed: 0' placeholder, essay_set,
        char_count, word_count, sent_count, avg_word_len, spell_err_count,
        noun_count, adj_count, verb_count, adv_count, readability, punctuation
        score, vocabulary richness, complex sentence ratio, clause density,
        semantic coherence, sentiment subjectivity, transitional phrase use,
        figurative language use, question usage.
    """
    tokens = word_tokenize(essay)
    pos_tags = [token.pos_ for token in nlp(essay)]
    return [
        0,  # Placeholder for 'Unnamed: 0', irrelevant in testing but retained for structure
        essay_set,  # 'essay_set'
        len(essay),  # char_count
        len(tokens),  # word_count
        len(sent_tokenize(essay)),  # sent_count
        np.mean([len(word) for word in tokens]) if tokens else 0,  # avg_word_len
        # spell_err_count (placeholder): this counts all-lowercase words, not
        # misspellings. NOTE(review): confirm how the training column was
        # computed; a mismatch skews every model's prediction.
        len(re.findall(r'\b[a-z]+\b', essay)),
        pos_tags.count("NOUN"),  # noun_count
        pos_tags.count("ADJ"),  # adj_count
        pos_tags.count("VERB"),  # verb_count
        pos_tags.count("ADV"),  # adv_count
        calculate_readability(essay),
        calculate_punctuation_score(essay),
        calculate_vocabulary_richness(essay),
        calculate_complex_sentence_ratio(essay),
        calculate_clause_density(essay),
        calculate_semantic_coherence(essay),
        calculate_sentiment_subjectivity(essay),
        calculate_transitional_phrase_use(essay),
        calculate_figurative_language_use(essay),
        calculate_question_usage(essay)
    ]

# Build the feature vector of each example essay, then show both vectors.
bad_essay_features, great_essay_features = (
    extract_features(essay_text) for essay_text in (bad_essay, great_essay)
)

print("Bad Essay Features:", bad_essay_features)
print("Great Essay Features:", great_essay_features)


Bad Essay Features: [0, 1, 114, 26, 6, 3.6538461538461537, 14, 4, 3, 4, 0, 85.05, 0.3, 0.6538461538461539, 0.0, 0.0, 0.614905059337616, 0.6, 0.0, 0.05, 0.0]
Great Essay Features: [0, 1, 409, 63, 4, 5.650793650793651, 52, 18, 5, 9, 2, 23.93, 0.07407407407407407, 0.8095238095238095, 1.0, 0.0, 0.553426076968511, 0.95, 0.0, 0.0, 0.0]


In [32]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, SimpleRNN, Input

# Load saved models and scaler
# load_model restores each network's architecture together with its weights, so
# the layer stack is read from the checkpoint instead of being re-typed here,
# where it could drift from the training cell. compile=False: inference only.
from tensorflow.keras.models import load_model

# NOTE: joblib.load unpickles its input; only load artefacts written by this notebook.
scaler = joblib.load('scaler.pkl')
lr_model = joblib.load('linear_regression_model.pkl')
ann_model = load_model('ann_model.keras', compile=False)
lstm_model = load_model('lstm_model.keras', compile=False)
rnn_model = load_model('rnn_model.keras', compile=False)

# Scale features
# Passing the column names the scaler was fitted with avoids scikit-learn's
# feature-name warning and rejects a vector of the wrong length early.
feature_names = getattr(scaler, 'feature_names_in_', None)
bad_essay_scaled = scaler.transform(pd.DataFrame([bad_essay_features], columns=feature_names))
great_essay_scaled = scaler.transform(pd.DataFrame([great_essay_features], columns=feature_names))

# The recurrent models take (batch, timesteps, features); one essay = one timestep.
bad_essay_scaled_lstm = bad_essay_scaled.reshape(1, 1, -1)
great_essay_scaled_lstm = great_essay_scaled.reshape(1, 1, -1)

# Predict scores for bad essay
bad_essay_predictions = {
    'Linear Regression': lr_model.predict(bad_essay_scaled)[0],
    'ANN': ann_model.predict(bad_essay_scaled)[0][0],
    'LSTM': lstm_model.predict(bad_essay_scaled_lstm)[0][0],
    'RNN': rnn_model.predict(bad_essay_scaled_lstm)[0][0]
}

# Predict scores for great essay
great_essay_predictions = {
    'Linear Regression': lr_model.predict(great_essay_scaled)[0],
    'ANN': ann_model.predict(great_essay_scaled)[0][0],
    'LSTM': lstm_model.predict(great_essay_scaled_lstm)[0][0],
    'RNN': rnn_model.predict(great_essay_scaled_lstm)[0][0]
}

# Clip predictions to range [0, 10]
def clip_scores(predictions, lower=0, upper=10):
    """Clamp every predicted score into ``[lower, upper]``.

    Args:
        predictions: dict mapping model name to a numeric score.
        lower: Smallest allowed score (default 0).
        upper: Largest allowed score (default 10).

    Returns:
        New dict with the same keys. A NaN score is passed through unchanged;
        a plain ``max(lower, min(upper, score))`` would silently report it as
        ``upper``, i.e. as a perfect score.
    """
    clipped = {}
    for model, score in predictions.items():
        if score != score:  # only NaN is unequal to itself
            clipped[model] = score
        else:
            clipped[model] = max(lower, min(upper, score))
    return clipped

# Build the feature vector of each example essay, then show both vectors.
bad_essay_features, great_essay_features = (
    extract_features(essay_text) for essay_text in (bad_essay, great_essay)
)

print("Bad Essay Features:", bad_essay_features)
print("Great Essay Features:", great_essay_features)



[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 175ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 92ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 285ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
Bad Essay Features: [0, 1, 114, 26, 6, 3.6538461538461537, 14, 4, 3, 4, 0, 85.05, 0.3, 0.6538461538461539, 0.0, 0.0, 0.614905059337616, 0.6, 0.0, 0.05, 0.0]
Great Essay Features: [0, 1, 409, 63, 4, 5.650793650793651, 52, 18, 5, 9, 2, 23.93, 0.07407407407407407, 0.8095238095238095, 1.0, 0.0, 0.553426076968511, 0.95, 0.0, 0.0, 0.0]


In [33]:
# Load saved models and scaler
# load_model restores each network's architecture together with its weights, so
# the layer stack is read from the checkpoint instead of being re-typed here,
# where it could drift from the training cell. compile=False: inference only.
from tensorflow.keras.models import load_model

# NOTE: joblib.load unpickles its input; only load artefacts written by this notebook.
scaler = joblib.load('scaler.pkl')
lr_model = joblib.load('linear_regression_model.pkl')
ann_model = load_model('ann_model.keras', compile=False)
lstm_model = load_model('lstm_model.keras', compile=False)
rnn_model = load_model('rnn_model.keras', compile=False)

# Scale features
# Passing the column names the scaler was fitted with avoids scikit-learn's
# feature-name warning and rejects a vector of the wrong length early.
feature_names = getattr(scaler, 'feature_names_in_', None)
bad_essay_scaled = scaler.transform(pd.DataFrame([bad_essay_features], columns=feature_names))
great_essay_scaled = scaler.transform(pd.DataFrame([great_essay_features], columns=feature_names))

# The recurrent models take (batch, timesteps, features); one essay = one timestep.
bad_essay_scaled_lstm = bad_essay_scaled.reshape(1, 1, -1)
great_essay_scaled_lstm = great_essay_scaled.reshape(1, 1, -1)

# Predict scores for bad essay
bad_essay_predictions = {
    'Linear Regression': lr_model.predict(bad_essay_scaled)[0],
    'ANN': ann_model.predict(bad_essay_scaled)[0][0],
    'LSTM': lstm_model.predict(bad_essay_scaled_lstm)[0][0],
    'RNN': rnn_model.predict(bad_essay_scaled_lstm)[0][0]
}

# Predict scores for great essay
great_essay_predictions = {
    'Linear Regression': lr_model.predict(great_essay_scaled)[0],
    'ANN': ann_model.predict(great_essay_scaled)[0][0],
    'LSTM': lstm_model.predict(great_essay_scaled_lstm)[0][0],
    'RNN': rnn_model.predict(great_essay_scaled_lstm)[0][0]
}

# Clip predictions to range [0, 10]
def clip_scores(predictions, lower=0, upper=10):
    """Clamp every predicted score into ``[lower, upper]``.

    Args:
        predictions: dict mapping model name to a numeric score.
        lower: Smallest allowed score (default 0).
        upper: Largest allowed score (default 10).

    Returns:
        New dict with the same keys. A NaN score is passed through unchanged;
        a plain ``max(lower, min(upper, score))`` would silently report it as
        ``upper``, i.e. as a perfect score.
    """
    clipped = {}
    for model, score in predictions.items():
        if score != score:  # only NaN is unequal to itself
            clipped[model] = score
        else:
            clipped[model] = max(lower, min(upper, score))
    return clipped

# Clamp both prediction sets to the valid score range.
bad_essay_predictions, great_essay_predictions = map(
    clip_scores, (bad_essay_predictions, great_essay_predictions)
)

# Report the clipped scores of every model for both essays.
print("Bad Essay Predictions:", bad_essay_predictions)
print("Great Essay Predictions:", great_essay_predictions)




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 134ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 91ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 217ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
Bad Essay Predictions: {'Linear Regression': 2.5031762898919223, 'ANN': 0.14068669, 'LSTM': 1.8740671, 'RNN': 0.85748106}
Great Essay Predictions: {'Linear Regression': 7.784317484317058, 'ANN': 4.6780076, 'LSTM': 1.8288155, 'RNN': 2.4256368}


In [37]:
great_essay = "In the 21st century, technology has revolutionized nearly every aspect of our lives, including the field of education. It has transformed traditional teaching methods, expanded access to knowledge, and provided new opportunities for students and educators alike. While some skeptics argue that reliance on technology diminishes critical thinking and interpersonal skills, the benefits of incorporating technology in education far outweigh the drawbacks. Technology serves as a bridge to equal educational opportunities, fosters creativity, and equips students with skills essential for the future. One of the most significant advantages of technology in education is the democratization of learning. In the past, access to quality education was often limited by geographic location, financial constraints, or a lack of resources. Today, online platforms and digital tools have made it possible for students from all corners of the globe to access top-notch educational content. Websites like Khan Academy, Coursera, and edX offer free or affordable courses taught by leading educators and institutions. Students in remote or underprivileged areas can now learn the same material as their peers in more affluent regions, leveling the playing field and fostering a sense of global community. Furthermore, technology empowers educators to create more engaging and personalized learning experiences. Traditional classrooms often rely on a one-size-fits-all approach, which may not cater to the diverse needs of students. With the advent of learning management systems and data analytics, teachers can identify individual students’ strengths and weaknesses and tailor their teaching strategies accordingly. For instance, adaptive learning software adjusts the difficulty of lessons based on a student’s performance, ensuring that they remain challenged without becoming overwhelmed. 
This level of personalization not only improves academic outcomes but also boosts students’ confidence and motivation. Another remarkable contribution of technology to education is its ability to enhance creativity and critical thinking. Digital tools such as graphic design software, coding platforms, and virtual reality applications allow students to express themselves in innovative ways. For example, instead of writing a traditional book report, students can create multimedia presentations, interactive websites, or even animated films to demonstrate their understanding of a topic. These creative projects encourage students to think critically, collaborate with their peers, and develop problem-solving skills—abilities that are highly valued in today’s workforce. Moreover, technology plays a crucial role in preparing students for the future. The rapid pace of technological advancement means that many of the jobs today’s students will occupy do not yet exist. Familiarity with digital tools and an understanding of emerging technologies are essential for success in a constantly evolving job market. Schools that integrate technology into their curricula equip students with the skills they need to navigate and thrive in this environment. Coding classes, for instance, teach logical reasoning and computational thinking, while digital literacy programs emphasize the responsible and effective use of technology. Despite these advantages, it is important to acknowledge the challenges associated with technology in education. Critics often point to the potential for distraction, as students may be tempted to use devices for non-educational purposes. Additionally, overreliance on technology can lead to reduced interpersonal interactions and a diminished capacity for critical thinking. However, these issues can be mitigated through proper guidance and the implementation of balanced, well-thought-out policies. 
Teachers and administrators play a vital role in ensuring that technology is used as a tool to enhance learning rather than a substitute for meaningful engagement. In conclusion, technology has undeniably reshaped the landscape of education, offering unprecedented opportunities for students and educators alike. By breaking down barriers to access, personalizing learning, fostering creativity, and preparing students for the future, technology has become an indispensable component of modern education. While it is essential to address its challenges, the potential benefits of integrating technology into education are too significant to ignore. As we move forward, it is imperative that we continue to explore innovative ways to harness the power of technology, ensuring that it serves as a catalyst for growth and development in education"


# Extract features for the essays
bad_essay_features = extract_features(bad_essay)
great_essay_features = extract_features(great_essay)

# Display features
print("Bad Essay Features:", bad_essay_features)
print("Great Essay Features:", great_essay_features)


# Load saved models and scaler
# load_model restores each network's architecture together with its weights, so
# the layer stack is read from the checkpoint instead of being re-typed here,
# where it could drift from the training cell. compile=False: inference only.
from tensorflow.keras.models import load_model

# NOTE: joblib.load unpickles its input; only load artefacts written by this notebook.
scaler = joblib.load('scaler.pkl')
lr_model = joblib.load('linear_regression_model.pkl')
ann_model = load_model('ann_model.keras', compile=False)
lstm_model = load_model('lstm_model.keras', compile=False)
rnn_model = load_model('rnn_model.keras', compile=False)

# Scale features
# Passing the column names the scaler was fitted with avoids scikit-learn's
# feature-name warning and rejects a vector of the wrong length early.
feature_names = getattr(scaler, 'feature_names_in_', None)
bad_essay_scaled = scaler.transform(pd.DataFrame([bad_essay_features], columns=feature_names))
great_essay_scaled = scaler.transform(pd.DataFrame([great_essay_features], columns=feature_names))

# The recurrent models take (batch, timesteps, features); one essay = one timestep.
bad_essay_scaled_lstm = bad_essay_scaled.reshape(1, 1, -1)
great_essay_scaled_lstm = great_essay_scaled.reshape(1, 1, -1)

# Predict scores for bad essay
bad_essay_predictions = {
    'Linear Regression': lr_model.predict(bad_essay_scaled)[0],
    'ANN': ann_model.predict(bad_essay_scaled)[0][0],
    'LSTM': lstm_model.predict(bad_essay_scaled_lstm)[0][0],
    'RNN': rnn_model.predict(bad_essay_scaled_lstm)[0][0]
}

# Predict scores for great essay
great_essay_predictions = {
    'Linear Regression': lr_model.predict(great_essay_scaled)[0],
    'ANN': ann_model.predict(great_essay_scaled)[0][0],
    'LSTM': lstm_model.predict(great_essay_scaled_lstm)[0][0],
    'RNN': rnn_model.predict(great_essay_scaled_lstm)[0][0]
}

# Clip predictions to range [0, 10]
def clip_scores(predictions, lower=0, upper=10):
    """Clamp every predicted score into ``[lower, upper]``.

    Args:
        predictions: dict mapping model name to a numeric score.
        lower: Smallest allowed score (default 0).
        upper: Largest allowed score (default 10).

    Returns:
        New dict with the same keys. A NaN score is passed through unchanged;
        a plain ``max(lower, min(upper, score))`` would silently report it as
        ``upper``, i.e. as a perfect score.
    """
    clipped = {}
    for model, score in predictions.items():
        if score != score:  # only NaN is unequal to itself
            clipped[model] = score
        else:
            clipped[model] = max(lower, min(upper, score))
    return clipped

# Clamp both prediction sets to the valid score range.
bad_essay_predictions, great_essay_predictions = map(
    clip_scores, (bad_essay_predictions, great_essay_predictions)
)

# Report the clipped scores of every model for both essays.
print("Bad Essay Predictions:", bad_essay_predictions)
print("Great Essay Predictions:", great_essay_predictions)


Bad Essay Features: [0, 1, 114, 26, 6, 3.6538461538461537, 14, 4, 3, 4, 0, 85.05, 0.3, 0.6538461538461539, 0.0, 0.0, 0.614905059337616, 0.6, 0.0, 0.05, 0.0]
Great Essay Features: [0, 1, 4649, 737, 32, 5.423337856173677, 628, 213, 70, 94, 29, 16.93, 0.04747320061255743, 0.5006784260515604, 1.0, 6.5, 0.5454023528483606, 0.514847423818012, 0.004070556309362279, 0.004594180704441042, 0.0]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 135ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 93ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 228ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
Bad Essay Predictions: {'Linear Regression': 2.5031762898919223, 'ANN': 0.14068669, 'LSTM': 1.8740671, 'RNN': 0.85748106}
Great Essay Predictions: {'Linear Regression': 10, 'ANN': 9.914194, 'LSTM': 1.3359907, 'RNN': 3.8745875}


In [38]:
great_essay = "The author concludes the story with this paragraph to show SaengÂ’s determination. Saeng has been through a lot and misses what its like at home. She feels different and out of place but she is still strong. Life goes on, and this paragraph shows that Saeng with move with it. The paragraph says, Â“in the spring, when the snows melt and the geese return and this hibiscus is budding, then I will take that test again.Â” Spring will come, as nothing can stop time. The snow will melt because the weather changes. The geese will fly home and the hibiscus will bloom in spring. Saeng takes a positive look at things and decides to join them. Sure she failed once but she is strong and willing to try again. If at first you donÂ’t succeed, try, try again. Saeng will always have her memories of home but she is willing to change to her new sorroundings. The concluding paragraph shows SaengÂ’s determination to succeed and survive in the new world."


# Extract features for the essays
bad_essay_features = extract_features(bad_essay)
great_essay_features = extract_features(great_essay)

# Display features
print("Bad Essay Features:", bad_essay_features)
print("Great Essay Features:", great_essay_features)


# Load saved models and scaler
# load_model restores each network's architecture together with its weights, so
# the layer stack is read from the checkpoint instead of being re-typed here,
# where it could drift from the training cell. compile=False: inference only.
from tensorflow.keras.models import load_model

# NOTE: joblib.load unpickles its input; only load artefacts written by this notebook.
scaler = joblib.load('scaler.pkl')
lr_model = joblib.load('linear_regression_model.pkl')
ann_model = load_model('ann_model.keras', compile=False)
lstm_model = load_model('lstm_model.keras', compile=False)
rnn_model = load_model('rnn_model.keras', compile=False)

# Scale features
# Passing the column names the scaler was fitted with avoids scikit-learn's
# feature-name warning and rejects a vector of the wrong length early.
feature_names = getattr(scaler, 'feature_names_in_', None)
bad_essay_scaled = scaler.transform(pd.DataFrame([bad_essay_features], columns=feature_names))
great_essay_scaled = scaler.transform(pd.DataFrame([great_essay_features], columns=feature_names))

# The recurrent models take (batch, timesteps, features); one essay = one timestep.
bad_essay_scaled_lstm = bad_essay_scaled.reshape(1, 1, -1)
great_essay_scaled_lstm = great_essay_scaled.reshape(1, 1, -1)

# Predict scores for bad essay
bad_essay_predictions = {
    'Linear Regression': lr_model.predict(bad_essay_scaled)[0],
    'ANN': ann_model.predict(bad_essay_scaled)[0][0],
    'LSTM': lstm_model.predict(bad_essay_scaled_lstm)[0][0],
    'RNN': rnn_model.predict(bad_essay_scaled_lstm)[0][0]
}

# Predict scores for great essay
great_essay_predictions = {
    'Linear Regression': lr_model.predict(great_essay_scaled)[0],
    'ANN': ann_model.predict(great_essay_scaled)[0][0],
    'LSTM': lstm_model.predict(great_essay_scaled_lstm)[0][0],
    'RNN': rnn_model.predict(great_essay_scaled_lstm)[0][0]
}

# Clip predictions to range [0, 10]
def clip_scores(predictions, low=0, high=10):
    """Clamp every predicted score into the closed range [low, high].

    Parameters
    ----------
    predictions : dict
        Mapping of model name -> numeric predicted score.
    low, high : number, optional
        Inclusive lower and upper bounds. They default to 0 and 10.

    Returns
    -------
    dict
        A new dict with the same keys in the same order. In-range scores are
        returned unchanged, out-of-range scores are replaced by the nearest
        bound, and NaN scores are passed through as NaN.
    """
    clipped = {}
    for model, score in predictions.items():
        # NaN is the only value that is not equal to itself. Every ordering
        # comparison with NaN is False, so max(low, min(high, nan)) would
        # silently evaluate to `high` and report a failed prediction as the
        # best possible score. Keep NaN visible instead.
        if score != score:
            clipped[model] = score
        else:
            clipped[model] = max(low, min(high, score))
    return clipped

# Pass both result sets through clip_scores before reporting them.
bad_essay_predictions, great_essay_predictions = (
    clip_scores(bad_essay_predictions),
    clip_scores(great_essay_predictions),
)

# Report the clipped per-model scores, bad essay first.
for _title, _scores in (
    ("Bad Essay Predictions:", bad_essay_predictions),
    ("Great Essay Predictions:", great_essay_predictions),
):
    print(_title, _scores)


Bad Essay Features: [0, 1, 114, 26, 6, 3.6538461538461537, 14, 4, 3, 4, 0, 85.05, 0.3, 0.6538461538461539, 0.0, 0.0, 0.614905059337616, 0.6, 0.0, 0.05, 0.0]
Great Essay Features: [0, 1, 945, 198, 12, 3.919191919191919, 155, 32, 9, 29, 10, 92.02, 0.07647058823529412, 0.5454545454545454, 0.6, 2.3333333333333335, 0.4758890894326297, 0.5879564879564879, 0.0, 0.0058823529411764705, 0.0]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 122ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 92ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 232ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
Bad Essay Predictions: {'Linear Regression': 2.5031762898919223, 'ANN': 0.14068669, 'LSTM': 1.8740671, 'RNN': 0.85748106}
Great Essay Predictions: {'Linear Regression': 9.218299661726547, 'ANN': 3.817739, 'LSTM': 0.50579363, 'RNN': 2.6469889}


In [41]:
# Essay text scored as the "great" sample by the code below in this cell.
# Only `great_essay` is assigned here; `bad_essay` is not defined in this cell,
# so it must already exist in the kernel from an earlier cell.
# NOTE(review): the misspellings and dropped words look like part of the sample
# essay itself (presumably intentional). The string is runtime input to
# extract_features, so do not "correct" it.
great_essay = "Dear is a rumor going around saying that computers are a negitive affect in a childs life. I disagree. A computer can help a kid in so many ways, it can help students with homework, let students talk to friends, and help with school projects. And besides, most kids don't don't stay on the computer very long anyways. Homeworks, it's a big role in school, if you don't do it or worse don't get it can you down. That's why computers have websites that help a child learn how do do a problem or a social studies question if they don't know. If you go to the school web page then theres a link that brings you in a math games. Theres even a to a science website. This helps a child so they don't get stressed out over homework. A computer also helps a students interact with friends. A social life is very important to a grater. This is where a computer can have aim, facebook, and even myspace help a kid impact with friends. If a kid got into a fight at school can come home and to work it out and not leave it unseatled. A computer also new friends from other towns. Teens need to and make new friesnd will helps as in life. If a teen helps with a then they can ask a for help. Every student gets a take home project at social point in the time of running out to the store to buy paper and pencils. you can stay home, have money and typee the essay. Computers can help with a social studies or a science project. You can print pictures and find information. Drawing can be a hastle and messy, so it saves time and the mess. If are words you dont understandd then you can look them out. Computers are and best way to do a project. Computers, everyone uses them, they help our parents just as much as uss. They help us with homework, let us talk to friends, and even help us do our projects! Computers are one of the best technology we have"


# Turn each essay into the numeric feature vector the models consume.
bad_essay_features, great_essay_features = (
    extract_features(bad_essay),
    extract_features(great_essay),
)

# Show the raw (unscaled) feature vectors.
for _title, _vector in (
    ("Bad Essay Features:", bad_essay_features),
    ("Great Essay Features:", great_essay_features),
):
    print(_title, _vector)


# Load the saved scaler and linear-regression model from disk.
scaler, lr_model = (
    joblib.load('scaler.pkl'),
    joblib.load('linear_regression_model.pkl'),
)

# All three networks are sized from the bad essay's feature count.
_n_features = len(bad_essay_features)
_seq_shape = (1, _n_features)  # recurrent models receive a single timestep

# Each architecture is rebuilt in code and then filled from its saved file.
# NOTE(review): presumably these layer stacks must mirror the ones used at
# training time for load_weights to succeed -- confirm against the training cell.
_ann_layers = [
    Input(shape=(_n_features,)),
    Dense(64, activation='relu'),
    Dense(32, activation='relu'),
    Dense(1, activation='linear'),
]
ann_model = Sequential(_ann_layers)
ann_model.load_weights('ann_model.keras')

_lstm_layers = [
    Input(shape=_seq_shape),
    LSTM(64, activation='tanh', return_sequences=False),
    Dense(32, activation='relu'),
    Dense(1, activation='linear'),
]
lstm_model = Sequential(_lstm_layers)
lstm_model.load_weights('lstm_model.keras')

_rnn_layers = [
    Input(shape=_seq_shape),
    SimpleRNN(64, activation='tanh', return_sequences=False),
    Dense(32, activation='relu'),
    Dense(1, activation='linear'),
]
rnn_model = Sequential(_rnn_layers)
rnn_model.load_weights('rnn_model.keras')

# Scale each feature vector as a one-row batch.
bad_essay_scaled = scaler.transform([bad_essay_features])
great_essay_scaled = scaler.transform([great_essay_features])

# The recurrent models take a 3-D input, so add a length-1 middle axis.
bad_essay_scaled_lstm = bad_essay_scaled.reshape(1, 1, _n_features)
great_essay_scaled_lstm = great_essay_scaled.reshape(1, 1, len(great_essay_features))

# Score the bad essay first, then the great essay, running the four models in
# the same order each time. The flat row feeds the linear model and the ANN;
# the 3-D row feeds the LSTM and the RNN.
bad_essay_predictions, great_essay_predictions = (
    {
        'Linear Regression': lr_model.predict(_flat)[0],
        'ANN': ann_model.predict(_flat)[0][0],
        'LSTM': lstm_model.predict(_seq)[0][0],
        'RNN': rnn_model.predict(_seq)[0][0],
    }
    for _flat, _seq in (
        (bad_essay_scaled, bad_essay_scaled_lstm),
        (great_essay_scaled, great_essay_scaled_lstm),
    )
)

# Clip predictions to range [0, 10]
def clip_scores(predictions, low=0, high=10):
    """Clamp every predicted score into the closed range [low, high].

    Parameters
    ----------
    predictions : dict
        Mapping of model name -> numeric predicted score.
    low, high : number, optional
        Inclusive lower and upper bounds. They default to 0 and 10.

    Returns
    -------
    dict
        A new dict with the same keys in the same order. In-range scores are
        returned unchanged, out-of-range scores are replaced by the nearest
        bound, and NaN scores are passed through as NaN.
    """
    clipped = {}
    for model, score in predictions.items():
        # NaN is the only value that is not equal to itself. Every ordering
        # comparison with NaN is False, so max(low, min(high, nan)) would
        # silently evaluate to `high` and report a failed prediction as the
        # best possible score. Keep NaN visible instead.
        if score != score:
            clipped[model] = score
        else:
            clipped[model] = max(low, min(high, score))
    return clipped

# Pass both result sets through clip_scores before reporting them.
bad_essay_predictions, great_essay_predictions = (
    clip_scores(bad_essay_predictions),
    clip_scores(great_essay_predictions),
)

# Report the clipped per-model scores, bad essay first.
for _title, _scores in (
    ("Bad Essay Predictions:", bad_essay_predictions),
    ("Great Essay Predictions:", great_essay_predictions),
):
    print(_title, _scores)


Bad Essay Features: [0, 1, 114, 26, 6, 3.6538461538461537, 14, 4, 3, 4, 0, 85.05, 0.3, 0.6538461538461539, 0.0, 0.0, 0.614905059337616, 0.6, 0.0, 0.05, 0.0]
Great Essay Features: [0, 1, 1838, 404, 26, 3.6683168316831685, 340, 86, 22, 56, 19, 90.8, 0.0700280112044818, 0.42574257425742573, 0.6538461538461539, 5.923076923076923, 0.38767436414957046, 0.3217323232323232, 0.0, 0.0, 0.0]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 122ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 110ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 218ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
Bad Essay Predictions: {'Linear Regression': 2.5031762898919223, 'ANN': 0.14068669, 'LSTM': 1.8740671, 'RNN': 0.85748106}
Great Essay Predictions: {'Linear Regression': 10, 'ANN': 5.570169, 'LSTM': 0.8563629, 'RNN': 3.8242378}
