In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Conv1D, GlobalMaxPooling1D, Dense, Concatenate
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import mean_squared_error
from tensorflow.keras.models import Sequential, Model
from sklearn.metrics import mean_absolute_error
from tensorflow.keras.callbacks import EarlyStopping 


In [2]:
# Read the essay dataset and pull out the three columns the rest of the
# notebook works with: the prompt text, the essay text and the 'Overall' value
# that is later used as the regression target.
data = pd.read_csv('ielts_writing_dataset.csv')

# Both text columns are coerced to str; the target column is coerced to float.
questions, answers = (data[column].astype(str) for column in ('Question', 'Essay'))
marks = data['Overall'].astype(float)


In [3]:
# Vocabulary size cap for the tokenizer and the fixed token length that every
# question and essay is padded/truncated to.
max_words = 10000
max_sequence_length = 100

# Fit one shared vocabulary over every question and every essay.
# The two Series are stacked with pd.concat instead of being added together:
# `questions + answers` concatenates the strings row by row with no separator,
# which can fuse the last word of a question with the first word of its essay
# into a single bogus token.
tokenizer = Tokenizer(num_words=max_words)
tokenizer.fit_on_texts(pd.concat([questions, answers], ignore_index=True))

# Map each text to its list of integer word ids.
question_sequences = tokenizer.texts_to_sequences(questions)
answer_sequences = tokenizer.texts_to_sequences(answers)

# NOTE(review): pad_sequences pads and truncates at the front by default, so a
# text longer than max_sequence_length keeps only its last tokens - confirm
# that this is intended for the essays.
X = pad_sequences(question_sequences, maxlen=max_sequence_length)
X_ans = pad_sequences(answer_sequences, maxlen=max_sequence_length)

In [4]:
# Hold out 20% of the rows for evaluation. Splitting the three arrays in one
# call keeps each question, essay and mark aligned; random_state fixes the
# shuffle so the split is repeatable.
X_train, X_test, X_ans_train, X_ans_test, y_train, y_test = train_test_split(
    X, X_ans, marks, test_size=0.2, random_state=42
)

# Width of the word-embedding vectors used by the text branches built below.
# (The placeholder `model = Sequential()` that used to follow was never used:
# `model` is rebound to the real two-input network before its first use.)
embedding_dim = 100

In [5]:
def _build_text_branch():
    """Return a fresh Sequential text encoder.

    Layers, in order: Embedding(max_words, embedding_dim) over sequences of
    length max_sequence_length, Conv1D with 128 filters of width 5 and ReLU
    activation, then GlobalMaxPooling1D.
    """
    return Sequential([
        Embedding(max_words, embedding_dim, input_length=max_sequence_length),
        Conv1D(128, 5, activation='relu'),
        GlobalMaxPooling1D(),
    ])


# Two branches with the same architecture but separate layer instances:
# one encodes the question tokens, the other the essay tokens.
question_input = _build_text_branch()
answer_input = _build_text_branch()

In [6]:
# Join the two branches: their outputs are concatenated and fed to a single
# linear unit that regresses the mark.
branch_inputs = [question_input.input, answer_input.input]
branch_outputs = [question_input.output, answer_input.output]
concatenated = Concatenate()(branch_outputs)
out = Dense(1, activation='linear')(concatenated)

model = Model(inputs=branch_inputs, outputs=out)
model.compile(
    loss='mean_squared_error',
    optimizer=Adam(learning_rate=0.001),
)

# Stop once val_loss has not improved for 3 epochs and roll back to the best weights.
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=3,
    restore_best_weights=True,
)

# validation_split carves the validation set out of the training arrays only.
model.fit(
    [X_train, X_ans_train],
    y_train,
    batch_size=64,
    epochs=30,
    validation_split=0.1,
    callbacks=[early_stopping],
)

# Score the held-out split.
y_pred = model.predict([X_test, X_ans_test])
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)

print(f"Mean Absolute Error (MAE): {mae:.2f}")
print(f"Mean Squared Error (MSE): {mse:.2f}")



Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Mean Absolute Error (MAE): 0.70
Mean Squared Error (MSE): 0.83


In [7]:
# Score one hand-written question/answer pair with the trained model.
new_question = ["How does photosynthesis work?"]
new_answer = ["Photosynthesis is the process by which plants convert light energy into chemical energy."]

# Apply the same preprocessing as for the training data: word ids from the
# fitted tokenizer, then padding to max_sequence_length.
new_question_seq, new_answer_seq = (
    tokenizer.texts_to_sequences(texts) for texts in (new_question, new_answer)
)
new_question_padded, new_answer_padded = (
    pad_sequences(seq, maxlen=max_sequence_length)
    for seq in (new_question_seq, new_answer_seq)
)

# predict returns one row per input pair with a single output column.
prediction = model.predict([new_question_padded, new_answer_padded])
predicted_mark = prediction[0][0]
print(f"Predicted Mark: {predicted_mark:.2f}")

Predicted Mark: 4.46


In [8]:
# Persist the trained network. The HDF5 file name is kept unchanged so that
# anything already loading 'ieltsscore.h5' keeps working.
model.save('ieltsscore.h5')

# The network only accepts padded word ids, so the saved model cannot score raw
# text without the exact vocabulary it was trained with. Store the fitted
# tokenizer next to it so inference can be reproduced in a fresh process.
with open('ieltsscore_tokenizer.json', 'w', encoding='utf-8') as tokenizer_file:
    tokenizer_file.write(tokenizer.to_json())

  saving_api.save_model(
