In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import SVC
from sklearn.pipeline import Pipeline
from sklearn.metrics import classification_report

In [2]:
df = pd.read_csv('Coherence_and_Cohesion.csv')
X = df['question'] + ' ' + df['answer']  
y = df[['Structure_your_answers_in_logical_paragraphs', 'One_main_idea_per_paragraph', 'Include_an_introduction_and_conclusion', 'Support_main_points_with_an_explanation_and_then_an_example', 'Use_cohesive_linking_words_accurately_and_appropriately', 'Vary_your_linking_phrases_using_synonyms', 'score']]



In [3]:
# Split into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define the pipeline
svm_pipeline = Pipeline([
    ('tfidf', TfidfVectorizer(max_features=5000, stop_words='english')),
    ('svm', SVC())
])

In [4]:
# Define hyperparameters to search
param_grid = {
    'tfidf__max_features': [5000, 10000, None],
    'tfidf__ngram_range': [(1, 1), (1, 2)],
    'svm__C': [0.1, 1, 10],
    'svm__kernel': ['rbf'],
    'svm__gamma': [0.1, 1, 'auto']
}

In [5]:
grid_search = GridSearchCV(svm_pipeline, param_grid, cv=5, n_jobs=-1)
grid_search.fit(X_train, y_train['Structure_your_answers_in_logical_paragraphs'])  

In [6]:
# Print the best parameters
print("Best parameters found:", grid_search.best_params_)

Best parameters found: {'svm__C': 10, 'svm__gamma': 1, 'svm__kernel': 'rbf', 'tfidf__max_features': 5000, 'tfidf__ngram_range': (1, 1)}


In [7]:
predictions = grid_search.predict(X_test)


In [8]:
print("Classification Report:")
print(classification_report(y_test['Structure_your_answers_in_logical_paragraphs'], predictions))

Classification Report:
              precision    recall  f1-score   support

          No       0.84      1.00      0.91        58
         Yes       1.00      0.08      0.15        12

    accuracy                           0.84        70
   macro avg       0.92      0.54      0.53        70
weighted avg       0.87      0.84      0.78        70



In [9]:
import joblib
best_model = grid_search.best_estimator_
joblib.dump(best_model, 'Structure_your_answers_in_logical_paragraphs_svm_model_Coherence_and_Cohesion.pkl')

['Structure_your_answers_in_logical_paragraphs_svm_model_Coherence_and_Cohesion.pkl']

In [11]:
loaded_model = joblib.load('Structure_your_answers_in_logical_paragraphs_svm_model_Coherence_and_Cohesion.pkl')


In [14]:
# Take individual inputs without user interaction
input_question = "Levels of youth crime are increasing rapidly in most cities around the world. What are the reasons for this, and suggest some solutions."
input_answer = "Nowadays crime rate in young generation is escalating worldwide in most of the cities. This could be due to unemployment and parents’ negligence towards their children and this would be best solved by taking some efforts such as government should provide more employment opportunities for graduates and parents should spend more time with their children. The first primitive reason behind crime of adolescents is scarcity of jobs. Even though youngsters have degrees, certificates of universities but due to unavailability of jobs they did not get jobs which further forces them to choose the wrong path. For example, if youth did not get a job in their required field, they will try other ways to earn money like gambling, murders. Secondly, parents are busy in their hectic schedule that they do not have time to spend with their children. Children feel neglected due to the fact that they started doing things which are harmful to their future. However, without counseling and guidance, they opt for the wrong path which would be devastating for their future. To combat the problem of crime, the government has to take steps to eliminate the issue of unemployment. In other words, there should be some part-time jobs available for youngsters so that their minds do not get distracted after completion of their studies. While parents or guardians of children should spend some time in a week so that children will not feel alone and can share their feelings and concerns with them. Thus, it increases bonding between parents and children, and they will think twice before doing any harmful activity. To conclude, it is a joint effort of the government and parents to make the next generation in good standing so that they will increase the economy of the world instead of making it a strain on the world."

# Merge the input strings
merged_input = input_question + ' ' + input_answer

# Make a prediction using the loaded model
prediction = loaded_model.predict([merged_input])

print("Predicted output:", prediction)


Predicted output: ['No']
