In [7]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import SVC
from sklearn.pipeline import Pipeline
from sklearn.metrics import classification_report
from sklearn.preprocessing import LabelEncoder

df = pd.read_csv('/Users/abdullahalsakib/Downloads/ielts/Ml_part/Task Achievement.csv')
X = df['Question'] + ' ' + df['Answer']  
y =df['Answer_all_parts_of_the_question'].replace('no', 'No')


# Use LabelEncoder for multi-class classification
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

svm_pipeline = Pipeline([
    ('tfidf', TfidfVectorizer(max_features=5000, stop_words='english')),
    ('svm', SVC())
])

param_grid = {
    'tfidf__max_features': [5000, 10000, None],
    'tfidf__ngram_range': [(1, 1), (1, 2)],
    'svm__C': [0.1, 1, 10],
    'svm__kernel': ['rbf'],
    'svm__gamma': [0.1, 1, 'auto']
}

grid_search = GridSearchCV(svm_pipeline, param_grid, cv=5, n_jobs=-1)
grid_search.fit(X_train, y_train)

print("Best parameters found:", grid_search.best_params_)

predictions = grid_search.predict(X_test)
print("Classification Report:")
print(classification_report(y_test, predictions))


Best parameters found: {'svm__C': 10, 'svm__gamma': 1, 'svm__kernel': 'rbf', 'tfidf__max_features': 10000, 'tfidf__ngram_range': (1, 2)}
Classification Report:
              precision    recall  f1-score   support

           0       0.43      0.53      0.48        30
           1       0.58      0.47      0.52        40

    accuracy                           0.50        70
   macro avg       0.50      0.50      0.50        70
weighted avg       0.51      0.50      0.50        70



In [8]:
import joblib
best_model = grid_search.best_estimator_
joblib.dump(best_model, 'Answer_all_parts_of_the_question.pkl')

['Answer_all_parts_of_the_question.pkl']

In [12]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import SVC
from sklearn.pipeline import Pipeline
from sklearn.metrics import classification_report
from sklearn.preprocessing import LabelEncoder

df = pd.read_csv('/Users/abdullahalsakib/Downloads/ielts/Ml_part/Task Achievement.csv')
X = df['Question'] + ' ' + df['Answer']  
y= df['Present_relevant_ideas'].replace({'no': 'No', 'yes': 'Yes'}, inplace=False)

label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

svm_pipeline = Pipeline([
    ('tfidf', TfidfVectorizer(max_features=5000, stop_words='english')),
    ('svm', SVC())
])

param_grid = {
    'tfidf__max_features': [5000, 10000, None],
    'tfidf__ngram_range': [(1, 1), (1, 2)],
    'svm__C': [0.1, 1, 10],
    'svm__kernel': ['rbf'],
    'svm__gamma': [0.1, 1, 'auto']
}

grid_search = GridSearchCV(svm_pipeline, param_grid, cv=5, n_jobs=-1)
grid_search.fit(X_train, y_train)

print("Best parameters found:", grid_search.best_params_)

predictions = grid_search.predict(X_test)
print("Classification Report:")
print(classification_report(y_test, predictions))


Best parameters found: {'svm__C': 10, 'svm__gamma': 0.1, 'svm__kernel': 'rbf', 'tfidf__max_features': 10000, 'tfidf__ngram_range': (1, 1)}
Classification Report:
              precision    recall  f1-score   support

           0       0.65      0.76      0.70        34
           1       0.73      0.61      0.67        36

    accuracy                           0.69        70
   macro avg       0.69      0.69      0.68        70
weighted avg       0.69      0.69      0.68        70



In [13]:
import joblib
best_model = grid_search.best_estimator_
joblib.dump(best_model, 'Present_relevant_ideas.pkl')

['Present_relevant_ideas.pkl']

In [14]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import SVC
from sklearn.pipeline import Pipeline
from sklearn.metrics import classification_report
from sklearn.preprocessing import LabelEncoder

df = pd.read_csv('/Users/abdullahalsakib/Downloads/ielts/Ml_part/Task Achievement.csv')
X = df['Question'] + ' ' + df['Answer']  
y= df['Fully_explain_these_ideas'].replace({'no': 'No', 'yes': 'Yes'}, inplace=False)

label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

svm_pipeline = Pipeline([
    ('tfidf', TfidfVectorizer(max_features=5000, stop_words='english')),
    ('svm', SVC())
])

param_grid = {
    'tfidf__max_features': [5000, 10000, None],
    'tfidf__ngram_range': [(1, 1), (1, 2)],
    'svm__C': [0.1, 1, 10],
    'svm__kernel': ['rbf'],
    'svm__gamma': [0.1, 1, 'auto']
}

grid_search = GridSearchCV(svm_pipeline, param_grid, cv=5, n_jobs=-1)
grid_search.fit(X_train, y_train)

print("Best parameters found:", grid_search.best_params_)

predictions = grid_search.predict(X_test)
print("Classification Report:")
print(classification_report(y_test, predictions))




Best parameters found: {'svm__C': 0.1, 'svm__gamma': 0.1, 'svm__kernel': 'rbf', 'tfidf__max_features': 5000, 'tfidf__ngram_range': (1, 1)}
Classification Report:
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         2
           1       0.97      1.00      0.99        68

    accuracy                           0.97        70
   macro avg       0.49      0.50      0.49        70
weighted avg       0.94      0.97      0.96        70



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [15]:
import joblib
best_model = grid_search.best_estimator_
joblib.dump(best_model, 'Fully_explain_these_ideas.pkl')

['Fully_explain_these_ideas.pkl']

In [19]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import SVC
from sklearn.pipeline import Pipeline
from sklearn.metrics import classification_report
from sklearn.preprocessing import LabelEncoder

df = pd.read_csv('/Users/abdullahalsakib/Downloads/ielts/Ml_part/Task Achievement.csv')
X = df['Question'] + ' ' + df['Answer']  
y= df['Support_ideas_with_relevant, specific_examples'].replace({'no': 'No', 'yes': 'Yes'}, inplace=False)

label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

svm_pipeline = Pipeline([
    ('tfidf', TfidfVectorizer(max_features=5000, stop_words='english')),
    ('svm', SVC())
])

param_grid = {
    'tfidf__max_features': [5000, 10000, None],
    'tfidf__ngram_range': [(1, 1), (1, 2)],
    'svm__C': [0.1, 1, 10],
    'svm__kernel': ['rbf'],
    'svm__gamma': [0.1, 1, 'auto']
}

grid_search = GridSearchCV(svm_pipeline, param_grid, cv=5, n_jobs=-1)
grid_search.fit(X_train, y_train)

print("Best parameters found:", grid_search.best_params_)

predictions = grid_search.predict(X_test)
print("Classification Report:")
print(classification_report(y_test, predictions))


Best parameters found: {'svm__C': 1, 'svm__gamma': 1, 'svm__kernel': 'rbf', 'tfidf__max_features': 10000, 'tfidf__ngram_range': (1, 2)}
Classification Report:
              precision    recall  f1-score   support

           0       0.72      0.62      0.67        34
           1       0.68      0.78      0.73        36

    accuracy                           0.70        70
   macro avg       0.70      0.70      0.70        70
weighted avg       0.70      0.70      0.70        70



In [20]:

import joblib
best_model = grid_search.best_estimator_
joblib.dump(best_model, 'specific_examples.pkl')

['specific_examples.pkl']

In [21]:
/Users/abdullahalsakib/Downloads/420115374_1042486203703131_7919544105684316365_n.png

Best Parameters: {'C': 10, 'gamma': 0.1, 'kernel': 'rbf'}
Cross-Validation Scores: [ 0.24374229 -0.19250573  0.13571601  0.07075526  0.24341595]
Mean Squared Error: 0.416052478884062


In [22]:
joblib.dump(final_svm_model, 'Task_score.joblib')
print('Model saved successfully.')

Model saved successfully.


In [23]:
joblib.dump(vectorizer, 'Task_score_tfidf_vectorizer.joblib')


['Task_score_tfidf_vectorizer.joblib']