In [1]:
import pandas as pd
import numpy as np
from nltk.tokenize import word_tokenize
from nltk import pos_tag
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from sklearn.preprocessing import LabelEncoder
from collections import defaultdict
from nltk.corpus import wordnet as wn
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn import model_selection, naive_bayes, svm
from sklearn.metrics import accuracy_score

In [55]:
train_set = pd.read_csv("./asap-aes/training_set_rel3.tsv", sep='\t', encoding="latin-1")

In [56]:
train_set = train_set[train_set['essay_set'] == 2]  # filter for set 2
train_set = train_set.reset_index() # resets index

In [57]:
train_set.drop(train_set.columns[0], axis=1, inplace=True) # removes old index
train_set.drop(train_set.columns[7:], axis=1, inplace=True) # filter only domain 1 scores
train_set.drop(train_set.columns[5], axis=1, inplace=True) # removes rater3_domain1 (NaN for all)

In [61]:
train_set['essay'] = [entry.lower() for entry in train_set['essay']] # lower case for all words in essay
train_set["essay"] = [word_tokenize(entry) for entry in train_set["essay"]] # break paragraphs string into tokens

In [64]:
train_set['avg_score'] = (train_set['rater1_domain1'] + train_set['rater2_domain1']) / 2 # calculate average score
train_set['avg_score'] = train_set['avg_score'].apply(np.ceil).astype(int) # round off average score

In [66]:
np.random.seed(500)
tag_map = defaultdict(lambda: wn.NOUN)
tag_map['J'] = wn.ADJ
tag_map['V'] = wn.VERB
tag_map['R'] = wn.ADV
count = 0
for index, entry in enumerate(train_set['essay']):
    final_words = []
    word_lemmatized = WordNetLemmatizer()
    
    for word, tag in pos_tag(entry):
        if word not in stopwords.words("english") and word.isalpha():
            word_final = word_lemmatized.lemmatize(word, tag_map[tag[0]])
            final_words.append(word_final)

    train_set.loc[index, "essay_final"] = str(final_words)
train_set.drop(train_set.columns[2], axis=1, inplace=True) # removes the original essay column

In [112]:
train_set['type1'] = train_set.apply(lambda x: 0 if x['avg_score'] <= 3 else 1, axis=1)
train_set['type2'] = train_set.apply(lambda x: 0 if x['avg_score'] <= 4 else 1, axis=1)
train_set['type3'] = train_set.apply(lambda x: 0 if x['avg_score'] <= 5 else 1, axis=1)

In [131]:
train_set['avg_score'] = (train_set['rater1_domain1'] + train_set['rater2_domain1']) / 2 # calculate average score
train_set['avg_score'] = train_set['avg_score'].apply(np.ceil).astype(int) # round off average score

Unnamed: 0,essay_id,essay_set,rater1_domain1,rater2_domain1,domain1_score,avg_score,essay_final,type1,type2,type3
0,2978,2,4,4,4,4,"['certain', 'material', 'remove', 'library', '...",1,0,0
1,2979,2,1,2,1,2,"['write', 'persuasive', 'essay', 'newspaper', ...",0,0,0
2,2980,2,2,3,2,3,"['think', 'library', 'remove', 'certain', 'mat...",0,0,0
3,2981,2,4,4,4,4,"['world', 'many', 'thing', 'find', 'offensive'...",1,0,0
4,2982,2,4,4,4,4,"['life', 'thing', 'little', 'stuff', 'get', 's...",1,0,0
...,...,...,...,...,...,...,...,...,...,...
1795,4773,2,3,2,3,3,"['author', 'writting', 'take', 'book', 'adult'...",0,0,0
1796,4774,2,3,3,3,3,"['think', 'material', 'book', 'music', 'movie'...",0,0,0
1797,4775,2,2,2,2,2,"['yes', 'keep', 'book', 'music', 'movie', 'mag...",0,0,0
1798,4776,2,3,4,3,4,"['believe', 'book', 'magazine', 'music', 'movi...",1,0,0


## Classification with Average Score (6 labels from 1-6)

In [153]:
# splitting into training and testing set
train_essay, test_essay, train_label, test_label = model_selection.train_test_split(train_set['essay_final'], train_set['avg_score'], test_size=0.3)

# transform the avg score into label of 0,1,2,3....
Encoder = LabelEncoder()
train_label = Encoder.fit_transform(train_label)
test_label = Encoder.transform(test_label)

# transform essay into matrix
Tfidf_vect = TfidfVectorizer()
Tfidf_vect.fit(train_set["essay_final"])
train_essay_vect = Tfidf_vect.transform(train_essay)
test_essay_vect = Tfidf_vect.transform(test_essay)


In [154]:
# fitting training set into naive bayes
naive = naive_bayes.MultinomialNB()
naive.fit(train_essay_vect, train_label)

# fitting testing set on NB classifier
predictions_NB = naive.predict(test_essay_vect)
print(accuracy_score(predictions_NB, test_label)*100)

50.74074074074074


In [155]:
# Classifier - Algorithm - SVM
# fit the training dataset on the classifier
SVM = svm.SVC(C=1.0, kernel='rbf', degree=2, gamma='auto')
SVM.fit(train_essay_vect, train_label)
predictions_SVM = SVM.predict(test_essay_vect)
print(accuracy_score(predictions_SVM, test_label)*100)

50.0


In [156]:
predictions_NB

array([3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
       3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
       3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
       3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
       3, 3, 3, 3, 3, 3, 3, 3, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
       3, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
       3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
       3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
       3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
       3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
       3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
       3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
       3, 3, 3, 3, 3, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
       3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,

## Classification with Type 1 (2 labels: Label 0: Score 1-3, Label 1: Score 4-6)

In [149]:
# splitting into training and testing set
train_essay, test_essay, train_label, test_label = model_selection.train_test_split(train_set['essay_final'], train_set['type1'], test_size=0.3)

# transform the avg score into label of 0,1,2,3....
Encoder = LabelEncoder()
train_label = Encoder.fit_transform(train_label)
test_label = Encoder.transform(test_label)

# transform essay into matrix
Tfidf_vect = TfidfVectorizer()
Tfidf_vect.fit(train_set["essay_final"])
train_essay_vect = Tfidf_vect.transform(train_essay)
test_essay_vect = Tfidf_vect.transform(test_essay)


In [150]:
# fitting training set into naive bayes
naive = naive_bayes.MultinomialNB()
naive.fit(train_essay_vect, train_label)

# fitting testing set on NB classifier
predictions_NB = naive.predict(test_essay_vect)
print(accuracy_score(predictions_NB, test_label)*100)

57.592592592592595


In [151]:
# Classifier - Algorithm - SVM
# fit the training dataset on the classifier
SVM = svm.SVC(C=1.0, kernel='rbf', degree=2, gamma='auto')
SVM.fit(train_essay_vect, train_label)
predictions_SVM = SVM.predict(test_essay_vect)
print(accuracy_score(predictions_SVM, test_label)*100)

54.074074074074076


In [152]:
predictions_NB

array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1,
       0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1,

## Classification with Type 2 (2 labels: Label 0: Score 1-4, Label 1: Score 5-6)

In [157]:
# splitting into training and testing set
train_essay, test_essay, train_label, test_label = model_selection.train_test_split(train_set['essay_final'], train_set['type2'], test_size=0.3)

# transform the avg score into label of 0,1,2,3....
Encoder = LabelEncoder()
train_label = Encoder.fit_transform(train_label)
test_label = Encoder.transform(test_label)

# transform essay into matrix
Tfidf_vect = TfidfVectorizer()
Tfidf_vect.fit(train_set["essay_final"])
train_essay_vect = Tfidf_vect.transform(train_essay)
test_essay_vect = Tfidf_vect.transform(test_essay)


In [158]:
# fitting training set into naive bayes
naive = naive_bayes.MultinomialNB()
naive.fit(train_essay_vect, train_label)

# fitting testing set on NB classifier
predictions_NB = naive.predict(test_essay_vect)
print(accuracy_score(predictions_NB, test_label)*100)

93.7037037037037


In [159]:
# Classifier - Algorithm - SVM
# fit the training dataset on the classifier
SVM = svm.SVC(C=1.0, kernel='rbf', degree=2, gamma='auto')
SVM.fit(train_essay_vect, train_label)
predictions_SVM = SVM.predict(test_essay_vect)
print(accuracy_score(predictions_SVM, test_label)*100)

93.7037037037037


In [160]:
predictions_NB

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

## Classification with Type 3 (2 labels: Label 0: Score 1-5, Label 1: Score 6)

In [161]:
# splitting into training and testing set
train_essay, test_essay, train_label, test_label = model_selection.train_test_split(train_set['essay_final'], train_set['type3'], test_size=0.3)

# transform the avg score into label of 0,1,2,3....
Encoder = LabelEncoder()
train_label = Encoder.fit_transform(train_label)
test_label = Encoder.transform(test_label)

# transform essay into matrix
Tfidf_vect = TfidfVectorizer()
Tfidf_vect.fit(train_set["essay_final"])
train_essay_vect = Tfidf_vect.transform(train_essay)
test_essay_vect = Tfidf_vect.transform(test_essay)


In [162]:
# fitting training set into naive bayes
naive = naive_bayes.MultinomialNB()
naive.fit(train_essay_vect, train_label)

# fitting testing set on NB classifier
predictions_NB = naive.predict(test_essay_vect)
print(accuracy_score(predictions_NB, test_label)*100)

99.07407407407408


In [163]:
# Classifier - Algorithm - SVM
# fit the training dataset on the classifier
SVM = svm.SVC(C=1.0, kernel='rbf', degree=2, gamma='auto')
SVM.fit(train_essay_vect, train_label)
predictions_SVM = SVM.predict(test_essay_vect)
print(accuracy_score(predictions_SVM, test_label)*100)

99.07407407407408


In [178]:
predictions_NB

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

In [165]:
len(predictions_NB)

540

In [167]:
test_essay

1005    ['censorship', 'touchy', 'subject', 'society',...
1471    ['would', 'like', 'someone', 'take', 'book', '...
967     ['find', 'thing', 'offensive', 'wether', 'book...
1268    ['every', 'parent', 'want', 'protect', 'child'...
1622    ['censorship', 'method', 'limit', 'certain', '...
                              ...                        
1362    ['censorship', 'future', 'kid', 'america', 'pe...
205     ['consitered', 'offensive', 'take', 'book', 'm...
1496    ['censorship', 'library', 'question', 'side', ...
369     ['censorship', 'world', 'get', 'bad', 'everyda...
707     ['think', 'library', 'book', 'make', 'start', ...
Name: essay_final, Length: 540, dtype: object

In [176]:
te = test_essay.reset_index()
te.iloc[99,:]

index                                                       1654
essay_final    ['happen', 'use', 'book', 'film', 'simple', 'e...
Name: 99, dtype: object

In [173]:
test_essay

1005    ['censorship', 'touchy', 'subject', 'society',...
1471    ['would', 'like', 'someone', 'take', 'book', '...
967     ['find', 'thing', 'offensive', 'wether', 'book...
1268    ['every', 'parent', 'want', 'protect', 'child'...
1622    ['censorship', 'method', 'limit', 'certain', '...
                              ...                        
1362    ['censorship', 'future', 'kid', 'america', 'pe...
205     ['consitered', 'offensive', 'take', 'book', 'm...
1496    ['censorship', 'library', 'question', 'side', ...
369     ['censorship', 'world', 'get', 'bad', 'everyda...
707     ['think', 'library', 'book', 'make', 'start', ...
Name: essay_final, Length: 540, dtype: object

In [168]:
test_label

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

In [169]:
for i in range(len(test_label)):
    if test_label[i] == 1:
        print(i)

99
153
165
400
521


In [177]:
test_essay[99]

"['seem', 'always', 'make', 'everyone', 'happy', 'matter', 'someone', 'offend', 'upset', 'something', 'hold', 'true', 'include', 'medium', 'whether', 'book', 'movie', 'song', 'someone', 'somewhere', 'unhappy', 'reason', 'though', 'true', 'certainly', 'mean', 'take', 'medium', 'away', 'enjoy', 'people', 'grant', 'freedom', 'speech', 'freedom', 'press', 'would', 'take', 'material', 'shelf', 'take', 'away', 'right', 'clearly', 'intently', 'give', 'u', 'shelter', 'public', 'make', 'u', 'believe', 'world', 'place', 'without', 'bias', 'prejudice', 'slander', 'really', 'help', 'u', 'grand', 'scheme', 'thing', 'pretend', 'world', 'perfect', 'ensures', 'future', 'generation', 'world', 'devestated', 'reality', 'get', 'real', 'world', 'realize', 'everything', 'perfect', 'nice', 'unbiased', 'time', 'ninth', 'grade', 'year', 'high', 'school', 'read', 'book', 'demonstrate', 'harsh', 'prejudice', 'use', 'normal', 'early', 'year', 'book', 'could', 'certainly', 'consider', 'offensive', 'person', 'livin

In [114]:
# splitting into training and testing set
train_essay, test_essay, train_label, test_label = model_selection.train_test_split(train_set['essay_final'], train_set['type1'], test_size=0.3)

In [115]:
# transform the avg score into label of 0,1,2,3....
Encoder = LabelEncoder()
train_label = Encoder.fit_transform(train_label)
test_label = Encoder.transform(test_label)

In [119]:
# transform essay into matrix
Tfidf_vect = TfidfVectorizer()
Tfidf_vect.fit(train_set["essay_final"])
train_essay_vect = Tfidf_vect.transform(train_essay)
test_essay_vect = Tfidf_vect.transform(test_essay)

In [121]:
# fitting training set into naive bayes
naive = naive_bayes.MultinomialNB()
naive.fit(train_essay_vect, train_label)

# fitting testing set on NB classifier
predictions_NB = naive.predict(test_essay_vect)
print(accuracy_score(predictions_NB, test_label)*100)
# predictions_NB

63.70370370370371


In [122]:
# Classifier - Algorithm - SVM
# fit the training dataset on the classifier
SVM = svm.SVC(C=1.0, kernel='rbf', degree=2, gamma='auto')
SVM.fit(train_essay_vect, train_label)
predictions_SVM = SVM.predict(test_essay_vect)
print("SVM Accuracy Score -> ",accuracy_score(predictions_SVM, test_label)*100)

SVM Accuracy Score ->  59.81481481481481
