In [None]:
!pip install -U spacy

In [None]:
!pip install -U spacy-lookups-data

In [None]:
!python -m spacy download en_core_web_lg

In [None]:
!python -m spacy link en_core_web_lg en

In [None]:
import string

import spacy
from sklearn.base import TransformerMixin
from sklearn.feature_extraction.text import CountVectorizer
# ENGLISH_STOP_WORDS is imported from the public `text` module: the private
# `sklearn.feature_extraction.stop_words` module was deprecated and later
# removed, while this import path works on old and new releases alike.
from sklearn.feature_extraction.text import ENGLISH_STOP_WORDS as stopwords
from sklearn.metrics import accuracy_score
from sklearn.pipeline import Pipeline
from sklearn.svm import LinearSVC

# ASCII punctuation characters that the tokenizer filters out.
punctuations = string.punctuation

In [None]:
from spacy.lang.en import English  # kept so any cell still referencing `English` keeps working

# Load the downloaded model once and keep the result. Previously the loaded
# pipeline was thrown away and `parser` was a blank `English()` object, so the
# model was never used for tokenizing or lemmatizing.
# The model is addressed by its package name, which does not depend on an
# `en` shortcut link being present.
# `parser` is the callable that turns text into a spaCy Doc; the dependency
# parser and NER components are disabled because only tokens and lemmas are read.
parser = spacy.load('en_core_web_lg', disable=['parser', 'ner'])


In [None]:
# Custom pipeline step that normalizes raw text before vectorization
class predictors(TransformerMixin):
    """Stateless transformer that applies ``clean_text`` to every input text."""

    def fit(self, X, y=None, **fit_params):
        """Nothing is learned from the data; return the transformer itself."""
        return self

    def transform(self, X, **transform_params):
        """Return a list holding the cleaned version of each text in ``X``."""
        cleaned = []
        for text in X:
            cleaned.append(clean_text(text))
        return cleaned

    def get_params(self, deep=True):
        """Report no parameters: always an empty dict."""
        return dict()


In [None]:
# Basic utility function to normalize a piece of text
def clean_text(text):
    """Return ``text`` without leading/trailing whitespace, converted to lowercase."""
    trimmed = text.strip()
    return trimmed.lower()
def spacy_tokenizer(sentence):
    """Split ``sentence`` into lowercase lemmas, dropping stop words and punctuation.

    The sentence is run through the module-level ``parser``. For each token the
    lemma is used, lowercased and stripped. Tokens fall back to their lowercased
    surface form when the lemma is the ``"-PRON-"`` placeholder or is empty
    (an empty lemma would otherwise erase the token entirely).

    A token is discarded when it is empty, is in ``stopwords``, or consists
    solely of characters from ``punctuations``.

    Returns:
        list[str]: the surviving tokens, in sentence order.
    """
    # Membership must be tested per character: `tok in punctuations` on the
    # raw string is a substring test, which lets tokens such as "..." or "--"
    # slip through because they are not contiguous substrings of it.
    punct_chars = set(punctuations)

    tokens = []
    for tok in parser(sentence):
        lemma = tok.lemma_
        if lemma and lemma != "-PRON-":
            word = lemma.lower().strip()
        else:
            # Placeholder or missing lemma: keep the word itself.
            word = tok.lower_.strip()

        if not word or word in stopwords:
            continue
        if all(ch in punct_chars for ch in word):
            continue
        tokens.append(word)
    return tokens

In [None]:
# Bag-of-words feature extractor (unigram counts) driven by the custom spaCy tokenizer.
# token_pattern=None: the default regex is never used once a tokenizer is
# supplied, and leaving it set makes recent scikit-learn emit a warning.
vectorizer = CountVectorizer(tokenizer=spacy_tokenizer, token_pattern=None, ngram_range=(1, 1))
# Fixed seed so that re-running the notebook fits the same model.
classifier = LinearSVC(random_state=42)
# Pipeline: clean the raw text -> tokenize and vectorize -> classify
pipe = Pipeline([("cleaner", predictors()), ('vectorizer', vectorizer), ('classifier', classifier)])

In [None]:
# Training samples: (sentence, label) pairs, six per class.
train = [
    ('I am Adhiraj Banerjee.', 'Grammatically correct'),
    ('this is an amazing platform to create ML files!', 'Grammatically correct'),
    ('I feel very good about them .', 'Grammatically correct'),
    ('I study in IIEST,Shibpur.', 'Grammatically correct'),
    ("what an awesome view", 'Grammatically correct'),
    ('I like do read books', 'Grammatically incorrect'),
    ('I tired of sitting in home.', 'Grammatically incorrect'),
    ("I may a good result", 'Grammatically incorrect'),
    ('he is brother me', 'Grammatically incorrect'),
    # The article is missing ("in a horrible situation"), so this sentence
    # belongs to the incorrect class; it was previously labelled correct.
    ('I am in horrible situation.', 'Grammatically incorrect'),
    ('He is my Friend.', 'Grammatically correct'),
    ('I to love read story books.', 'Grammatically incorrect'),
]


In [None]:
# Held-out samples: (sentence, expected label) pairs.
# Several expected labels were inverted relative to the sentences themselves
# (e.g. a well-formed sentence marked incorrect, a verb-less one marked
# correct); they now state the actual grammaticality of each sentence.
test = [
    ('He has been affected a lot.', 'Grammatically correct'),
    ('The government is concentrating on health issues.', 'Grammatically correct'),
    ("He may a bad result.", 'Grammatically incorrect'),
    ("I feel amazing!", 'Grammatically correct'),
    ('He is a good friend of mine.', 'Grammatically correct'),
    ("She is in good situation.", 'Grammatically incorrect'),
    ('She tired of standing in school.', 'Grammatically incorrect'),
    ('He is brother my.', 'Grammatically incorrect'),
    ('He to hate read story book.', 'Grammatically incorrect'),
]

In [None]:
# Fit the pipeline on the training sentences, predict the held-out ones, and
# measure accuracy against the expected labels.
train_texts = [text for text, _ in train]
train_labels = [label for _, label in train]
test_texts = [text for text, _ in test]
test_labels = [label for _, label in test]

pipe.fit(train_texts, train_labels)
pred_data = pipe.predict(test_texts)

# Show each (sentence, expected label) pair next to the predicted label.
for (sample, pred) in zip(test, pred_data):
    print(sample, pred)

# Fraction of held-out sentences whose prediction matches the expected label.
print("Accuracy:", accuracy_score(test_labels, pred_data))

('He has been affected a lot.', 'Grammatically incorrect') Grammatically correct
('The government is concentrating on health issues.', 'Grammatically correct') Grammatically correct
('He may a bad result.', 'Grammatically correct') Grammatically incorrect
('I feel amazing!', 'Grammatically correct') Grammatically correct
('He is a good friend of mine.', 'Grammatically correct') Grammatically correct
('She is in good situation.', 'Grammatically incorrect') Grammatically correct
('She tired of standing in school.', 'Grammatically correct') Grammatically incorrect
('He is brother my.', 'Grammatically correct') Grammatically incorrect
('He to hate read story book.', 'Grammatically correct') Grammatically incorrect
