In [None]:
!pip install spacy
!python -m spacy download en_core_web_sm 
!pip install tensorflow

In [None]:
import spacy
import pandas as pd
import numpy as np



In [None]:
df = pd.read_csv('dataset (1).csv', encoding = 'latin1')

In [None]:
df.head()

Unnamed: 0,text,label
0,We ran on the beach.,correct
1,The music played in the background.,correct
2,She hung the picture above the couch.,correct
3,The moon shone above the trees.,correct
4,She swam under the water.,correct


In [None]:
len(df)

2693

In [None]:
nlp = spacy.load('en_core_web_sm')

In [None]:
# Hand-picked stop-word set consulted by preprocess(), used here in place of
# spaCy's built-in list (nlp.Defaults.stop_words).
# Note that it contains no prepositions ('on', 'in', 'at', ...): the incorrect
# sentences in this notebook are produced by swapping prepositions, so those
# words have to survive preprocessing.
stopwords = {'the', 'are', 'is', 'them', 'why', 'what', 'where', 'how'}

In [None]:
df.columns = ['Sentences', 'Label']

In [None]:
df.isnull().any()

Sentences    False
Label         True
dtype: bool

In [None]:
df.dropna(inplace = True)

In [None]:
'on' in stopwords

False

# Forming incorrect sentences

In [None]:
import re
import random

# Each key is a preposition; its value lists the prepositions substituted for
# it to produce a sentence with a (likely) wrong preposition.
preposition_errors = {
    "in": ["on", "at"],
    "of": ["for", "with", "in", "by", "about"],
    "to": ["at", "with"],
    "with": ["to", "for", "in"],
    "at": ["in", "on", "to", "with", "by"],
    "from": ["to", "with", "of", "in",],
    "by": ["with", "from", "for"],
    "about": ["of", "with", "for", "in", "on"],
    "over": ["on", "to", "in", "with",],
    "through": ["with", "to", "in", "on"]
}


def _swap_preposition(sentence, preposition, replacement):
    """Replace every whole-word occurrence of `preposition` in `sentence`.

    Matching is case-insensitive. An occurrence that starts with an upper-case
    letter (e.g. at the start of the sentence) gets a capitalised replacement
    so the sentence keeps its original casing.
    """
    def _keep_case(match):
        return replacement.capitalize() if match.group(0)[0].isupper() else replacement

    pattern = r'\b' + re.escape(preposition) + r'\b'
    return re.sub(pattern, _keep_case, sentence, flags=re.IGNORECASE)


def make_incorrect_sentences(sentences, substitutions=preposition_errors):
    """Build preposition-swapped variants of `sentences`.

    Args:
        sentences: iterable of sentence strings used as seeds.
        substitutions: mapping of a lower-case preposition to the list of
            prepositions that should replace it.

    Returns:
        A list of strings. For every sentence and every distinct preposition
        found among its whitespace-separated words, one variant is produced
        per replacement. A preposition that occurs several times in the same
        sentence is processed once, so no duplicate variants are emitted.
    """
    variants = []
    for sentence in sentences:
        # dict.fromkeys de-duplicates while keeping first-seen order.
        found = dict.fromkeys(
            word.lower() for word in sentence.split() if word.lower() in substitutions
        )
        for preposition in found:
            for replacement in substitutions[preposition]:
                variants.append(_swap_preposition(sentence, preposition, replacement))
    return variants


# Only sentences labelled 'correct' are used as seeds: swapping a preposition
# in an already incorrect sentence can restore the correct one, which would
# then be labelled 'incorrect'.
correct_sentences = df.loc[df['Label'] == 'correct', 'Sentences']
incSentence = make_incorrect_sentences(correct_sentences)

print(len(incSentence))

1547


In [None]:
# Pair every generated sentence with its label directly instead of splicing
# ',incorrect' into the text and splitting on commas. The string-based approach
# produced rows without a label for sentences that contain no '.', and rows
# with extra columns for sentences that contain more than one '.'; it also
# overwrote incSentence, so re-running the cell corrupted the data.
# Commas are left in the sentences: csv.writer quotes such fields itself.
rows = [[sentence, 'incorrect'] for sentence in incSentence]


In [None]:
rows[:5]

[['The music played on the background.', 'incorrect'],
 ['The music played at the background.', 'incorrect'],
 ['They drove with the tunnel.', 'incorrect'],
 ['They drove to the tunnel.', 'incorrect'],
 ['They drove in the tunnel.', 'incorrect']]

In [None]:
import csv
# newline='' is required when handing a file to csv.writer: the writer emits
# its own line terminators, and without it every row is followed by an extra
# blank line on platforms that translate '\n' (e.g. Windows).
with open('dataset.csv', 'w', newline='') as csvfile:
    csvwriter = csv.writer(csvfile)
    csvwriter.writerows(rows)

# Model 

In [None]:
maxlenofsentence = 20

In [None]:
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [None]:
myTokenizer = Tokenizer(num_words=800, oov_token="<UNK>")


In [None]:
def preprocess(textdata, fit=False):
  """Turn raw sentences into padded integer sequences for the model.

  Each sentence is lower-cased and parsed with the module-level spaCy
  pipeline `nlp`. Tokens whose text is in `stopwords` or is at most one
  character long are dropped; the lemma of every remaining token is kept and
  the lemmas are joined with single spaces.

  Args:
    textdata: iterable of sentence strings.
    fit: when True, `myTokenizer` is first fitted on the cleaned sentences
      (this updates the shared tokenizer). Leave False for data that must be
      encoded with the vocabulary that was already fitted.

  Returns:
    The output of `pad_sequences` for the tokenised sentences, with
    `maxlen=maxlenofsentence`.
  """
  cleaned_texts = []
  for raw_text in textdata:
    lemmas = [
        tok.lemma_
        for tok in nlp(raw_text.lower())
        if len(tok.text) > 1 and tok.text not in stopwords
    ]
    cleaned_texts.append(' '.join(lemmas))

  if fit:
    myTokenizer.fit_on_texts(cleaned_texts)

  return pad_sequences(
      myTokenizer.texts_to_sequences(cleaned_texts),
      maxlen=maxlenofsentence,
  )

In [None]:
X_train = preprocess(df['Sentences'], fit = True)

In [None]:
# Encode the labels for binary classification: 'correct' -> 1, 'incorrect' -> 0.
label_ids = {'correct': 1, 'incorrect': 0}
y_train = df['Label'].map(label_ids)
# Series.map turns every label that is missing from the mapping into NaN, which
# would silently end up in the training targets; fail loudly instead.
unknown_labels = df.loc[y_train.isna(), 'Label'].unique()
if len(unknown_labels) > 0:
    raise ValueError(f'Unexpected label values: {list(unknown_labels)}')
y_train = np.array(y_train)

In [None]:
from sklearn.utils import shuffle
X_train, y_train = shuffle(X_train, y_train)

In [None]:
len(myTokenizer.word_index)

881

In [None]:
# Import from tensorflow.keras, the same package the Tokenizer and
# pad_sequences used in this notebook come from, rather than mixing it with the
# standalone `keras` package.
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, SimpleRNN, Dense, LSTM, Bidirectional, Conv1D

# Number of rows in the embedding table; it has to be larger than any token id
# fed to the model. myTokenizer is created with num_words=800, so 1000 leaves
# headroom.
vocab_size = 1000
# Every input is padded by preprocess() to maxlenofsentence token ids.
max_len = maxlenofsentence
embedding_dim = 32

# Binary classifier: token ids -> embeddings -> per-token dense projection ->
# two stacked bidirectional LSTMs -> a single sigmoid unit.
model = Sequential()
model.add(Embedding(input_dim=vocab_size, output_dim=embedding_dim, input_length=max_len))
model.add(Dense(units=64, activation=tf.nn.relu))
# return_sequences=True keeps one output per time step so that the second
# recurrent layer receives a sequence.
model.add(Bidirectional(LSTM(64, return_sequences=True)))
model.add(Bidirectional(LSTM(32)))
model.add(Dense(units=1, activation=tf.nn.sigmoid))
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])



In [None]:
model.fit(X_train, y_train, epochs=10, batch_size=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7fd01e70c9d0>

In [None]:
#test = ['We are running in the water', 'Dog is sitting inside table']

In [None]:
test = '''She always sits beside me in class.
The picture frame is hanging above the couch.
The old book was found beneath the dusty shelves.
The car is parked underneath the streetlamp.
The keys are hidden inside the drawer.
The little boy ran towards his mother.
The hot air balloon is floating over the city.
The dog is resting near the fireplace.
The cake is on the platter.
The plane is landing onto the runway.
The butterfly is fluttering around the flowers.
The alarm clock is next to the bed.
The snow is falling onto the ground.
The train is passing beneath the bridge.
The squirrel is climbing up the tree.
The jacket is hanging on the coat hanger.
The sun is shining through the window.
The pen is on the desk.
The children are playing in the backyard.
The music is playing from the speakers.
The boat is sailing towards the island.
The girl is running around the park.
The pencil is in the pencil case.
The birds are nesting in the tree.
The coffee is served in the mug.
The car is driving along the highway.
The light is shining above the table.
The fish is swimming through the aquarium.
The phone is charging in the socket.
The airplane is flying over the clouds.
The kitten is sleeping on the pillow.
The man is standing in front of the door.
The rain is pouring onto the roof.
The tree is planted next to the sidewalk.
The spider is crawling under the bed.
The car is parked in front of the store.
The moon is shining upon the water.
The teacher is sitting behind the desk.
The kids are playing hide-and-seek around the house.
The book is lying on the floor.
The horse is galloping towards the fence.
The boy is standing on the edge of the pool.
The sun is rising above the horizon.
The mug is filled with hot chocolate.
The girl is standing under the umbrella.
The painting is hanging between two windows.
The train is traveling across the country.
The ant is crawling on the wall.
The flowers are blooming near the fountain.
The dog is running after the ball.
The book is opened at page 23.
The car is driving under the tunnel.
The guitar is leaning against the wall.
The bird is perched on the branch.
The girl is standing in front of the mirror.
The sun is setting behind the hills.
The cat is hiding behind the curtains.
The music is playing inside the car.
The cup is placed on the saucer.
The train is going over the bridge.
The man is walking towards the park.
The leaves are falling onto the ground.
The car is parked on the driveway.
The moon is shining through the clouds.
The plant is growing out of the pot.
The coffee is brewing in the pot.
The dog is digging under the fence.
The ship is sailing towards the port.
The girl is standing in front of the door.
The plane is descending towards the airport.
The book is kept on the shelf.
The snow is piled up on the sidewalk.
The tree is bending over the river.'''

In [None]:
# Split the block of evaluation text into one sentence per line.
# The isinstance guard keeps the cell re-runnable: once `test` is a list it is
# left untouched instead of raising AttributeError on `.split`.
# Blank lines are skipped so they do not become empty "sentences".
if isinstance(test, str):
    test = [line for line in test.split('\n') if line.strip()]

In [None]:
testPadded = preprocess(test)

In [None]:
# Every evaluation sentence is labelled 'correct' (1). The number of labels is
# taken from testPadded so it always matches the number of inputs, instead of
# a hard-coded count that must be kept in sync by hand.
y_test = np.ones((len(testPadded), ))

In [None]:
loss, accuracy = model.evaluate(testPadded, y_test)
print(f'Test loss: {loss}, Test accuracy: {accuracy}')

Test loss: 1.2827247381210327, Test accuracy: 0.6712328791618347


In [None]:
model.save('MPmodel.h5')
# The model consumes the integer ids assigned by myTokenizer, so the fitted
# tokenizer is stored next to it. Without it the saved model cannot be applied
# to new text after the kernel is restarted.
with open('MPtokenizer.json', 'w', encoding='utf-8') as tokenizer_file:
    tokenizer_file.write(myTokenizer.to_json())