In [None]:
import tensorflow as tf
import tensorflow.keras as keras
import pandas as pd
import re
from sklearn.model_selection import train_test_split
from keras.utils.vis_utils import plot_model
import numpy as np
import codecs
from keras.callbacks import ModelCheckpoint
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [None]:
import warnings
warnings.filterwarnings('ignore')

### **Read the English (en) and Persian (fa) data**

In [None]:
%%time
# Read both sides of the parallel corpus into memory.
# The encoding is passed explicitly: the Persian text is not ASCII, and
# without it open() falls back to the platform's locale encoding.
with open("TEP-fa.txt", encoding="utf-8") as f:
    fa_file = f.read()

with open("TEP-en.txt", encoding="utf-8") as f:
    en_file = f.read()

CPU times: user 184 ms, sys: 77.5 ms, total: 262 ms
Wall time: 259 ms


In [None]:
# One sentence per line: trim the surrounding whitespace of each file, then
# split on newlines so that both lists can be paired index by index.
fa_data, en_data = (raw_text.strip().split('\n') for raw_text in (fa_file, en_file))

In [None]:
len(fa_data)

612086

In [None]:
len(en_data)

612086

In [None]:
# Pair the sentences row by row: column 'en' holds the English line and
# column 'fa' the Persian line with the same index.
data = pd.DataFrame({'en': en_data, 'fa': fa_data})

In [None]:
data.head()

Unnamed: 0,en,fa
0,raspy breathing .,صداي خر خر .
1,dad .,پدر .
2,maybe its the wind .,شايد صداي باد باشه .
3,no .,نه .
4,stop please stop .,دست نگه داريد خواهش ميکنم دست نگه داريد .


### **Export DataFrame to CSV**

In [None]:
# Write the paired sentences to a CSV file; index=False leaves the row index out of the file.
data.to_csv('en-fa_MT_dataset.csv', index=False)

In [None]:
# Mount Google Drive (this cell only works inside Google Colab).
from google.colab import drive
drive.mount('/content/drive')

# Copy the exported CSV from the Colab working directory into the Drive root.
!cp -r "/content/en-fa_MT_dataset.csv" "/content/drive/MyDrive/en-fa_MT_dataset.csv"

Mounted at /content/drive


# **Read Dataset**

In [None]:
!gdown --id 101S8yZESRK5YL0a886tTz1hnCFzhaRxe

Downloading...
From: https://drive.google.com/uc?id=101S8yZESRK5YL0a886tTz1hnCFzhaRxe
To: /content/en-fa_MT_dataset.csv
100% 55.4M/55.4M [00:00<00:00, 186MB/s]


In [None]:
# Load the sentence pairs (columns 'en' and 'fa') from the downloaded CSV.
# NOTE(review): read_csv turns empty / NA-like cells into NaN, and the later
# str(sentence) calls would turn such values into the literal token 'nan' —
# confirm whether any such rows exist in the first 150,000 lines.
data = pd.read_csv('en-fa_MT_dataset.csv')

In [None]:
data.head(3)

Unnamed: 0,en,fa
0,raspy breathing .,صداي خر خر .
1,dad .,پدر .
2,maybe its the wind .,شايد صداي باد باشه .


In [None]:
len(data)

612086

In [None]:
# Rough sentence length: the number of space characters in each raw sentence
# (for single-space-separated text this is the token count minus one).
data['en_size'], data['fa_size'] = (data[column].str.count(' ') for column in ('en', 'fa'))

In [None]:
data.head()

Unnamed: 0,en,fa,en_size,fa_size
0,raspy breathing .,صداي خر خر .,2,3
1,dad .,پدر .,1,1
2,maybe its the wind .,شايد صداي باد باشه .,4,4
3,no .,نه .,1,1
4,stop please stop .,دست نگه داريد خواهش ميکنم دست نگه داريد .,3,8


In [None]:
# Remove every character that is neither a word character nor whitespace.
# regex=True is explicit because pandas 2.0 changed the default of str.replace
# to literal matching, which would leave the punctuation in place; the pattern
# is a raw string so that \w and \s are not invalid string escapes.
data['en_no_punctuation'] = data['en'].str.replace(r'[^\w\s]', '', regex=True)
# Lowercase and wrap with the sentence boundary markers the model is trained on.
data['en_no_punctuation'] = '<start> ' + data["en_no_punctuation"].str.lower() + ' <end>'

In [None]:
# Same punctuation removal as for English (no lowercasing), wrapped with the
# sentence boundary markers. regex=True is explicit because pandas 2.0 made
# literal matching the default; the raw string avoids invalid \w / \s escapes.
data['fa_no_punctuation'] = '<start> ' + data['fa'].str.replace(r'[^\w\s]', '', regex=True) + ' <end>'

In [None]:
data.head()

Unnamed: 0,en,fa,en_size,fa_size,en_no_punctuation,fa_no_punctuation
0,raspy breathing .,صداي خر خر .,2,3,<start> raspy breathing <end>,<start> صداي خر خر <end>
1,dad .,پدر .,1,1,<start> dad <end>,<start> پدر <end>
2,maybe its the wind .,شايد صداي باد باشه .,4,4,<start> maybe its the wind <end>,<start> شايد صداي باد باشه <end>
3,no .,نه .,1,1,<start> no <end>,<start> نه <end>
4,stop please stop .,دست نگه داريد خواهش ميکنم دست نگه داريد .,3,8,<start> stop please stop <end>,<start> دست نگه داريد خواهش ميکنم دست نگه داري...


In [None]:
data.loc[1, 'fa_no_punctuation']

'<start> پدر  <end>'

In [None]:
# Keep only the first 150,000 cleaned sentence pairs as plain NumPy arrays.
en_data, fa_data = (
    data[column].values[:150_000]
    for column in ('en_no_punctuation', 'fa_no_punctuation')
)

In [None]:
del data

In [None]:
# Collapse runs of whitespace (left behind by punctuation removal) into single
# spaces. The pattern is a raw string: '\s' in a normal literal is an invalid
# escape sequence that newer Python versions warn about.
en_data = [re.sub(r'\s+', ' ', str(sentence)) for sentence in en_data]

In [None]:
# Collapse runs of whitespace (left behind by punctuation removal) into single
# spaces. The pattern is a raw string: '\s' in a normal literal is an invalid
# escape sequence that newer Python versions warn about.
fa_data = [re.sub(r'\s+', ' ', str(sentence)) for sentence in fa_data]

In [None]:
en_data[0:5]

['<start> raspy breathing <end>',
 '<start> dad <end>',
 '<start> maybe its the wind <end>',
 '<start> no <end>',
 '<start> stop please stop <end>']

In [None]:
fa_data[0:5]

['<start> صداي خر خر <end>',
 '<start> پدر <end>',
 '<start> شايد صداي باد باشه <end>',
 '<start> نه <end>',
 '<start> دست نگه داريد خواهش ميکنم دست نگه داريد <end>']

In [None]:
def max_length(tensor):
    """Return the length of the longest element in `tensor`.

    `tensor` is any iterable of sized items (e.g. a list of token-id lists or
    the rows of a 2-D array). An empty iterable raises ValueError.
    """
    return max(map(len, tensor))

In [None]:
def tokenize(lang):
    """Fit a word-level tokenizer on `lang` and encode it as padded id sequences.

    Args:
        lang: iterable of sentences (strings).

    Returns:
        (tensor, lang_tokenizer): the id sequences padded at the end to a
        common length, and the fitted Keras Tokenizer. filters='' switches off
        the tokenizer's character filtering, so tokens are kept as written.
    """
    fitted_tokenizer = Tokenizer(filters='')
    fitted_tokenizer.fit_on_texts(lang)
    padded_ids = pad_sequences(fitted_tokenizer.texts_to_sequences(lang), padding='post')
    return padded_ids, fitted_tokenizer

In [None]:
def load_dataset():
    """Tokenize the module-level `en_data` (input) and `fa_data` (target) lists.

    Returns:
        (input_tensor, target_tensor, input_lang_tokenizer, target_lang_tokenizer)
        where the tensors are padded id arrays and each tokenizer was fitted on
        its own language only.
    """
    source_ids, source_tokenizer = tokenize(en_data)
    target_ids, target_tokenizer = tokenize(fa_data)
    return source_ids, target_ids, source_tokenizer, target_tokenizer

In [None]:
input_tensor, target_tensor, input_lang_tokenizer, target_lang_tokenizer = load_dataset()

In [None]:
input_tensor

array([[    1, 17591,  1810, ...,     0,     0,     0],
       [    1,   326,     2, ...,     0,     0,     0],
       [    1,   173,    30, ...,     0,     0,     0],
       ...,
       [    1,   272,    23, ...,     0,     0,     0],
       [    1,   272,    23, ...,     0,     0,     0],
       [    1,    90,    19, ...,     0,     0,     0]], dtype=int32)

In [None]:
print(input_tensor.shape)
print(target_tensor.shape)

(150000, 36)
(150000, 32)


In [None]:
del en_data
del fa_data

In [None]:
# Padded sequence lengths; evaluate() uses them as the input padding length
# and as the upper bound on decoding steps.
max_length_targ = max_length(target_tensor)
max_length_inp = max_length(input_tensor)

In [None]:
max_length_targ, max_length_inp

(32, 36)

In [None]:
# Hold out 20% of the pairs. random_state pins the shuffle so that the same
# rows land in the training set on every run.
X_train, X_test, y_train, y_test = train_test_split(
    input_tensor, target_tensor, test_size=0.2, random_state=42
)

In [None]:
def convert_tensor_to_word(lang_tokenizer, tensor):
    """Print each non-padding token id in `tensor` next to its word.

    Args:
        lang_tokenizer: object exposing an `index_word` mapping (id -> word).
        tensor: iterable of token ids; id 0 is skipped.
    """
    for token_id in (value for value in tensor if value != 0):
        print(token_id, ': ', lang_tokenizer.index_word[token_id])

In [None]:
convert_tensor_to_word(input_lang_tokenizer, input_tensor[2])

1 :  <start>
173 :  maybe
30 :  its
4 :  the
1328 :  wind
2 :  <end>


In [None]:
input_tensor[0]

array([    1, 17591,  1810,     2,     0,     0,     0,     0,     0,
           0,     0,     0,     0,     0,     0,     0,     0,     0,
           0,     0,     0,     0,     0,     0,     0,     0,     0,
           0,     0,     0,     0,     0,     0,     0,     0,     0],
      dtype=int32)

In [None]:
input_tensor.shape

(150000, 36)

In [None]:
list(input_lang_tokenizer.word_index)[0:20]

['<start>',
 '<end>',
 'you',
 'the',
 'i',
 'to',
 'a',
 'it',
 'and',
 'of',
 'is',
 'that',
 'in',
 'me',
 'what',
 'this',
 'we',
 'your',
 'my',
 'have']

In [None]:
del input_tensor
del target_tensor

In [None]:
# Shuffle buffer as large as the training set, i.e. the whole set is shuffled.
BUFFER_SIZE = len(X_train)
BATCH_SIZE = 256
# Number of full batches per epoch (the remainder is dropped by the dataset).
steps_per_epoch = len(X_train) // BATCH_SIZE
# Size of the word embedding vectors in both encoder and decoder.
embedding_dim = 256
# Number of GRU units in both encoder and decoder.
units = 1024
# +1 because tokenizer ids start at 1 and id 0 is used for padding.
vocab_inp_size = len(input_lang_tokenizer.word_index) + 1
vocab_targ_size = len(target_lang_tokenizer.word_index) + 1

In [None]:
# Training pipeline: shuffle the (input, target) pairs, then group them into
# fixed-size batches. drop_remainder=True discards the final short batch so
# every batch has exactly BATCH_SIZE rows.
dataset = (
    tf.data.Dataset.from_tensor_slices((X_train, y_train))
    .shuffle(BUFFER_SIZE)
    .batch(BATCH_SIZE, drop_remainder=True)
)

In [None]:
class Encoder(keras.Model):
    """Source-sentence encoder: an embedding layer followed by a GRU."""

    def __init__(self, vocab_size, embedding_dim, enc_units, batch_size):
        """Create the layers.

        Args:
            vocab_size: number of rows in the embedding table.
            embedding_dim: size of each embedding vector.
            enc_units: number of GRU units.
            batch_size: batch size used by `initilize_hidden_state`.
        """
        super().__init__()
        self.batch_size = batch_size
        self.enc_units = enc_units
        self.embedding = keras.layers.Embedding(vocab_size, embedding_dim)
        # return_sequences gives the per-step outputs consumed by attention;
        # return_state gives the final state handed on to the decoder.
        self.gru = keras.layers.GRU(enc_units, return_sequences=True, return_state=True)

    def call(self, x, hidden):
        """Encode token ids `x`, starting the GRU from `hidden`.

        Returns:
            (output, state): the GRU output for every time step and the final
            GRU state.
        """
        sequence_output, final_state = self.gru(self.embedding(x), initial_state=hidden)
        return sequence_output, final_state

    def initilize_hidden_state(self):
        """Return an all-zero initial state of shape (batch_size, enc_units)."""
        return tf.zeros(shape=(self.batch_size, self.enc_units))

In [None]:
encoder = Encoder(vocab_inp_size, embedding_dim, units, BATCH_SIZE)

In [None]:
encoder

<__main__.Encoder at 0x7fc5e727be10>

In [None]:
simple_hidden = encoder.initilize_hidden_state()
simple_hidden

<tf.Tensor: shape=(256, 1024), dtype=float32, numpy=
array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]], dtype=float32)>

In [None]:
example_input_batch, example_target_batch = next(iter(dataset))

In [None]:
encoder(example_input_batch, simple_hidden)

(<tf.Tensor: shape=(256, 36, 1024), dtype=float32, numpy=
 array([[[-5.57733420e-03,  1.93463976e-03, -1.29232705e-02, ...,
          -2.17668572e-03,  6.33166311e-03,  9.54738061e-04],
         [ 2.91131134e-03,  1.34674739e-03, -7.28727551e-03, ...,
          -8.83924123e-03, -1.70115638e-03,  7.97948707e-03],
         [ 1.04775578e-02,  3.45648360e-03,  4.51482460e-03, ...,
           1.65527593e-03, -2.26074434e-03,  6.29868591e-03],
         ...,
         [-5.38144354e-03,  5.14393719e-03, -7.42220040e-03, ...,
          -1.65800471e-02,  1.48005895e-02,  4.35499568e-03],
         [-5.38139744e-03,  5.14387246e-03, -7.42216222e-03, ...,
          -1.65799521e-02,  1.48006268e-02,  4.35502175e-03],
         [-5.38137183e-03,  5.14382962e-03, -7.42213847e-03, ...,
          -1.65798943e-02,  1.48006510e-02,  4.35504271e-03]],
 
        [[-5.57733420e-03,  1.93463976e-03, -1.29232705e-02, ...,
          -2.17668572e-03,  6.33166311e-03,  9.54738061e-04],
         [-6.01179060e-03,  8

In [None]:
simple_output, simple_states = encoder(example_input_batch, simple_hidden)

In [None]:
class Attention(keras.layers.Layer):
    """Additive attention: score = V(tanh(W1(values) + W2(query)))."""

    def __init__(self, units):
        """Create the three dense projections; `units` is the width of W1 and W2."""
        super().__init__()
        self.W1 = keras.layers.Dense(units)
        self.W2 = keras.layers.Dense(units)
        self.V = keras.layers.Dense(1)

    def call(self, query, values):
        """Weight `values` along axis 1 according to their match with `query`.

        Returns:
            (context_vector, attention_weights): the weighted sum of `values`
            over axis 1, and the softmax weights (normalised over axis 1).
        """
        # Insert an axis at position 1 so the query broadcasts against every step of `values`.
        query_with_time_axis = tf.expand_dims(query, 1)
        combined = tf.nn.tanh(self.W1(values) + self.W2(query_with_time_axis))
        score = self.V(combined)
        attention_weights = tf.nn.softmax(score, axis=1)
        context_vector = tf.reduce_sum(attention_weights * values, axis=1)
        return context_vector, attention_weights

In [None]:
attention_layer = Attention(10)
attention_layer(simple_hidden, simple_output)

(<tf.Tensor: shape=(256, 1024), dtype=float32, numpy=
 array([[-0.00368485,  0.00456377, -0.00475135, ..., -0.01231724,
          0.01065637,  0.00556866],
        [-0.00297119,  0.00351883, -0.00563804, ..., -0.00933621,
          0.01177633,  0.00589203],
        [-0.00487128,  0.00396191, -0.00601505, ..., -0.01132322,
          0.01072252,  0.00286333],
        ...,
        [-0.00699962,  0.00348471, -0.00241681, ..., -0.00980218,
          0.01191147,  0.00364675],
        [-0.00549176,  0.00375045, -0.00679198, ..., -0.01226841,
          0.01312873,  0.00516596],
        [-0.00378583,  0.00289025, -0.0073575 , ..., -0.01295296,
          0.01168837,  0.00416749]], dtype=float32)>,
 <tf.Tensor: shape=(256, 36, 1), dtype=float32, numpy=
 array([[[0.02781183],
         [0.02777026],
         [0.02764382],
         ...,
         [0.0278602 ],
         [0.0278602 ],
         [0.0278602 ]],
 
        [[0.02784223],
         [0.02740934],
         [0.02794278],
         ...,
         [

In [None]:
attention_result, attention_weights = attention_layer(simple_hidden, simple_output)

In [None]:
class Decoder(keras.Model):
    """Single-step decoder: attention over the encoder output, then GRU and Dense."""

    def __init__(self, vocab_size, embedding_dim, dec_units, batch_size):
        """Create the layers.

        Args:
            vocab_size: embedding table size and width of the output layer.
            embedding_dim: size of each embedding vector.
            dec_units: number of GRU units and width of the attention projections.
            batch_size: stored on the instance; not read by `call`.
        """
        super().__init__()
        self.batch_size = batch_size
        self.dec_units = dec_units
        self.embedding = keras.layers.Embedding(vocab_size, embedding_dim)
        self.gru = keras.layers.GRU(dec_units, return_sequences=True, return_state=True)
        # No activation: the layer emits raw logits over the vocabulary.
        self.fc = keras.layers.Dense(vocab_size)
        self.attention = Attention(dec_units)

    def call(self, x, hidden, enc_output):
        """Run one decoding step.

        Args:
            x: token ids fed to the decoder at this step.
            hidden: state used as the attention query.
            enc_output: encoder outputs attended over.

        Returns:
            (logits, state, attention_weights).

        NOTE(review): `hidden` is only used as the attention query; the GRU is
        called without initial_state, so it does not start from `hidden` —
        confirm this is intended.
        """
        context_vector, attention_weights = self.attention(hidden, enc_output)
        embedded = self.embedding(x)
        # Prepend the context vector to the embedded token along the feature axis.
        gru_input = tf.concat([tf.expand_dims(context_vector, 1), embedded], axis=-1)
        output, state = self.gru(gru_input)
        # Merge the batch and time axes so Dense sees one row per prediction.
        flat_output = tf.reshape(output, (-1, output.shape[2]))
        logits = self.fc(flat_output)
        return logits, state, attention_weights

In [None]:
decoder = Decoder(vocab_targ_size, embedding_dim, units, BATCH_SIZE)

In [None]:
decoder(tf.random.uniform((BATCH_SIZE, 1)), simple_hidden, simple_output)

(<tf.Tensor: shape=(256, 50617), dtype=float32, numpy=
 array([[-1.6781237e-04,  2.9255694e-05,  2.3602224e-03, ...,
         -4.2361341e-04, -1.3501514e-04, -2.8217185e-04],
        [-2.1054407e-04,  6.2142761e-05,  2.4375161e-03, ...,
         -3.7714036e-04, -2.9382913e-04, -2.1737069e-04],
        [-1.5343944e-04,  1.3265519e-04,  2.4651913e-03, ...,
         -1.9698153e-04, -8.6423883e-05, -1.3092706e-04],
        ...,
        [-3.2503996e-04,  1.5269622e-04,  2.3692504e-03, ...,
         -2.3654202e-04,  2.7905946e-05,  7.1359085e-05],
        [-5.6934601e-05, -6.6164852e-05,  2.4493071e-03, ...,
         -1.9223554e-04, -1.7393878e-04, -1.0048115e-04],
        [-1.6900798e-05,  6.2726205e-05,  2.4797227e-03, ...,
         -2.5679485e-04, -1.1613939e-04, -1.1284187e-04]], dtype=float32)>,
 <tf.Tensor: shape=(256, 1024), dtype=float32, numpy=
 array([[-0.00621324,  0.00917714,  0.01181603, ...,  0.00739351,
          0.00189937,  0.00280944],
        [-0.00513341,  0.00896678,  0.

In [None]:
# Adam with its default settings.
optimizer = keras.optimizers.Adam()
# from_logits=True: the decoder's output Dense layer has no activation.
# reduction='none' keeps one loss value per example so that loss_function can
# zero out padding positions before averaging.
loss_object = keras.losses.SparseCategoricalCrossentropy(from_logits=True, reduction='none')

In [None]:
def loss_function(real, pred):
    """Cross-entropy between `real` ids and `pred` logits, ignoring padding.

    Positions where `real` is 0 contribute a loss of 0. The mean is taken over
    all positions, including the zeroed ones.
    """
    per_example_loss = loss_object(real, pred)
    is_real_token = tf.math.logical_not(tf.math.equal(real, 0))
    padding_mask = tf.cast(is_real_token, dtype=per_example_loss.dtype)
    return tf.reduce_mean(per_example_loss * padding_mask)

In [None]:
# NOTE(review): checkpoint_dir is not referenced by any cell visible in this
# notebook (the training loop saves with file_prefix='test1') — confirm
# whether it is still needed.
checkpoint_dir = 'chckpnts'
# Track the optimizer and both models so a single save/restore covers all three.
checkpoint = tf.train.Checkpoint(optimizer=optimizer, encoder=encoder, decoder=decoder)

In [None]:
def train_step(inp, targ, enc_hidden):
    """Run one optimisation step on a batch using teacher forcing.

    Args:
        inp: batch of input token ids.
        targ: batch of target token ids; column 0 is the '<start>' token.
        enc_hidden: initial encoder state.

    Returns:
        The summed per-step loss divided by the target sequence length.
    """
    start_id = target_lang_tokenizer.word_index['<start>']
    target_len = targ.shape[1]
    summed_loss = 0
    with tf.GradientTape() as tape:
        enc_output, enc_state = encoder(inp, enc_hidden)
        dec_state = enc_state
        # Every sequence in the batch starts decoding from '<start>'.
        step_input = tf.expand_dims([start_id] * BATCH_SIZE, 1)
        for step in range(1, target_len):
            expected = targ[:, step]
            logits, dec_state, _ = decoder(step_input, dec_state, enc_output)
            summed_loss += loss_function(expected, logits)
            # Teacher forcing: feed the true token, not the prediction.
            step_input = tf.expand_dims(expected, 1)
    trainable = encoder.trainable_variables + decoder.trainable_variables
    grads = tape.gradient(summed_loss, trainable)
    optimizer.apply_gradients(zip(grads, trainable))
    return summed_loss / int(target_len)

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
EPOCH = 10
LOG_EVERY = 100  # print the batch loss once every LOG_EVERY batches

for epoch in range(EPOCH):
    # Every batch of the epoch starts from the same all-zero encoder state.
    enc_hidden = encoder.initilize_hidden_state()
    total_loss = 0
    for (batch, (inp, targ)) in enumerate(dataset.take(steps_per_epoch)):
        batch_loss = train_step(inp, targ, enc_hidden)
        total_loss += batch_loss
        # Printing two lines per batch floods the cell output; log a sample instead.
        if batch % LOG_EVERY == 0:
            print(f'Epoch: {epoch}  Batch: {batch}  Loss: {batch_loss.numpy():.4f}')
    # Report the accumulated loss, which was previously summed but never shown.
    mean_loss = float(total_loss) / max(steps_per_epoch, 1)
    print(f'Epoch: {epoch}  Mean loss: {mean_loss:.4f}')
    # One checkpoint per epoch, written as test1-<n> in the working directory.
    checkpoint.save(file_prefix='test1')

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Epoch:  4
Loss:  0.93347514
Epoch:  4
Loss:  0.84476227
Epoch:  4
Loss:  0.8942217
Epoch:  4
Loss:  0.7811055
Epoch:  4
Loss:  0.86490095
Epoch:  4
Loss:  0.8499682
Epoch:  4
Loss:  0.85896116
Epoch:  4
Loss:  0.84145874
Epoch:  4
Loss:  0.81320894
Epoch:  4
Loss:  0.8361155
Epoch:  4
Loss:  0.86841553
Epoch:  4
Loss:  0.88361984
Epoch:  4
Loss:  0.8283958
Epoch:  4
Loss:  0.8162862
Epoch:  4
Loss:  0.90168875
Epoch:  4
Loss:  0.904313
Epoch:  4
Loss:  0.9191024
Epoch:  4
Loss:  0.8451974
Epoch:  4
Loss:  0.8752406
Epoch:  4
Loss:  0.8507817
Epoch:  4
Loss:  0.8301291
Epoch:  4
Loss:  0.8826971
Epoch:  4
Loss:  0.8696635
Epoch:  4
Loss:  0.85960317
Epoch:  4
Loss:  0.87690663
Epoch:  4
Loss:  0.8993135
Epoch:  4
Loss:  0.86364895
Epoch:  4
Loss:  0.8951596
Epoch:  4
Loss:  0.8382962
Epoch:  4
Loss:  0.8856038
Epoch:  4
Loss:  0.8377232
Epoch:  4
Loss:  0.879175
Epoch:  4
Loss:  0.9387172
Epoch:  4
Loss:  0.90860003
Epoch:

In [None]:
!gdown --id 1CkZlqTf_JP8LPnlFVuhj8t5cEGzd_Wqi
!gdown --id 1TjbA4BPYq3Qza3FqmlNxkq9c6BWTsSNQ
!gdown --id 1-0AwlQ2W5M7JYzDjzWxxRi3tXYkaYpLT
!gdown --id 1-102ZVFjrHtzampS_197f8NmqLsZ8bzu
!gdown --id 1-7tn1aZlgVDz19_mKTrF6EBInyqSY5ca
!gdown --id 1-83gRDn8CHlkwajHpHp1vIVkWcskb15J
!gdown --id 1-89YMMbAsul_1v_T8KnqF6DkBEAvxZLc
!gdown --id 1-BNIGaCgbedNxgJPfyaixsxrH0qOS9F9
!gdown --id 1-CCANs05OllnZSj02u4c2IOMTYXHbqqI
!gdown --id 1-GuIuZk3hRvygz3FdkpqtgDspZPtLkuT
!gdown --id 1-Hn2a5abKj6cZnormJBeCmRz3L_76x5P
!gdown --id 1-Lg_FBO0RE3a1fNwm-dRCUaNVz1tVO8x
!gdown --id 1-NBClpPSyltQJ_UytiB1wBDarhoI3xA1
!gdown --id 1-Ou_SELtsN9_9qi3YwXW3bBWJoNvhBG2
!gdown --id 1-Q3x6LGNE_uVsxQ0ExYDtKyibyfQGwv3
!gdown --id 1-QGyKhCTBYsqSmKJ_aAkelxEfRhcZH7x
!gdown --id 1-SXD9GiAfRzHerlmefOI04obKwH5eaXw
!gdown --id 1-Tyu1MxoK-tj4ew11I0vmb4cyElgZ7EW
!gdown --id 1-Uy7q2eWo46IxyttKq1Ia-j6Qov6hvzZ
!gdown --id 1-Vd7siFKkU9BqPCsE5KXCsqgHt-x1Myj

Downloading...
From: https://drive.google.com/uc?id=1CkZlqTf_JP8LPnlFVuhj8t5cEGzd_Wqi
To: /content/test1-1.data-00000-of-00001
100% 1.03G/1.03G [00:16<00:00, 63.2MB/s]
Downloading...
From: https://drive.google.com/uc?id=1TjbA4BPYq3Qza3FqmlNxkq9c6BWTsSNQ
To: /content/test1-1.index
100% 3.97k/3.97k [00:00<00:00, 6.20MB/s]
Downloading...
From: https://drive.google.com/uc?id=1-0AwlQ2W5M7JYzDjzWxxRi3tXYkaYpLT
To: /content/test1-2.data-00000-of-00001
100% 1.03G/1.03G [00:15<00:00, 68.6MB/s]
Downloading...
From: https://drive.google.com/uc?id=1-102ZVFjrHtzampS_197f8NmqLsZ8bzu
To: /content/test1-2.index
100% 3.97k/3.97k [00:00<00:00, 6.25MB/s]
Downloading...
From: https://drive.google.com/uc?id=1-7tn1aZlgVDz19_mKTrF6EBInyqSY5ca
To: /content/test1-3.data-00000-of-00001
100% 1.03G/1.03G [00:15<00:00, 66.1MB/s]
Downloading...
From: https://drive.google.com/uc?id=1-83gRDn8CHlkwajHpHp1vIVkWcskb15J
To: /content/test1-3.index
100% 3.97k/3.97k [00:00<00:00, 6.57MB/s]
Downloading...
From: https://drive

In [None]:
def preprocess_senetence(w):
    """Normalise an English sentence the same way the training data was.

    The training column was built by removing every character that is neither
    a word character nor whitespace, lowercasing, collapsing whitespace and
    wrapping with '<start>' / '<end>'. This function applies the same steps so
    that the resulting tokens match the fitted vocabulary (previously case was
    kept, digits were dropped and apostrophes were turned into spaces).

    Args:
        w: raw input sentence.

    Returns:
        The cleaned sentence as '<start> ... <end>' with single spaces.
    """
    cleaned = re.sub(r'[^\w\s]', '', w.lower())
    # split() drops leading/trailing whitespace and collapses inner runs, so an
    # empty sentence yields exactly '<start> <end>'.
    return ' '.join(['<start>', *cleaned.split(), '<end>'])

In [None]:
def evaluate(sentence):
    """Translate one English sentence by greedy decoding.

    Args:
        sentence: raw English sentence.

    Returns:
        (result, sentence): the decoded target words joined by spaces (each
        followed by a space, including '<end>' when it is produced) and the
        preprocessed input sentence.

    Raises:
        KeyError: if the sentence contains words missing from the input
            vocabulary; the message lists them.
    """
    sentence = preprocess_senetence(sentence)
    # split() without an argument never yields empty tokens.
    words = sentence.split()
    unknown_words = [word for word in words if word not in input_lang_tokenizer.word_index]
    if unknown_words:
        raise KeyError(f'words not in the input vocabulary: {unknown_words}')
    inputs = [input_lang_tokenizer.word_index[word] for word in words]
    inputs = keras.preprocessing.sequence.pad_sequences([inputs], maxlen=max_length_inp, padding='post')
    inputs = tf.convert_to_tensor(inputs)
    result = ''
    # Batch of one sentence, so the initial encoder state has a single row.
    hidden = [tf.zeros((1, units))]
    enc_out, enc_hidden = encoder(inputs, hidden)
    dec_hidden = enc_hidden
    dec_input = tf.expand_dims([target_lang_tokenizer.word_index['<start>']], 0)
    for t in range(max_length_targ):
        predictions, dec_hidden, _ = decoder(dec_input, dec_hidden, enc_out)
        # Greedy choice: take the highest-scoring token at each step.
        predicted_id = tf.argmax(predictions[0]).numpy()
        predicted_word = target_lang_tokenizer.index_word[predicted_id]
        result += predicted_word + ' '
        if predicted_word == '<end>':
            break
        # Feed the prediction back in as the next decoder input.
        dec_input = tf.expand_dims([predicted_id], 0)
    return result, sentence

In [None]:
# tf.train.latest_checkpoint returns None when the directory has no
# 'checkpoint' state file, and checkpoint.restore(None) then restores nothing
# without raising. Make that case visible instead of silently continuing.
# NOTE(review): the download cell fetches test1-<n>.data / .index files; if the
# 'checkpoint' state file is not among them, pass an explicit prefix to
# restore() instead — confirm which prefix is the final one.
latest_checkpoint_path = tf.train.latest_checkpoint('')
if latest_checkpoint_path is None:
    print('No checkpoint state file found in the working directory; '
          'nothing was restored and the in-memory weights are used as-is.')
else:
    checkpoint.restore(latest_checkpoint_path)
    print('Restored weights from', latest_checkpoint_path)

<tensorflow.python.training.tracking.util.InitializationOnlyStatus at 0x7fc5e5176810>

In [None]:
evaluate('yes')

('سردبير معقول آشغالت ابوت 2015 19ساعت پلاكهاي سببي اجلاسی مشعلها انافلكتيك شهادتتون 76 شگفت مثا آشغالت ابوت 2015 19ساعت پلاكهاي سببي اجلاسی مشعلها انافلكتيك شهادتتون 76 شگفت مثا آشغالت ابوت 2015 19ساعت ',
 '<start> yes <end>')

In [None]:
evaluate('maybe its the wind')

('سردبير شزط خرابش حرزهها همانندیهای شماري نميخوايي كاندوم کلیسا واخرين ناقلايه باشماست خدا فايدست حافظت قتل كنتوق آليسيا سيفونش بیفت الكسسلام 19ساعت داروی ميريزه جامد مادري قيافشو سلين روم قابلی بانکش اونچه ',
 '<start> maybe its the wind <end>')

In [None]:
evaluate('stop please stop')

('سردبير معقول آشغالت ابوت 2015 19ساعت داروی ميريزه جامد مادري قيافشو سلين روم قابلی بانکش اونچه ديوصفت درخشنده بل ديدهام مختو مسئوليتتو والسينگهام سيديوس شورشيها بنویسید گروهت معقول آشغالت ابوت 2015 19ساعت ',
 '<start> stop please stop <end>')

In [None]:
evaluate('mouth')

('دهنتو ببند <end> ', '<start> mouth <end>')

In [None]:
evaluate('i have great lessons today')

('امروز صبح بزرگ شدم <end> ', '<start> i have great lessons today <end>')

In [None]:
evaluate('hello')

('سلام <end> ', '<start> hello <end>')

In [None]:
evaluate('dad')

('بابا <end> ', '<start> dad <end>')

In [None]:
evaluate('zodiac')

('نفرين <end> ', '<start> zodiac <end>')