In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
import tensorflow as tf
import numpy as np
import math
import os

In [2]:
# from tf.keras.models import Model  
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, GRU, Embedding
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, TensorBoard
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [3]:
tf.__version__

'2.3.0'

In [4]:
tf.keras.__version__

'2.4.0'

In [5]:
# Marker words placed around every destination-language line when it is
# loaded, so the start and end of a sentence become ordinary vocabulary
# words that can be looked up as integer-tokens. The embedded spaces keep
# the markers separated from the neighbouring words of the sentence.
mark_start = 'ssss '
mark_end = ' eeee'

In [6]:
# Directory holding the parallel corpus files "train.hi" and "train.pa".
# Set the HI_PA_DATA_DIR environment variable to use another location;
# the original local path is kept as the fallback so existing setups
# keep working unchanged.
data_dir = os.environ.get("HI_PA_DATA_DIR", "D:/Downloads/hi-pa/hi-pa")

In [7]:
def load_data(hindi=True, start="", end=""):
    """
    Read one side of the parallel corpus stored in `data_dir`.

    Every line of the chosen file becomes one string in the returned list.

    :param hindi:
      Boolean selecting the file to read: "train.hi" (the Hindi texts)
      when True, "train.pa" (the other language) when False.
    :param start:
      Text prepended to each line e.g. "ssss " to indicate start of line.
    :param end:
      Text appended to each line e.g. " eeee" to indicate end of line.
    :return:
      List of strings with all the lines of the data-file.
    """

    # Pick the data-file for the requested language.
    filename = "train.hi" if hindi else "train.pa"

    # Full path for the data-file.
    path = os.path.join(data_dir, filename)

    texts = []
    with open(path, encoding="utf-8") as file:
        for line in file:
            # Drop surrounding whitespace (including the newline) before
            # wrapping the line in the start- and end-text.
            texts.append(start + line.strip() + end)

    return texts


In [8]:
data_src = load_data(hindi=False)


In [9]:
data_dest = load_data(hindi=True,start=mark_start,end=mark_end)


In [10]:
idx = 2

In [11]:
data_src[idx]

"ਸਿਹਤ ਮੰਤਰੀ ਨੇ ਸਿਵਲ ਹਸਪਤਾਲ 'ਚ ਮਾਰਿਆ ਛਾਪਾ"

In [12]:
data_dest[idx]

'ssss बाढ़सा अस्पताल पहुंचे स्वास्थ्य मंत्री eeee'

In [13]:
idx = 8002

In [14]:
data_src[idx]

'ਸਰਕਾਰ ਨੇ ਨਹੀਂ ਕੀਤੀ ਸੀ?'

In [15]:
data_dest[idx]

'ssss क्या सरकार इसके लिए इच्छुक नहीं है? eeee'

In [16]:
# Vocabulary size limit. It is passed to both tokenizers and is also used
# as the input dimension of both embedding-layers and as the width of the
# decoder's softmax output.
num_words = 10000

In [17]:
class TokenizerWrap(Tokenizer):
    """
    Wrap the Tokenizer-class from Keras with more functionality.

    Constructing an instance fits the vocabulary on the given texts,
    converts the texts to integer-token sequences (optionally reversed),
    and pads / truncates them into the 2-dim array `tokens_padded`.
    Helper methods convert between tokens and words afterwards.
    """

    def __init__(self, texts, padding,
                 reverse=False, num_words=None):
        """
        :param texts: List of strings. This is the data-set.
        :param padding: Either 'post' or 'pre' padding.
        :param reverse: Boolean whether to reverse token-lists.
        :param num_words: Max number of words to use.
        """

        super().__init__(num_words=num_words)

        # Remember the padding side so text_to_tokens() pads single
        # texts the same way as the data-set was padded.
        self.padding = padding

        # Create the vocabulary from the texts.
        self.fit_on_texts(texts)

        # Create inverse lookup from integer-tokens to words.
        self.index_to_word = {index: word
                              for word, index in self.word_index.items()}

        # Convert all texts to lists of integer-tokens.
        # Note that the sequences may have different lengths.
        self.tokens = self.texts_to_sequences(texts)

        if reverse:
            # Reverse the token-sequences.
            self.tokens = [list(reversed(x)) for x in self.tokens]

            # Sequences that are too long should now be truncated
            # at the beginning, which corresponds to the end of
            # the original sequences.
            truncating = 'pre'
        else:
            # Sequences that are too long should be truncated
            # at the end.
            truncating = 'post'

        # The number of integer-tokens in each sequence.
        self.num_tokens = [len(x) for x in self.tokens]

        # Max number of tokens to use in all sequences: the mean length
        # plus two standard deviations, rounded down. All sequences are
        # padded / truncated to this length, which saves memory at the
        # cost of truncating the few longest sequences.
        self.max_tokens = int(np.mean(self.num_tokens)
                              + 2 * np.std(self.num_tokens))

        # Pad / truncate all token-sequences to the given length.
        # This creates a 2-dim numpy matrix that is easier to use.
        self.tokens_padded = pad_sequences(self.tokens,
                                           maxlen=self.max_tokens,
                                           padding=padding,
                                           truncating=truncating)

    def token_to_word(self, token):
        """
        Lookup a single word from an integer-token.

        Token 0 (the padding value) maps to a single space.
        """

        word = " " if token == 0 else self.index_to_word[token]
        return word

    def tokens_to_string(self, tokens):
        """
        Convert a list of integer-tokens to a string.

        Zero-tokens (padding) are skipped and the remaining words
        are joined with single spaces.
        """

        # Create a list of the individual words.
        words = [self.index_to_word[token]
                 for token in tokens
                 if token != 0]

        # Concatenate the words to a single string
        # with space between all the words.
        text = " ".join(words)

        return text

    def text_to_tokens(self, text, reverse=False, padding=False):
        """
        Convert a single text-string to tokens with optional
        reversal and padding.

        :param text: A single text-string.
        :param reverse: Boolean whether to reverse the token order.
        :param padding:
          Boolean whether to pad / truncate to `max_tokens`. Padding is
          added on the same side that was given to the constructor.
        :return: 2-dim numpy array holding the one token-sequence.
        """

        # Convert to tokens. Note that we assume there is only
        # a single text-string so we wrap it in a list.
        tokens = self.texts_to_sequences([text])
        tokens = np.array(tokens)

        if reverse:
            # Reverse the tokens.
            tokens = np.flip(tokens, axis=1)

            # Sequences that are too long should now be truncated
            # at the beginning, which corresponds to the end of
            # the original sequences.
            truncating = 'pre'
        else:
            # Sequences that are too long should be truncated
            # at the end.
            truncating = 'post'

        if padding:
            # Pad and truncate sequences to the given length, using the
            # padding side this tokenizer was constructed with instead of
            # a hard-coded 'pre', so the result matches `tokens_padded`.
            tokens = pad_sequences(tokens,
                                   maxlen=self.max_tokens,
                                   padding=self.padding,
                                   truncating=truncating)

        return tokens

In [18]:
%%time
# Tokenizer for the source texts. The token-sequences are reversed and
# zero-padded at the beginning ('pre'), so the words of each sentence
# sit at the end of the padded row.
tokenizer_src = TokenizerWrap(texts=data_src,
                              padding='pre',
                              reverse=True,
                              num_words=num_words)


CPU times: total: 1min 53s
Wall time: 1min 55s


In [19]:
%%time
# Tokenizer for the destination texts. The word order is kept and the
# sequences are zero-padded at the end ('post'), so each row begins with
# the tokens of the sentence.
tokenizer_dest = TokenizerWrap(texts=data_dest,
                               padding='post',
                               reverse=False,
                               num_words=num_words)

CPU times: total: 1min 34s
Wall time: 1min 36s


In [20]:
tokens_src = tokenizer_src.tokens_padded
tokens_dest = tokenizer_dest.tokens_padded
print(tokens_src.shape)
print(tokens_dest.shape)

(1427822, 32)
(1427822, 35)


In [21]:
token_start = tokenizer_dest.word_index[mark_start.strip()]
token_start

1

In [22]:
token_end = tokenizer_dest.word_index[mark_end.strip()]
token_end

2

In [23]:
idx = 2

In [24]:
tokens_src[idx]

array([   0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0, 7486, 2368,   27,  131,  394,    4,   37,  198])

In [25]:
tokenizer_src.tokens_to_string(tokens_src[idx])


"ਛਾਪਾ ਮਾਰਿਆ 'ਚ ਹਸਪਤਾਲ ਸਿਵਲ ਨੇ ਮੰਤਰੀ ਸਿਹਤ"

In [26]:
data_src[idx]

"ਸਿਹਤ ਮੰਤਰੀ ਨੇ ਸਿਵਲ ਹਸਪਤਾਲ 'ਚ ਮਾਰਿਆ ਛਾਪਾ"

In [27]:
tokens_dest[idx]

array([   1,  142, 1039,  268,  128,    2,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0])

In [28]:
tokenizer_dest.tokens_to_string(tokens_dest[idx])

'ssss अस्पताल पहुंचे स्वास्थ्य मंत्री eeee'

In [29]:
data_dest[idx]

'ssss बाढ़सा अस्पताल पहुंचे स्वास्थ्य मंत्री eeee'

In [30]:
encoder_input_data = tokens_src

In [31]:
# Decoder input: every destination sequence without its last column.
decoder_input_data = tokens_dest[:, :-1]
decoder_input_data.shape

(1427822, 34)

In [32]:
# Decoder target: the same sequences without their first column, i.e.
# shifted one step to the left relative to the decoder input, so the
# target at each position is the token following the input token.
decoder_output_data = tokens_dest[:, 1:]
decoder_output_data.shape

(1427822, 34)

In [35]:
idx = 2

In [36]:
decoder_input_data[idx]

array([   2,  404,   19,   43,   26,   20,  618,    1, 1451,    5, 9785,
        174,    1,   81,    7,    9,  214,    4,   67, 2200,    9, 1596,
          4,  892, 1762,    8, 1480,  107, 5494,    3,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0])

In [37]:
decoder_output_data[idx]


array([ 404,   19,   43,   26,   20,  618,    1, 1451,    5, 9785,  174,
          1,   81,    7,    9,  214,    4,   67, 2200,    9, 1596,    4,
        892, 1762,    8, 1480,  107, 5494,    3,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0])

In [35]:
tokenizer_dest.tokens_to_string(decoder_input_data[idx])

'ssss अस्पताल पहुंचे स्वास्थ्य मंत्री eeee'

In [34]:
tokenizer_dest.tokens_to_string(decoder_output_data[idx])

'अस्पताल पहुंचे स्वास्थ्य मंत्री eeee'

In [36]:
encoder_input = Input(shape=(None, ), name='encoder_input')

In [37]:
embedding_size = 128

In [38]:
encoder_embedding = Embedding(input_dim=num_words,
                              output_dim=embedding_size,
                              name='encoder_embedding')

In [39]:
state_size = 512

In [40]:
encoder_gru1 = GRU(state_size, name='encoder_gru1',
                   return_sequences=True)
encoder_gru2 = GRU(state_size, name='encoder_gru2',
                   return_sequences=True)
encoder_gru3 = GRU(state_size, name='encoder_gru3',
                   return_sequences=False)

In [41]:
def connect_encoder():
    """
    Wire the encoder layers together and return the resulting tensor.

    The module-level `encoder_input` is passed through the embedding-layer
    and then through the three encoder GRU-layers in order; the output of
    the last GRU-layer is returned.
    """

    # Layers in the order they are applied to the input.
    layers = (encoder_embedding,
              encoder_gru1,
              encoder_gru2,
              encoder_gru3)

    # Start from the input-layer and feed each layer's
    # output into the next one.
    net = encoder_input
    for layer in layers:
        net = layer(net)

    return net

In [42]:
encoder_output = connect_encoder()

In [43]:
decoder_initial_state = Input(shape=(state_size,),
                              name='decoder_initial_state')

In [44]:
decoder_input = Input(shape=(None, ), name='decoder_input')

In [45]:
decoder_embedding = Embedding(input_dim=num_words,
                              output_dim=embedding_size,
                              name='decoder_embedding')

In [46]:
decoder_gru1 = GRU(state_size, name='decoder_gru1',
                   return_sequences=True)
decoder_gru2 = GRU(state_size, name='decoder_gru2',
                   return_sequences=True)
decoder_gru3 = GRU(state_size, name='decoder_gru3',
                   return_sequences=True)

In [47]:
decoder_dense = Dense(num_words,
                      activation='softmax',
                      name='decoder_output')

In [48]:
def connect_decoder(initial_state):
    """
    Wire the decoder layers together and return the output tensor.

    :param initial_state:
      Tensor given as `initial_state` to each of the three decoder
      GRU-layers.
    :return:
      Output of the final dense layer applied to the last GRU-layer.
    """

    # Embed the tokens coming from the decoder's input-layer.
    net = decoder_embedding(decoder_input)

    # Run through the GRU-layers in order; every one of them
    # starts from the same initial state.
    for gru in (decoder_gru1, decoder_gru2, decoder_gru3):
        net = gru(net, initial_state=initial_state)

    # The dense softmax layer maps each GRU output to a
    # distribution over the vocabulary.
    return decoder_dense(net)

In [49]:
# Connect the decoder so that its GRU-layers start from the encoder's
# output, giving one end-to-end graph from both inputs to the decoder
# output.
decoder_output = connect_decoder(initial_state=encoder_output)

# Model used for training: takes source tokens and decoder-input tokens.
model_train = Model(inputs=[encoder_input, decoder_input],
                    outputs=[decoder_output])

In [50]:
# Stand-alone encoder: maps source tokens to the encoder's output. It is
# built from the same layer objects that model_train uses.
model_encoder = Model(inputs=[encoder_input],
                      outputs=[encoder_output])

In [51]:
# Connect the same decoder layer objects a second time, now starting from
# a separate input-layer instead of the encoder's output. Note that this
# re-binds the name `decoder_output`.
decoder_output = connect_decoder(initial_state=decoder_initial_state)

# Stand-alone decoder: takes decoder-input tokens plus an explicitly
# supplied initial state.
model_decoder = Model(inputs=[decoder_input, decoder_initial_state],
                      outputs=[decoder_output])

In [52]:
# Use the `learning_rate` keyword: the `lr` alias is deprecated in
# tf.keras optimizers. The targets are integer-tokens rather than
# one-hot arrays, hence the sparse variant of the cross-entropy loss.
model_train.compile(optimizer=RMSprop(learning_rate=1e-3),
                    loss='sparse_categorical_crossentropy')

In [53]:
path_checkpoint = '21_checkpoint.keras'
callback_checkpoint = ModelCheckpoint(filepath=path_checkpoint,
                                      monitor='val_loss',
                                      verbose=1,
                                      save_weights_only=True,
                                      save_best_only=True)

In [54]:
callback_early_stopping = EarlyStopping(monitor='val_loss',
                                        patience=3, verbose=1)

In [55]:
callback_tensorboard = TensorBoard(log_dir='./21_logs/',
                                   histogram_freq=0,
                                   write_graph=False)

In [56]:
callbacks = [callback_early_stopping,
             callback_checkpoint,
             callback_tensorboard]

In [57]:
# Best-effort resume: try to load weights from an earlier checkpoint.
# A failure (e.g. no checkpoint file exists yet) is only reported, and
# the notebook continues with the weights the model currently has.
try:
    model_train.load_weights(path_checkpoint)
except Exception as error:
    print("Error trying to load checkpoint.")
    print(error)

Error trying to load checkpoint.
Unable to open file (unable to open file: name = '21_checkpoint.keras', errno = 2, error message = 'No such file or directory', flags = 0, o_flags = 0)


In [58]:
x_data = \
{
    'encoder_input': encoder_input_data,
    'decoder_input': decoder_input_data
}

In [59]:
y_data = \
{
    'decoder_output': decoder_output_data
}

In [60]:
# Fraction of the data-set that corresponds to 10000 samples; used as the
# validation_split argument when fitting the model.
validation_split = 10000 / len(encoder_input_data)
validation_split

0.007003674127447259

In [62]:
model_train.fit(x=x_data,
                y=y_data,
                batch_size=384,
                epochs=1,
                validation_split=validation_split,
                callbacks=callbacks)

Epoch 00001: val_loss improved from inf to 1.35185, saving model to 21_checkpoint.keras


<tensorflow.python.keras.callbacks.History at 0x28e978da8e0>

In [63]:
def translate(input_text, true_output_text=None):
    """
    Translate a single text-string and print the result.

    The text is tokenized with `tokenizer_src`, encoded with
    `model_encoder`, and then decoded one token at a time with
    `model_decoder`, always picking the highest-scoring token, until the
    end-token is produced or `tokenizer_dest.max_tokens` tokens have been
    generated.

    :param input_text: Text-string in the source language.
    :param true_output_text:
      Optional reference translation that is printed for comparison.
    :return: None. The input, translation and reference are printed.
    """

    # Convert the input-text to integer-tokens.
    # Note the sequence of tokens has to be reversed.
    # Padding is probably not necessary.
    input_tokens = tokenizer_src.text_to_tokens(text=input_text,
                                                reverse=True,
                                                padding=True)

    # Get the output of the encoder's GRU which will be
    # used as the initial state in the decoder's GRU.
    # This could also have been the encoder's final state
    # but that is really only necessary if the encoder
    # and decoder use the LSTM instead of GRU because
    # the LSTM has two internal states.
    initial_state = model_encoder.predict(input_tokens)

    # Max number of tokens / words in the output sequence.
    max_tokens = tokenizer_dest.max_tokens

    # Pre-allocate the 2-dim array used as input to the decoder.
    # This holds just a single sequence of integer-tokens,
    # but the decoder-model expects a batch of sequences.
    # The builtin `int` is used as dtype because the `np.int` alias
    # is deprecated and no longer exists in recent NumPy versions.
    decoder_tokens = np.zeros(shape=(1, max_tokens), dtype=int)

    # The first input-token is the special start-token for 'ssss '.
    token_int = token_start

    # Initialize an empty output-text.
    output_text = ''

    # Initialize the number of tokens we have processed.
    count_tokens = 0

    # While we haven't sampled the special end-token for ' eeee'
    # and we haven't processed the max number of tokens.
    while token_int != token_end and count_tokens < max_tokens:
        # Update the input-sequence to the decoder
        # with the last token that was sampled.
        # In the first iteration this will set the
        # first element to the start-token.
        decoder_tokens[0, count_tokens] = token_int

        # Wrap the input-data in a dict for clarity and safety,
        # so we are sure we input the data in the right order.
        decoder_inputs = \
        {
            'decoder_initial_state': initial_state,
            'decoder_input': decoder_tokens
        }

        # Note that we input the entire sequence of tokens
        # to the decoder. This wastes a lot of computation
        # because we are only interested in the last input
        # and output. We could modify the code to return
        # the GRU-states when calling predict() and then
        # feeding these GRU-states as well the next time
        # we call predict(), but it would make the code
        # much more complicated.

        # Input this data to the decoder and get the predicted output.
        predictions = model_decoder.predict(decoder_inputs)

        # Get the decoder's output for the current position: one score
        # per word of the vocabulary (the softmax output).
        token_scores = predictions[0, count_tokens, :]

        # Convert to an integer-token by taking the best-scoring word.
        token_int = np.argmax(token_scores)

        # Lookup the word corresponding to this integer-token.
        sampled_word = tokenizer_dest.token_to_word(token_int)

        # Append the word to the output-text.
        output_text += " " + sampled_word

        # Increment the token-counter.
        count_tokens += 1

    # Print the input-text.
    print("Input text:")
    print(input_text)
    print()

    # Print the translated output-text.
    print("Translated text:")
    print(output_text)
    print()

    # Optionally print the true translated text.
    if true_output_text is not None:
        print("True output text:")
        print(true_output_text)
        print()

In [64]:
idx = 3
translate(input_text=data_src[idx],
          true_output_text=data_dest[idx])

Input text:
( ਅ) ਇਕ ਵਿਅਕਤੀ ਨੂੰ ਕਦੋਂ ਬਪਤਿਸਮਾ ਲੈਣਾ ਚਾਹੀਦਾ ਹੈ?

Translated text:
 ख एक मसीही को एक मसीही को चाहिए eeee

True output text:
ssss ( क) समझाइए कि बपतिस्मा लेना क्यों एक गंभीर फैसला है । eeee



In [65]:
idx = 4
translate(input_text=data_src[idx],
          true_output_text=data_dest[idx])

Input text:
ਜਿਸ ਨਾਲ ਇੱਕ ਨੌਜਵਾਨ ਦੀ ਮੌਤ ਹੋ ਗਈ ਜਦਕਿ ਦੂਜੇ ਦੀ ਹਾਲਤ ਗੰਭੀਰ ਬਣੀ ਹੋਈ ਹੈ।

Translated text:
 इस हादसे में एक व्यक्ति की मौत हो गई जबकि एक गंभीर रूप से घायल हो गया है। eeee

True output text:
ssss एक युवक की मौत हो गई, वहीं दूसरा युवक गंभीर रूप से घायल हो गया। eeee



In [66]:
idx = 3
translate(input_text=data_src[idx] + data_src[idx+1],
          true_output_text=data_dest[idx] + data_dest[idx+1])

Input text:
( ਅ) ਇਕ ਵਿਅਕਤੀ ਨੂੰ ਕਦੋਂ ਬਪਤਿਸਮਾ ਲੈਣਾ ਚਾਹੀਦਾ ਹੈ?ਜਿਸ ਨਾਲ ਇੱਕ ਨੌਜਵਾਨ ਦੀ ਮੌਤ ਹੋ ਗਈ ਜਦਕਿ ਦੂਜੇ ਦੀ ਹਾਲਤ ਗੰਭੀਰ ਬਣੀ ਹੋਈ ਹੈ।

Translated text:
 ख एक दूसरे को एक दूसरे को एक दूसरे को एक दूसरे को एक दूसरे से एक एक या एक दूसरे को एक दूसरे को एक दूसरे को एक दूसरे को देना चाहिए और एक

True output text:
ssss ( क) समझाइए कि बपतिस्मा लेना क्यों एक गंभीर फैसला है । eeeessss एक युवक की मौत हो गई, वहीं दूसरा युवक गंभीर रूप से घायल हो गया। eeee



In [67]:
idx = 3
translate(input_text=data_src[idx+1] + data_src[idx],
          true_output_text=data_dest[idx+1] + data_dest[idx])


Input text:
ਜਿਸ ਨਾਲ ਇੱਕ ਨੌਜਵਾਨ ਦੀ ਮੌਤ ਹੋ ਗਈ ਜਦਕਿ ਦੂਜੇ ਦੀ ਹਾਲਤ ਗੰਭੀਰ ਬਣੀ ਹੋਈ ਹੈ।( ਅ) ਇਕ ਵਿਅਕਤੀ ਨੂੰ ਕਦੋਂ ਬਪਤਿਸਮਾ ਲੈਣਾ ਚਾਹੀਦਾ ਹੈ?

Translated text:
 इस हमले में एक व्यक्ति की मौत हो गई जबकि एक गंभीर रूप से घायल हो गया है। eeee

True output text:
ssss एक युवक की मौत हो गई, वहीं दूसरा युवक गंभीर रूप से घायल हो गया। eeeessss ( क) समझाइए कि बपतिस्मा लेना क्यों एक गंभीर फैसला है । eeee



In [71]:
idx = 9
translate(input_text=data_src[idx],
          true_output_text=data_dest[idx])

Input text:
[ ਸਫ਼ਾ 7 ਉੱਤੇ ਡੱਬੀ / ਤਸਵੀਰਾਂ]

Translated text:
 पेज 9 पर बक्स तसवीर eeee

True output text:
ssss [ पेज ७ पर बक्स / तसवीरें] “ जो मायूस हैं, उन्हें अपनी बातों से तसल्ली दो ” eeee



In [73]:
idx = 90
translate(input_text=data_src[idx],
          true_output_text=data_dest[idx])

Input text:
ਭਰਾ -ਬੰਧੁਵਾਂਅਤੇ ਗੁਆੰਡੀਆਂ ਦੇ ਨਾਲ ਖੂਬ ਚੰਗੇ ਸੰਬੰਧ ਰਹਾਂਗੇ ।

Translated text:
 भाई के साथ भी अच्छा रहेगा। eeee

True output text:
ssss बंधु-बांधवों के साथ आपका व्यवहार अच्छा रहेगा। eeee



In [72]:
idx = 10
translate(input_text=data_src[idx],
          true_output_text=data_dest[idx])

Input text:
ਪੜਤਾਲ ਮਗਰੋਂ ਕਸੂਰਵਾਰਾਂ ਖ਼ਿਲਾਫ਼ ਕਾਰਵਾਈ ਕੀਤੀ ਜਾਵੇਗੀ।

Translated text:
 जांच के बाद कार्रवाई की जाएगी। eeee

True output text:
ssss जांच पूरी होने के बाद आरोपियों के खिलाफ आगे की कार्रवाई की जाएगी। eeee



In [68]:
translate(input_text="ਇਹ ਸ਼ੁੱਕਰਵਾਰ ਹੈ",
          true_output_text='आज शुक्रवार है')

Input text:
ਇਹ ਸ਼ੁੱਕਰਵਾਰ ਹੈ

Translated text:
 यह एक है। eeee

True output text:
आज शुक्रवार है



In [69]:
translate(input_text="ਜਨਮਦਿਨ ਮੁਬਾਰਕ",
          true_output_text="जन्मदिन मुबारक")

Input text:
ਜਨਮਦਿਨ ਮੁਬਾਰਕ

Translated text:
 आप खुश eeee

True output text:
जन्मदिन मुबारक



In [70]:
translate(input_text="ਸਾਨੂੰ ਚੱਲਣਾ ਚਾਹੀਦਾ ਹੈ",
          true_output_text="अब चलें")

Input text:
ਸਾਨੂੰ ਚੱਲਣਾ ਚਾਹੀਦਾ ਹੈ

Translated text:
 हमें चाहिए eeee

True output text:
अब चलें



In [75]:
idx = 30
translate(input_text=data_src[idx],
          true_output_text=data_dest[idx])

Input text:
ਉਨ੍ਹਾਂ ਕਿਹਾ ਕਿ ਕਿਸਾਨਾਂ ਦੀਆਂ ਜ਼ਮੀਨਾਂ ਖੋਹੀਆਂ ਜਾ ਰਹੀਆਂ ਹਨ।

Translated text:
 उन्होंने कहा कि किसानों की मांगें हो रही है। eeee

True output text:
ssss उन्होंने कहा कि सरकार किसानों से मुंह फेर रही है। eeee



In [76]:





idx = 29
translate(input_text=data_src[idx],
          true_output_text=data_dest[idx])

Input text:
ਕਲਕੱਤੇ ਦੇ ਲੋਕਾਂ ਦੇ ਭੋਜਨ ਵਿਚ ਮੱਛੀ ਜ਼ਰੂਰੀ ਹੈ, ਅਤੇ ਵੱਡੇ - ਵੱਡੇ ਬਾਜ਼ਾਰਾਂ ਵਿਚ ਕਈ ਕਿਸਮ ਦੀਆਂ ਮੱਛੀਆਂ, ਮੀਟ, ਅਤੇ ਸਬਜ਼ੀਆਂ ਮਿਲਦੀਆਂ ਹਨ ।

Translated text:
 के लिए कई लोग के लिए बहुत सी हैं और इस तरह के कई फल हैं । eeee

True output text:
ssss और कलकत्ता के स्वादिष्ट भोजन में चार चाँद लगाती हैं वहाँ की मशहूर मिठाइयाँ । eeee

