1. Implement a system which transform a text in a Markov chain and generate a saying or a poem in romanian

In [14]:
import random
import string

def remove_punctuation(text):
    translation_table = str.maketrans('', '', string.punctuation)
    return text.translate(translation_table)

def build_markov_chain(text, order=3):
    text = remove_punctuation(text)
    text = text.lower()
    words = text.split()
    chain = {}
    for i in range(len(words) - order):
        prefix = tuple(words[i:i+order])
        suffix = words[i+order]
        if prefix in chain:
            chain[prefix].append(suffix)
        else:
            chain[prefix] = [suffix]
    return chain


In [15]:
def generate_poem1(chain, rows=4):
    random.seed(random.randint(0, 1000))
    result = []
    current_words = random.choice(list(chain.keys()))
    for i in range(rows):
        random.seed(random.randint(0, 1000))
        # The first word of the row is capitalized
        if i == 0:
            while current_words[0][0].islower():
                current_words = random.choice(list(chain.keys()))
            # concatenate the current words to the result
            result.extend(current_words)
        next_word = random.choice(chain[current_words])
        while next_word[0].islower():
            result.append(next_word)
            current_words = tuple(result[-len(current_words):])
            next_word = random.choice(chain[current_words])
        result.append(next_word)
        current_words = tuple(result[-len(current_words):])
    
    # Add a newline after each line
    for i in range(len(result) - 1):
        if result[i+1][0].isupper():
          result[i] += '\n'
    return ' '.join(result[:-1])

def poetry_structure(poem, rows=4, words_per_row=6):
    poem[0] = poem[0].capitalize()
    for i in range(rows):
        poem[(i+1)*words_per_row-1] += '\n'
        if (i+1)*words_per_row < len(poem):
            poem[(i+1)*words_per_row] = poem[(i+1)*words_per_row].capitalize()

def generate_poem2(chain, rows=4, words_per_row=6):
    random.seed(random.randint(0, 1000))
    current_words = random.choice(list(chain.keys()))
    result = list(current_words)
    for i in range(rows*words_per_row):
        random.seed(random.randint(0, 1000))
        next_word = random.choice(chain[current_words])
        result.append(next_word)
        current_words = tuple(result[-len(current_words):])
    poetry_structure(result, rows, words_per_row)
    return ' '.join(result) + '.'

Generate sayings

In [16]:
def generate_sayings(chain, words=6):
    random.seed(random.randint(0, 1000))
    current_words = random.choice(list(chain.keys()))
    result = list(current_words)
    for _ in range(words - len(current_words)):
        next_word = random.choice(chain[current_words])
        result.append(next_word)
        current_words = tuple(result[-len(current_words):])
    result[0] = result[0].capitalize()
    return ' '.join(result) + '.'

Main

In [17]:
if __name__ == '__main__':
    # Read the text from data/corpus_complet.txt and build the Markov chain
    with open('data/corpus_complet.txt') as f:
        text = f.read()
    chain = build_markov_chain(text, order=3)
    # Generate a poem
    poem = generate_poem2(chain)

    print("Poem:")
    print(poem)
    # Generate some sayings
    with open('data/proverbe.txt') as f:
        text = f.read()
    chain = build_markov_chain(text, order=4)
    print("Sayings:")
    for i in range(5):
        print(generate_sayings(chain))

Poem:
Pe negrele plăceri şiademenind lumina cu
 Îngeriiţi perfizi în iadul tău tragi
 Cerul şi astfel mil deschizi eu
 Nam fost niciodată ucenic dintrunceput deplin
 Maestru numa în.
Sayings:
Roade cine nu nici carne moale.
Sacul lacomului nu ii ajunge oltu.
Imi e baba rada ce imi.
Bordeie atatea obiceie ce am avut.
Se numara bobocii toate drumurile duc.


2. a.Generate a stanza of poetry in English language

In [18]:
%pip install markovify
%pip install gensim

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.
Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


In [19]:
def generate_poetry(model, rows=4):      
    return '\n'.join(model.make_sentence() for _ in range(rows))

b. Calculate the emotion of the generated text

In [20]:
%pip install nltk

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


In [21]:
import nltk
nltk.download('vader_lexicon')
from nltk.sentiment.vader import SentimentIntensityAnalyzer

def sentiment_analysis(text):
    sia = SentimentIntensityAnalyzer()
    return sia.polarity_scores(text)

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /home/florin/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


c. Replace randomly words with their synonyms

In [22]:
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
import os
from gensim.models import Word2Vec
from nltk.corpus import stopwords

nltk.download('punkt')
nltk.download('stopwords')

def create_word2vec_model(text):
    text = remove_punctuation(text.lower())
    tokens = nltk.word_tokenize(text)

    stop_words = set(stopwords.words('english'))
    tokens = [word for word in tokens if word not in stop_words]

    model = Word2Vec([tokens], min_count=1)

    return model


def get_synonyms(model, word, topn=5):
    if word not in model.wv:
        return []
    synonyms = model.wv.most_similar(word, topn=topn)
    return [syn[0] for syn in synonyms]

def reconstruct_sentence(text):
    reconstructed_sentence = ""
    for i, token in enumerate(text):
        if i > 0 and token[0] in string.punctuation:
            reconstructed_sentence += token
        else:
            reconstructed_sentence += " " + token
    return reconstructed_sentence.strip()
    
def replace_words(model, text):
    result = []
    for line in text.split('\n'):
        words_line = nltk.word_tokenize(line)
        for i, word in enumerate(words_line):
            random.seed(random.randint(0, 1000))

            if random.randint(0, 50) > 40:
                synonyms = get_synonyms(model, word)
                if synonyms:
                    words_line[i] = random.choice(synonyms)

        result.append(reconstruct_sentence(words_line))
    return '\n'.join(result)

[nltk_data] Downloading package punkt to /home/florin/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /home/florin/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


e. Calculate BLEU metric

In [23]:

def calculate_bleu_score(reference_poems, generated_poem):
    bleu_scores = []
    for reference_poem in reference_poems:
        bleu_score = nltk.translate.bleu_score.sentence_bleu([reference_poem], generated_poem)
        bleu_scores.append(bleu_score)
    
    average_bleu_score = sum(bleu_scores) / len(bleu_scores)
    return average_bleu_score, nltk.translate.bleu_score.sentence_bleu(reference_poems, generated_poem)


In [24]:
import markovify

if __name__ == '__main__':
    with open("data/shakespear.txt") as f:
        text = f.read()

    poetry_model = markovify.NewlineText(text)

    poetry = generate_poetry(poetry_model, rows=10)
    print("Poetry:")
    print(poetry)
    scores = sentiment_analysis(poetry)
    print("\nSentiment analysis:")
    print(scores)
    print("\nPoetry after replacing words:")
    model = create_word2vec_model(text)
    poetry_with_synonyms = replace_words(model, poetry)
    print(poetry_with_synonyms)
    print("\nBleu score:")
    text_without_punctuation = remove_punctuation(text)
    reference_poems = text_without_punctuation.lower().split('\n\n')
    avg_bleu_score, bleu_score = calculate_bleu_score(reference_poems, remove_punctuation(poetry).lower())
    print("Average bleu score: " + str(avg_bleu_score))
    print("Bleu score: " + str(bleu_score))

Poetry:


Make answer Muse: wilt thou be denied!
Thy self away, art present still with thee shall stay.
So shall those blots that do with me after I am now,
Is perjured, murderous, bloody, full of your desire?
And keep invention in a kind of praise;
Say that thou teachest how to make love groan;
Or if it do, not from my mistress reeks.
Of bird, of flower, or shape which it fears to lose.
Were't aught to me I am dead
Thou art the grave and thee.

Sentiment analysis:
{'neg': 0.212, 'neu': 0.692, 'pos': 0.095, 'compound': -0.9097}

Poetry after replacing words:
Make haply Muse: wilt thee be hungry!
Thy self art, art present still with love shall stay.
So let those blots that do with me after I am now,
Is perjured, suggest, bloody, full of your desire?
And keep invention in a kind of praise;
Say that art teachest how to make love groan;
Or if it do, not from my mistress reeks.
Of patient, of flower, or shape which it fears to lose.
Were't aught to me I am dead
Thou eyes the grave and thee.

Bleu sc