In [34]:
import pandas as pd
import markovify

ocr_results_url = "https://raw.githubusercontent.com/erickbytes/positivipy/main/Positive_Thoughts_From_Google_Vision.csv"
positive_thoughts = pd.read_csv(ocr_results_url)
positive_thoughts = positive_thoughts.dropna().fillna('')
print(positive_thoughts.head())

                                                Text
0  CLIMB MOUNTAINS NOT SO\nTHE WORLD CAN SEE YOU,...
1  YOUP EEFE\nIS A SERIES OF\nMOMENTS I "NOW."\n'...
2  What defines us is\nhow well we rise\nafter fa...
3  "One today is worth two tomorrows;\nwhat I am ...
4  YOUR MIND IS A GARDEN\nYOUR THOUGHTS ARE THE S...


In [35]:
positive_thoughts['Quotes'] = (positive_thoughts.Text.astype(str)
                                                 .str.replace(pat='\n', repl=' ', regex=False)
                                                 .str.strip())
# get rid of weird non-ascii characters
positive_thoughts.Quotes = (positive_thoughts.Quotes.apply(lambda word:word.encode('ascii',errors='ignore'))
                                                    .apply(lambda word:word.decode('utf-8'))
                                                    .str.strip()
                                                    .str.lower())
positive_thoughts.Quotes = positive_thoughts.Quotes.str.replace(pat='"', repl='', regex=False).str.strip()
positive_thoughts.Quotes

0      climb mountains not so the world can see you, ...
1      youp eefe is a series of moments i now. 'the b...
2      what defines us is how well we rise after fall...
3      one today is worth two tomorrows; what i am to...
4      your mind is a garden your thoughts are the se...
                             ...                        
767    it may be that when we no longer know what to ...
768    most people overestimate what they can do in o...
769    walk tall, kick ass, learn to speak arabic, lo...
770                                    take at least one
771    begin at once to live, and count each separate...
Name: Quotes, Length: 770, dtype: object

In [36]:
def clean_quotes(x):
    quote = ' '.join([word for word in x.split(' ') if '.co' not in word\
                                        and 'www' not in word\
                                        and 'quot' not in word\
                                        and '.net' not in word\
                                        and '.org' not in word\
                                        and '#' not in word\
                                        and '@' not in word]
                                        )
    return quote

# removing websites and other source info from quotes
positive_thoughts.Quotes = positive_thoughts.Quotes.apply(lambda x:clean_quotes(x))

# removes these characters: ()|/*[]
# remove extra spaces, ocr "v" formatting
positive_thoughts.Quotes = (positive_thoughts.Quotes.str.replace(pat='\(|\|||\)|/|\*|\[|\]', repl='', regex=True)
                                                    .str.replace(pat='  ', repl=' ', regex=False)
                                                    .str.replace(pat=' v ', repl='. ', regex=False)
                                                    .str.strip()
                                                    .str.capitalize())
positive_thoughts['Authors'] = positive_thoughts.Quotes.str.split(pat='-', expand=True)[1].str.strip()
positive_thoughts['Quotes'] = positive_thoughts.Quotes.str.split(pat='-', expand=True)[0].str.strip()
positive_thoughts.head()
# positive_thoughts.to_csv('Positive_Thoughts_Cleaned.csv',index=False)

Unnamed: 0,Text,Quotes,Authors
0,"CLIMB MOUNTAINS NOT SO\nTHE WORLD CAN SEE YOU,...","Climb mountains not so the world can see you, ...",
1,"YOUP EEFE\nIS A SERIES OF\nMOMENTS I ""NOW.""\n'...",Youp eefe is a series of moments i now. 'the b...,
2,What defines us is\nhow well we rise\nafter fa...,What defines us is how well we rise after fall...,
3,"""One today is worth two tomorrows;\nwhat I am ...",One today is worth two tomorrows; what i am to...,
4,YOUR MIND IS A GARDEN\nYOUR THOUGHTS ARE THE S...,Your mind is a garden your thoughts are the se...,


In [37]:
# use a manually corrected file
clean_quotes_url = 'https://raw.githubusercontent.com/erickbytes/positivipy/main/Positive_Thoughts_Manually_Cleaned.csv'
positive_thoughts = pd.read_csv(clean_quotes_url)
positive_thoughts = positive_thoughts.fillna('').astype(str)
quotes = positive_thoughts.Quotes.str.capitalize().tolist()
text = '. '.join(quotes)
text



In [38]:
# Build a markov chain model.
text_model = markovify.Text(text)

# Print five randomly-generated sentences
for i in range(5):
    print(f'{text_model.make_sentence()}\n')

Life is like a fresh shirt on our ability, on waking, to pick it up..

Pick your battles. you don't need to express love more than silence..

May everyone be free from misery may no one can walk it for you..

Eternalism philosophy of time dealing with a focus on sinking only the imperative of change..

Every night when i had been wiped clean by summer..



In [39]:
# Print three randomly-generated sentences of no more than 280 characters
for i in range(3):
    print(f'{text_model.make_short_sentence(280)}\n')

You have power over you..

When you want to do..

The mystery isn't a problem to solve but a dream just because of its persistence..

