In [1]:
import pandas as pd
import markovify

ocr_results_url = "https://raw.githubusercontent.com/erickbytes/positivipy/main/Positive_Thoughts_From_Google_Vision.csv"
positive_thoughts = pd.read_csv(ocr_results_url)
positive_thoughts = positive_thoughts.dropna().fillna('')
print(positive_thoughts.head())



                                                Text
0  CLIMB MOUNTAINS NOT SO\nTHE WORLD CAN SEE YOU,...
1  YOUP EEFE\nIS A SERIES OF\nMOMENTS I "NOW."\n'...
2  What defines us is\nhow well we rise\nafter fa...
3  "One today is worth two tomorrows;\nwhat I am ...
4  YOUR MIND IS A GARDEN\nYOUR THOUGHTS ARE THE S...


In [2]:
positive_thoughts['Quotes'] = (positive_thoughts.Text.astype(str)
                                                 .str.replace(pat='\n', repl=' ', regex=False)
                                                 .str.strip())
positive_thoughts.Quotes

0      CLIMB MOUNTAINS NOT SO THE WORLD CAN SEE YOU, ...
1      YOUP EEFE IS A SERIES OF MOMENTS I "NOW." 'THE...
2      What defines us is how well we rise after fall...
3      "One today is worth two tomorrows; what I am t...
4      YOUR MIND IS A GARDEN YOUR THOUGHTS ARE THE SE...
                             ...                        
767    IT MAY BE THAT WHEN WE NO LONGER KNOW WHAT TO ...
768    " Most people overestimate what they can do in...
769    Walk tall, kick ass, learn to speak Arabic, lo...
770                                    TAKE AT LEAST ONE
771    BEGIN AT ONCE TO LIVE, AND COUNT EACH SEPARATE...
Name: Quotes, Length: 770, dtype: object

In [3]:
# get rid of weird non-ascii characters
positive_thoughts.Quotes = (positive_thoughts.Quotes.apply(lambda word:word.encode('ascii',errors='ignore'))
                                                    .apply(lambda word:word.decode('utf-8'))
                                                    .str.strip()
                                                    .str.lower())
positive_thoughts.Quotes = positive_thoughts.Quotes.str.replace(pat='"', repl='', regex=False).str.strip()

# removing websites and other source info from quotes
positive_thoughts.Quotes = positive_thoughts.Quotes.apply(lambda x:' '.join(
                                                [word for word in x.split(' ') if '.co' not in word\
                                                                                and 'www' not in word\
                                                                                and 'quot' not in word\
                                                                                and '.net' not in word\
                                                                                and '.org' not in word\
                                                                                and '#' not in word\
                                                                                and '@' not in word]
                                                                                ))

# removes these characters: ()|/*[]
positive_thoughts.Quotes = (positive_thoughts.Quotes.str.replace(pat='\(|\|||\)|/|\*|\[|\]', repl='', regex=True)
                                                    .str.replace(pat='  ', repl=' ', regex=False)
                                                    .str.replace(pat=' v ', repl='. ', regex=False)
                                                    .str.strip()
                                                    .str.capitalize())
positive_thoughts['Authors'] = positive_thoughts.Quotes.str.split(pat='-', expand=True)[1].str.strip()
positive_thoughts['Quotes'] = positive_thoughts.Quotes.str.split(pat='-', expand=True)[0].str.strip()
positive_thoughts.head()
positive_thoughts.to_csv('Positive_Thoughts_Cleaned.csv',index=False)

In [4]:
quotes = positive_thoughts.Quotes.tolist()
text = '. '.join(quotes)
text



In [5]:
# Build a markov chain model.
text_model = markovify.Text(text)

# Print five randomly-generated sentences
for i in range(5):
    print(f'{text_model.make_sentence()}\n')

I'm lazy. but it's the lazy people who are thankful, it is dressed in overalls and looks like work. thomas edison american inventor and businessman.

Does this really matter in the end. john lennon com.

14 learn the rules and then go do talat..

35 martin luther king, jr..

There was a german philosopher friedrich nietzsche. it can be challenged. every man is not final. failure is not preparation for life; education is life john dewey.



In [6]:
# Print three randomly-generated sentences of no more than 280 characters
for i in range(3):
    print(f'{text_model.make_sentence()}\n')

To improve is to change the game. stevie nicks az.

All the days that you were yesterday..

Education is learning what you feel, you attract. what you already know. but if you can't steer a parked car. the thing you can. e try to be doing something else. the trick is the key to success in business, education, pro football, anything that you do what makes us suffer. what makes you who you fucking are..

