<h2> Building a Markov Chain </h2>
<br>
<p> Using the Twitter history of Boys' Life Magazine, this notebook builds the probabilistic foundation for a Markov chain and then generates new (and hopefully entertaining) tweets in the voice of Boys' Life.</p>
<p>The timeline used runs from Tuesday, April 28, 2015 until Thursday, April 13, 2017.</p>
<br>
<sub>1. http://www.onthelambda.com/2014/02/20/how-to-fake-a-sophisticated-knowledge-of-wine-with-markov-chains/ </sub><br>
<sub>2. https://codereview.stackexchange.com/questions/24276/implementation-of-a-markov-chain</sub>

In [60]:
#standard library
import sys
print("Python {}".format(sys.version_info[:3]))
import string
import random

#packages
import twython
from twython import Twython
from twython import TwythonStreamer
print("twython v.{}".format(twython.__version__))
import pandas as pd
print("pandas v.{}".format(pd.__version__))
import ujson as json
print('ujson v.{}'.format(json.__version__))

Python (3, 5, 2)
twython v.3.4.0
pandas v.0.19.2
ujson v.1.35


In [9]:
# Load the Twitter API credentials from a local JSON file.
with open('D:\\Python\\twitter_credentials.json', 'r') as cred_file:
    credentials = json.load(cred_file)

# Pull out the four expected entries in one step; a missing entry raises KeyError.
(CONSUMER_KEY,
 CONSUMER_SECRET,
 ACCESS_TOKEN,
 ACCESS_TOKEN_SECRET) = (
    credentials[entry]
    for entry in ('CONSUMER_KEY',
                  'CONSUMER_SECRET',
                  'ACCESS_TOKEN',
                  'ACCESS_TOKEN_SECRET')
)

In [82]:
def get_tweets(twitter_id, max_tweets=10000):
    """Download a user's timeline by paging backwards through it.

    Each request asks ``get_user_timeline`` for up to 200 tweets (retweets
    excluded); every request after the first is limited, via ``max_id``, to
    tweets older than the last one already collected.

    Parameters
    ----------
    twitter_id : str
        Screen name of the account to download.
    max_tweets : int, optional
        Stop requesting further pages once this many tweets have been
        collected.  Pages are kept whole, so the result can exceed this
        number by up to one page.  Defaults to 10000.

    Returns
    -------
    list
        The tweet objects exactly as Twython returned them, in the order
        the pages arrived.  Empty when the first page has no tweets.
    """
    twitter = Twython(CONSUMER_KEY, CONSUMER_SECRET)

    tweets = []
    request = {'screen_name': twitter_id, 'include_rts': False, 'count': 200}

    # Checking the cap before each request avoids fetching a page that would
    # only be thrown away.
    while len(tweets) < max_tweets:
        page = twitter.get_user_timeline(**request)
        if not page:
            # Nothing (more) to collect.  Handling the empty case here means
            # an account with no tweets yields [] rather than an IndexError
            # from indexing into an empty page.
            break
        tweets += page
        # Step one below the oldest tweet just received so the next page does
        # not include it again.
        request['max_id'] = page[-1]['id'] - 1

    return tweets

In [24]:
# Download the Boys' Life timeline (network call; can take a while).
bl_tweets = get_tweets(twitter_id='@BoysLife')

In [49]:
# Clean every tweet and wrap it in start/end sentinel words.
tweet_texts = []

for status in bl_tweets:
    cleaned = status['text']
    # Drop both kinds of quote character and turn non-breaking spaces into
    # ordinary spaces.
    for unwanted, substitute in (("'", ""), ('"', ""), ('\xa0', " ")):
        cleaned = cleaned.replace(unwanted, substitute)
    # Keep only the part of the text that precedes the first "http".
    cleaned = cleaned.split('http')[0]
    tweet_texts.append("TWEETSTART " + cleaned + " TWEETEND")

In [57]:
# Build the Markov chain lookup: every word maps to the list of words that
# directly followed it in some tweet.  Duplicates are kept on purpose, so a
# uniform random pick from the list favours frequent successors.
m_chain = {}

for tweet in tweet_texts:
    tweet_words = tweet.split()
    # Pair each word with its immediate successor.
    for word, follower in zip(tweet_words, tweet_words[1:]):
        if word not in m_chain:
            m_chain[word] = []
        m_chain[word].append(follower)

In [75]:
#Generate new tweets...

def get_new_tweet(chain=None):
    """Generate one tweet by taking a random walk through a Markov chain.

    Parameters
    ----------
    chain : dict, optional
        Maps each word to the list of words observed directly after it.
        The walk starts at the "TWEETSTART" entry and stops when
        "TWEETEND" is drawn.  When omitted, the notebook-level ``m_chain``
        is used.

    Returns
    -------
    str
        The words drawn between the two sentinels, joined by single spaces
        (an empty string if "TWEETEND" is drawn first).

    Raises
    ------
    KeyError
        If the walk reaches a word that has no entry in ``chain``.
    """
    if chain is None:
        # Resolved at call time so the default always reflects the chain as
        # it currently exists in the notebook.
        chain = m_chain

    new_tweet = []
    next_word = random.choice(chain["TWEETSTART"])
    while next_word != "TWEETEND":
        new_tweet.append(next_word)
        next_word = random.choice(chain[next_word])

    return ' '.join(new_tweet)

In [76]:
# Generate a batch of 500 tweets from the Boys' Life chain.
new_tweets = [get_new_tweet() for _ in range(500)]

# Write one tweet per line.  The encoding is given explicitly because tweets
# can contain characters (emoji, curly quotes, ...) that the platform's
# default codec may be unable to encode.
with open("D:\\BSA\\magazines_fun\\new_tweets3.txt", "w", encoding="utf-8") as f:
    for tweet in new_tweets:
        f.write(tweet + "\n")

<h2>...And now the slightly more evil version: a Donald Trump tweet Markov chain...</h2>

In [78]:
# Download the timeline to model.
trump_tweets = get_tweets('@realDonaldTrump')

# Strip quote characters, turn non-breaking spaces into plain spaces, cut each
# text at its first "http", and wrap the remainder in sentinel words.
tweet_texts = [
    "TWEETSTART "
    + tweet['text'].replace("'", "").replace('"', "").replace('\xa0', " ").split('http')[0]
    + " TWEETEND"
    for tweet in trump_tweets
]

# Record, for every word, each word seen immediately after it (repeats kept).
trump_chain = {}

for tweet in tweet_texts:
    tweet_words = tweet.split()
    for current, following in zip(tweet_words, tweet_words[1:]):
        trump_chain.setdefault(current, []).append(following)

In [83]:
def new_trump_tweet(chain=None):
    """Generate one tweet by taking a random walk through a Markov chain.

    Parameters
    ----------
    chain : dict, optional
        Maps each word to the list of words observed directly after it.
        The walk starts at the "TWEETSTART" entry and stops when
        "TWEETEND" is drawn.  When omitted, the notebook-level
        ``trump_chain`` is used.

    Returns
    -------
    str
        The words drawn between the two sentinels, joined by single spaces
        (an empty string if "TWEETEND" is drawn first).

    Raises
    ------
    KeyError
        If the walk reaches a word that has no entry in ``chain``.
    """
    if chain is None:
        # Resolved at call time so the default always reflects the chain as
        # it currently exists in the notebook.
        chain = trump_chain

    new_tweet = []
    next_word = random.choice(chain["TWEETSTART"])
    while next_word != "TWEETEND":
        new_tweet.append(next_word)
        next_word = random.choice(chain[next_word])

    return ' '.join(new_tweet)

In [85]:
# Generate a batch of 500 tweets from the Trump chain.
new_tweets = [new_trump_tweet() for _ in range(500)]

# Write one tweet per line.  Opening the file as UTF-8 lets every tweet be
# written instead of silently skipping the ones the platform's default codec
# cannot encode; errors="replace" keeps the write best-effort for the rare
# unencodable character (e.g. a lone surrogate).  Other failures, such as an
# unwritable path, now surface instead of being swallowed.
with open("D:\\trump_tweets.txt", "w", encoding="utf-8", errors="replace") as f:
    for tweet in new_tweets:
        f.write(tweet + "\n")