In [1]:
import numpy as np
import sympy as sy
import scipy.stats as stats
import matplotlib.pyplot as plt

### Problem 1

In [2]:
def random_markov(n):
    """Build a random column-stochastic transition matrix with n states.

    Inputs:
        n (int): The number of states in the Markov chain.

    Returns:
        ((n,n) ndarray): Entries are uniform random draws from [0, 1) that
            have been rescaled so that every column sums to 1.
    """
    weights = np.random.random(size=(n, n))
    # Dividing by the (1, n) row of column totals normalizes each column.
    column_totals = weights.sum(axis=0, keepdims=True)
    return weights / column_totals

In [3]:
# Draw a random 3-state transition matrix and display it.
rand_transmat = random_markov(3)
rand_transmat

array([[ 0.50663321,  0.58630077,  0.20990583],
       [ 0.40803823,  0.25365826,  0.47012448],
       [ 0.08532856,  0.16004097,  0.3199697 ]])

In [4]:
# Sanity check: the matrix is column-stochastic if every column sums to 1.
np.sum(rand_transmat, axis=0)

array([ 1.,  1.,  1.])

### Problem 2

In [5]:
# Problem 2
def forecast(days):
    """Simulate a two-state weather Markov chain, given that today is hot.

    Inputs:
        days (int): The number of days to simulate.

    Returns:
        (ndarray): The state index (0 = hot, 1 = the other state) for each
            simulated day, not including the starting day.
    """
    # Column-stochastic: transition[j, i] is the probability of moving i -> j.
    transition = np.array([[0.7, 0.6], [0.3, 0.4]])

    state = 0  # today is hot
    history = []
    for _ in range(days):
        other = 1 - state
        # A single Bernoulli draw decides whether we switch to the other
        # state; its success probability is the chance of leaving `state`.
        if np.random.binomial(1, transition[other, state]):
            state = other
        history.append(state)
    return np.array(history)

In [6]:
# Simulate 20 days of weather starting from a hot day (state 0).
forecast(20)

array([0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0])

### Problem 3

In [7]:
def four_state_forecast(days):
    """Simulate the four-state weather Markov chain, starting from mild.

    Inputs:
        days (int): The number of days to simulate.

    Returns:
        (ndarray): The state index (0 through 3) for each simulated day, not
            including the starting day.

    Examples (output is random):
        >>> four_state_forecast(3)
        array([0, 1, 3])
        >>> four_state_forecast(5)
        array([2, 1, 2, 1, 1])
    """
    # Column-stochastic: column i holds the distribution of tomorrow's state
    # given that today's state is i.
    chain = np.array([[0.5, 0.3, 0.1, 0.0],
                      [0.3, 0.3, 0.3, 0.3],
                      [0.2, 0.3, 0.4, 0.5],
                      [0.0, 0.1, 0.2, 0.2]])
    state = 1  # mild is the initial weather
    path = []
    for _ in range(days):
        # One multinomial trial yields a one-hot vector; the position of the
        # single 1 is the index of the next state.
        one_hot = np.random.multinomial(1, chain[:, state])
        state = np.argmax(one_hot)
        path.append(state)
    return np.array(path)

In [8]:
# Simulate 20 days of the four-state chain, starting from mild (state 1).
four_state_forecast(20)

array([0, 1, 0, 1, 2, 2, 1, 1, 3, 3, 1, 1, 3, 3, 1, 1, 1, 2, 2, 0])

### Problem 4

In [9]:
def steady_state(A, tol=1e-12, N=40):
    """Approximate the steady state of A by repeated multiplication.

    A random probability vector is multiplied by A until two successive
    iterates differ by less than tol in the 2-norm.

    Inputs:
        A ((n,n) ndarray): A column-stochastic transition matrix.
        tol (float): The convergence tolerance.
        N (int): The maximum number of iterations to compute.

    Raises:
        ValueError: if the iteration does not converge within N steps.

    Returns:
        x ((n,) ndarray): The steady state distribution vector of A.
    """
    # Random starting point, rescaled so that its entries sum to 1.
    current = np.random.random(len(A))
    current = current / current.sum()

    for _ in range(N):
        following = np.dot(A, current)
        if np.linalg.norm(following - current) < tol:
            return following
        current = following
    raise ValueError("A^k does not converge given N iterations")

In [10]:
# Steady state of the two-state weather chain from Problem 2.
transmat1= np.array([[0.7, 0.6],
                     [0.3, 0.4]])
steady_state(transmat1)

array([ 0.66666667,  0.33333333])

In [11]:
# Empirical check: simulate a long run of the two-state chain and compute the
# fraction of days spent in each state; it should be close to the steady state.
# NOTE: despite its name, preds1000 holds 100000 simulated days, and `counts`
# holds relative frequencies (means of boolean masks), not raw counts.
preds1000 = forecast(100000)
states = [0, 1]
counts = [np.mean(preds1000==i) for i in states]
counts

[0.66281000000000001, 0.33718999999999999]

In [12]:
# Steady state of the four-state weather chain from Problem 3.
transmat2 = np.array([[0.5, 0.3, 0.1, 0.0],
                      [0.3, 0.3, 0.3, 0.3],
                      [0.2, 0.3, 0.4, 0.5],
                      [0.0, 0.1, 0.2, 0.2]])
steady_state(transmat2)

array([ 0.24655172,  0.3       ,  0.33275862,  0.12068966])

In [13]:
# Empirical check: simulate a long run of the four-state chain and compute the
# fraction of days spent in each state; it should be close to the steady state.
# NOTE: despite its name, preds1000 holds 100000 simulated days, and `counts`
# holds relative frequencies (means of boolean masks), not raw counts.
preds1000 = four_state_forecast(100000)
states = [0, 1, 2, 3]
counts = [np.mean(preds1000==i) for i in states]
counts

[0.24235999999999999,
 0.30127999999999999,
 0.33584999999999998,
 0.12051000000000001]

### Problems 5 and 6

In [14]:
class SentenceGenerator(object):
    """Markov chain over the words of a text file, for simulating bad English.

    Every distinct whitespace-separated token in the file is a state of the
    chain. Two artificial states are added: "$tart", which precedes the first
    word of every sentence, and "$top", which follows the last word of every
    sentence and is absorbing.

    Attributes:
        vocab_ind (dict): Maps each state label (a word, "$tart" or "$top")
            to its row/column index in transmat.
        vocab_ind_rev (dict): The inverse mapping, from index to state label.
        transmat ((n,n) ndarray): Column-stochastic transition matrix;
            transmat[j, i] is the observed frequency with which state j
            directly follows state i in the training text.

    Example (output is random):
        >>> yoda = SentenceGenerator("Yoda.txt")
        >>> print(yoda.babble())
        The dark side of loss is a path as one with you.
    """

    def __init__(self, filename):
        """Read the specified file and build a transition matrix from its
        contents. The file is assumed to have one complete sentence written
        on each line; blank or whitespace-only lines are ignored.

        Inputs:
            filename (str): Path of the text file to train on.

        Raises:
            ValueError: if the file contains no words at all.
        """
        with open(filename, "r") as infile:
            text = infile.read()

        # Tokenize line by line. Lines without any word (e.g. the empty string
        # left after a trailing newline) are dropped: they are not sentences,
        # and must not add a spurious end-of-sentence transition.
        tokenized = [line.split() for line in text.split("\n")]
        sentences = [words for words in tokenized if words]
        if not sentences:
            raise ValueError("no sentences found in %r" % (filename,))

        # "$tart" gets the first index, the sorted words follow, and "$top"
        # gets the last index.
        words_seen = sorted({word for words in sentences for word in words})
        labels = ["$tart"] + words_seen + ["$top"]
        vocab_ind = {label: index for index, label in enumerate(labels)}
        start, stop = vocab_ind["$tart"], vocab_ind["$top"]

        # Count transitions; column = state we come from, row = state we go to.
        transmat = np.zeros((len(labels), len(labels)))
        for words in sentences:
            previous = start
            for word in words:
                here = vocab_ind[word]
                transmat[here, previous] += 1
                previous = here
            transmat[stop, previous] += 1
            # Self-loop that makes "$top" absorbing and keeps its column
            # from being all zeros.
            transmat[stop, stop] += 1

        # Turn the counts of every column into probabilities.
        transmat /= np.sum(transmat, axis=0)

        self.vocab_ind = vocab_ind
        self.vocab_ind_rev = {index: label for label, index in vocab_ind.items()}
        self.transmat = transmat

    def babble(self):
        """Generate one random sentence by walking the chain.

        Begin at the start state and repeatedly draw the next state from the
        column of the current state (the strategy of four_state_forecast())
        until the stop state is reached.

        Returns:
            (str): The visited words joined by single spaces. The "$tart"
                and "$top" markers are not part of the result.
        """
        stop = self.vocab_ind["$top"]
        state = self.vocab_ind["$tart"]
        spoken = []
        while True:
            # One multinomial trial gives a one-hot vector whose hot position
            # is the index of the next state.
            draw = np.random.multinomial(1, self.transmat[:, state])
            state = int(np.argmax(draw))
            if state == stop:
                break
            spoken.append(self.vocab_ind_rev[state])
        return " ".join(spoken)

In [15]:
# Train the sentence generator on the text in "tswift1989.txt".
sg = SentenceGenerator("tswift1989.txt")

In [16]:
# Print 25 randomly generated sentences.
for i in range(25):
    print(sg.babble())

heart-breakers gonna hate hate hate hate hate hate hate hate hate
i shake it off
the sunset babe red lip classic thing that i'll put it works
well and white but you're still don't fix bullet holes
pictures in your mind saying it's how it go out of you leave me out to my street the pain
i can't stop grooving
baby now
i wish
good
and they'll tell you took our bags on and we first dropped our broken hearts put them say you've done
and on my neck
this love it's always want back where i'm just gone was stay
she'll open up but you're the end as long list of style
it's been
your passport and that's how you have to do this love left a long list of kisses on apartment floors
i wish i shake it works
i wish i wish you know it could've been losing grip oh my brain
all the sky turned black like i wish i know you would wait forever but i shake shake it off he's so it's a tight little flame it off
i'm lightning on and guns
you had to my room when it's torture
people say you'll remember what you've do