# Markov Chain

In [1]:
import pandas as pd
import numpy as np

![](weather.png)

In [2]:
data = ['cold', 'cold', 'hot', 'cold', 'cold', 'hot', 'cold', 'hot', 'hot']

### 1. Convert the data to a DataFrame
with a single column `weather`

In [3]:
df = pd.DataFrame(data, columns=['weather'])
df

Unnamed: 0,weather
0,cold
1,cold
2,hot
3,cold
4,cold
5,hot
6,cold
7,hot
8,hot


### 2. Identify transitions
create another column so that we have the columns:

* $Y_t$ – the current state
* $Y_{t-1}$ –  the state before

In [4]:
df['before'] = df['weather'].shift(1)
df.head()

Unnamed: 0,weather,before
0,cold,
1,cold,cold
2,hot,cold
3,cold,hot
4,cold,cold


### 3. Count transitions
Count the absolute number of each possible transition

In [5]:
(df['weather'] + '->' + df['before']).value_counts()

hot->cold     3
cold->hot     2
cold->cold    2
hot->hot      1
dtype: int64

In [6]:
d = {('cold', 'hot'):3, ('cold', 'cold'):2}

In [7]:
df['count'] = 1

In [8]:
ct = df.groupby(['before', 'weather'])['count'].count().unstack()
ct

weather,cold,hot
before,Unnamed: 1_level_1,Unnamed: 2_level_1
cold,2,3
hot,2,1


### 4. Calulate a transition matrix
The transition matrix $P$ has the element $p_{ij}$, with rows $i$ and columns $j$, such that:

$$
p_{ij} = P(Y_t = y_j | Y_{t-1} = y_i)
$$

For example $p_{0,1} = p_{cold, hot}$ is the probability of a hot day when it was cold the day before. 

In [9]:
P = (ct.T / ct.sum(axis=1)).T
P

weather,cold,hot
before,Unnamed: 1_level_1,Unnamed: 2_level_1
cold,0.4,0.6
hot,0.666667,0.333333


In [10]:
# there exists a neat one-line in pandas
df.groupby('before')['weather'].value_counts(normalize=True).unstack()

weather,cold,hot
before,Unnamed: 1_level_1,Unnamed: 2_level_1
cold,0.4,0.6
hot,0.666667,0.333333


### 5. Calulate probabilities for the next day

In [11]:
initial_state = np.array([0, 1])   #population: 0 items in the cold state, 1 in hot state
day_plus_one = np.dot(initial_state, P)
day_plus_one

array([0.66666667, 0.33333333])

In [12]:
# what is a dot product?

# apples, milk, chili
a = [3, 2, 1]  # amounts
b = [0.5, 1.0, 2.0]  #prices
sum([j*i for j, i in zip(a, b)])

5.5

In [13]:
a, b = np.array(a), np.array(b)
np.dot(a, b)

5.5

### 6. Calulate probabilities two days ahead

In [14]:
day_plus_two = np.dot(day_plus_one, P)
day_plus_two

array([0.48888889, 0.51111111])

### 7. Calculate the probabilities many days ahead

In [15]:
day_plus_one.dot(P).dot(P).dot(P).dot(P).dot(P).dot(P).dot(P).dot(P).dot(P).dot(P).dot(P).dot(P)

array([0.52631581, 0.47368419])