### Benchmarks: chainopy vs. pydtmc

In [1]:
from chainopy import MarkovChain
import pydtmc
import numpy as np
from timeit import timeit

#### 1. The fit functions (`chainopy_mc.fit` vs. `pydtmc_mc.fit_sequence`)

In [13]:
# chainopy chain is created with no arguments; the benchmark cells call fit() on it.
chainopy_mc = MarkovChain()

# Uniform 100 x 100 matrix (every entry equals 1/100) used to construct the pydtmc chain.
init_tpm = np.full((100, 100), 1 / 100)
pydtmc_mc = pydtmc.MarkovChain(init_tpm)

In [11]:
def make_unique_words(size):
    """Build a benchmark sequence of distinct tokens.

    Parameters
    ----------
    size : int
        Number of tokens to generate.

    Returns
    -------
    str
        The tokens "word0", "word1", ..., "word{size-1}" joined by single
        spaces. An empty string when ``size`` is 0.
    """
    return " ".join(f"word{i}" for i in range(size))


# Sequence lengths (number of words) used by the fit benchmarks.
data_sizes_fit = [10, 50, 100, 500, 1000, 5000, 10000]

# Maps str(size) -> space-joined sequence of `size` unique words.
# A comprehension keeps the loop variable out of the notebook namespace.
data_dict = {str(size): make_unique_words(size) for size in data_sizes_fit}

In [27]:
# Select the 10-word benchmark sequence (one space-joined string).
words = data_dict["10"]
# De-duplicate while keeping first-seen order. list(set(...)) would give an order
# that changes between kernel sessions (string hash randomization), making the
# state ordering of the fitted chain non-reproducible.
possible_states = list(dict.fromkeys(words.split(" ")))

### Sequence length = 10 words

In [17]:
%%timeit

# Time chainopy's fit on the current `words` string (passed as one space-joined string).
# NOTE(review): every timing loop re-fits the same chainopy_mc instance — confirm
# that fit() does not reuse state from a previous call.
chainopy_mc.fit(words)

116 µs ± 5.28 µs per loop (mean ± std. dev. of 7 runs, 10,000 loops each)


In [33]:
%%timeit

# Time pydtmc's fit_sequence with fitting_type="mle" on the same sequence.
# Note: words.split(" ") runs inside the timed body, so tokenizing the string
# is included in this measurement.
pydtmc_mc.fit_sequence(sequence=words.split(" "), possible_states=possible_states, fitting_type="mle")

14 ms ± 1.74 ms per loop (mean ± std. dev. of 7 runs, 100 loops each)


### Sequence length = 50 words

In [39]:
# Select the 50-word benchmark sequence (one space-joined string).
words = data_dict["50"]
# De-duplicate while keeping first-seen order. list(set(...)) would give an order
# that changes between kernel sessions (string hash randomization), making the
# state ordering of the fitted chain non-reproducible.
possible_states = list(dict.fromkeys(words.split(" ")))

In [40]:
%%timeit

# Time chainopy's fit on the current `words` string (passed as one space-joined string).
# NOTE(review): every timing loop re-fits the same chainopy_mc instance — confirm
# that fit() does not reuse state from a previous call.
chainopy_mc.fit(words)

266 µs ± 15 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


In [41]:
%%timeit

# Time pydtmc's fit_sequence with fitting_type="mle" on the same sequence.
# Note: words.split(" ") runs inside the timed body, so tokenizing the string
# is included in this measurement.
pydtmc_mc.fit_sequence(sequence=words.split(" "), possible_states=possible_states, fitting_type="mle")

14.4 ms ± 1.17 ms per loop (mean ± std. dev. of 7 runs, 100 loops each)


### Sequence length = 100 words

In [42]:
# Select the 100-word benchmark sequence (one space-joined string).
words = data_dict["100"]
# De-duplicate while keeping first-seen order. list(set(...)) would give an order
# that changes between kernel sessions (string hash randomization), making the
# state ordering of the fitted chain non-reproducible.
possible_states = list(dict.fromkeys(words.split(" ")))

In [43]:
%%timeit

# Time chainopy's fit on the current `words` string (passed as one space-joined string).
# NOTE(review): every timing loop re-fits the same chainopy_mc instance — confirm
# that fit() does not reuse state from a previous call.
chainopy_mc.fit(words)

496 µs ± 47.3 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


In [44]:
%%timeit

# Time pydtmc's fit_sequence with fitting_type="mle" on the same sequence.
# Note: words.split(" ") runs inside the timed body, so tokenizing the string
# is included in this measurement.
pydtmc_mc.fit_sequence(sequence=words.split(" "), possible_states=possible_states, fitting_type="mle")

17.3 ms ± 2.18 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


### Sequence length = 500 words

In [45]:
# Select the 500-word benchmark sequence (one space-joined string).
words = data_dict["500"]
# De-duplicate while keeping first-seen order. list(set(...)) would give an order
# that changes between kernel sessions (string hash randomization), making the
# state ordering of the fitted chain non-reproducible.
possible_states = list(dict.fromkeys(words.split(" ")))

In [46]:
%%timeit

# Time chainopy's fit on the current `words` string (passed as one space-joined string).
# NOTE(review): every timing loop re-fits the same chainopy_mc instance — confirm
# that fit() does not reuse state from a previous call.
chainopy_mc.fit(words)

6.58 ms ± 403 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [47]:
%%timeit

# Time pydtmc's fit_sequence with fitting_type="mle" on the same sequence.
# Note: words.split(" ") runs inside the timed body, so tokenizing the string
# is included in this measurement.
pydtmc_mc.fit_sequence(sequence=words.split(" "), possible_states=possible_states, fitting_type="mle")

63.6 ms ± 6.63 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


### Sequence length = 1000 words

In [48]:
# Select the 1000-word benchmark sequence (one space-joined string).
words = data_dict["1000"]
# De-duplicate while keeping first-seen order. list(set(...)) would give an order
# that changes between kernel sessions (string hash randomization), making the
# state ordering of the fitted chain non-reproducible.
possible_states = list(dict.fromkeys(words.split(" ")))

In [49]:
%%timeit

# Time chainopy's fit on the current `words` string (passed as one space-joined string).
# NOTE(review): every timing loop re-fits the same chainopy_mc instance — confirm
# that fit() does not reuse state from a previous call.
chainopy_mc.fit(words)

23.6 ms ± 1.75 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [50]:
%%timeit

# Time pydtmc's fit_sequence with fitting_type="mle" on the same sequence.
# Note: words.split(" ") runs inside the timed body, so tokenizing the string
# is included in this measurement.
pydtmc_mc.fit_sequence(sequence=words.split(" "), possible_states=possible_states, fitting_type="mle")

224 ms ± 5.84 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


### Sequence length = 5000 words

In [51]:
# Select the 5000-word benchmark sequence (one space-joined string).
words = data_dict["5000"]
# De-duplicate while keeping first-seen order. list(set(...)) would give an order
# that changes between kernel sessions (string hash randomization), making the
# state ordering of the fitted chain non-reproducible.
possible_states = list(dict.fromkeys(words.split(" ")))

In [52]:
%%timeit

# Time chainopy's fit on the current `words` string (passed as one space-joined string).
# NOTE(review): every timing loop re-fits the same chainopy_mc instance — confirm
# that fit() does not reuse state from a previous call.
chainopy_mc.fit(words)

587 ms ± 30.7 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [53]:
%%timeit

# Time pydtmc's fit_sequence with fitting_type="mle" on the same sequence.
# Note: words.split(" ") runs inside the timed body, so tokenizing the string
# is included in this measurement.
pydtmc_mc.fit_sequence(sequence=words.split(" "), possible_states=possible_states, fitting_type="mle")

5.3 s ± 212 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


### Sequence length = 10000 words

In [57]:
# Select the 10000-word benchmark sequence (one space-joined string).
words = data_dict["10000"]
# De-duplicate while keeping first-seen order. list(set(...)) would give an order
# that changes between kernel sessions (string hash randomization), making the
# state ordering of the fitted chain non-reproducible.
possible_states = list(dict.fromkeys(words.split(" ")))

In [58]:
%%timeit

# Time chainopy's fit on the current `words` string (passed as one space-joined string).
# NOTE(review): every timing loop re-fits the same chainopy_mc instance — confirm
# that fit() does not reuse state from a previous call.
chainopy_mc.fit(words)

2.88 s ± 533 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [59]:
%%timeit

# Time pydtmc's fit_sequence with fitting_type="mle" on the same sequence.
# Note: words.split(" ") runs inside the timed body, so tokenizing the string
# is included in this measurement.
pydtmc_mc.fit_sequence(sequence=words.split(" "), possible_states=possible_states, fitting_type="mle")

26.1 s ± 1.23 s per loop (mean ± std. dev. of 7 runs, 1 loop each)
