# Jane Eyre - A Mathematical Theory of Communication

[A Mathematical Theory of Communication by Claude Shannon](https://people.math.harvard.edu/~ctm/home/text/others/shannon/entropy/entropy.pdf)

[The Mathematical Theory of Communication by Claude Shannon and Warren Weaver](https://pure.mpg.de/rest/items/item_2383164/component/file_2383163/content)

[Jane Eyre: An Autobiography by Charlotte Brontë](https://www.gutenberg.org/ebooks/1260)

## Imports

In [1]:
# The random module from the Python Standard Library.
import random

## Zero-order Approximation

In [2]:
# The allowed symbols
symbols = "ABCDEFGHIJKLMNOPQRSTUVWXYZ "

In [3]:
# Randomly select k symbols from the string above.
L = random.choices(symbols, k=100)

# Show.
''.join(L)

'OLGCIIAH MMWNQZEQERMWLJSQYQFTDBCBQRZFJZIOQMPSILBAMLWGCJPHBRMLZQBRONOHPYUOPPNAPWCPXDJGMRFUWGEOVEZ GYJ'

## First-order Approximation

In [4]:
# Open the book.
with open('janeeyre.txt','r') as f:
    # Read the book into one long string.
    text = f.read().upper()

In [5]:
# Counts of the number of letters in the book.
counts = {s: text.count(s) for s in symbols}

In [6]:
# Show the counts.
counts

{'A': 62860,
 'B': 11161,
 'C': 18771,
 'D': 37462,
 'E': 100508,
 'F': 16889,
 'G': 15093,
 'H': 45904,
 'I': 56073,
 'J': 1223,
 'K': 6035,
 'L': 32495,
 'M': 22285,
 'N': 54288,
 'O': 60583,
 'P': 12143,
 'Q': 948,
 'R': 47417,
 'S': 50123,
 'T': 67127,
 'U': 23495,
 'V': 7629,
 'W': 18705,
 'X': 1277,
 'Y': 17307,
 'Z': 329,
 ' ': 168745}

In [7]:
# Sort the counts
sorted(counts.items(), key=lambda item: -item[1])

[(' ', 168745),
 ('E', 100508),
 ('T', 67127),
 ('A', 62860),
 ('O', 60583),
 ('I', 56073),
 ('N', 54288),
 ('S', 50123),
 ('R', 47417),
 ('H', 45904),
 ('D', 37462),
 ('L', 32495),
 ('U', 23495),
 ('M', 22285),
 ('C', 18771),
 ('W', 18705),
 ('Y', 17307),
 ('F', 16889),
 ('G', 15093),
 ('P', 12143),
 ('B', 11161),
 ('V', 7629),
 ('K', 6035),
 ('X', 1277),
 ('J', 1223),
 ('Q', 948),
 ('Z', 329)]

In [8]:
# Randomly select k symbols from the string above.
L = random.choices(list(counts.keys()), weights=list(counts.values()), k=100)

# Show.
''.join(L)

'R ISHEAF HNRENASLUNOONTCDNM HDILYRNNT DTHTO I NT  TN  OEOAMENITAROOPFSWET  EAANSPANIUF T U NDVSEMNRS'

## Second-order Approximation

In [9]:
# Pairs of symbols.
symsq = [a + b for a in symbols for b in symbols]

# Show.
symsq

['AA',
 'AB',
 'AC',
 'AD',
 'AE',
 'AF',
 'AG',
 'AH',
 'AI',
 'AJ',
 'AK',
 'AL',
 'AM',
 'AN',
 'AO',
 'AP',
 'AQ',
 'AR',
 'AS',
 'AT',
 'AU',
 'AV',
 'AW',
 'AX',
 'AY',
 'AZ',
 'A ',
 'BA',
 'BB',
 'BC',
 'BD',
 'BE',
 'BF',
 'BG',
 'BH',
 'BI',
 'BJ',
 'BK',
 'BL',
 'BM',
 'BN',
 'BO',
 'BP',
 'BQ',
 'BR',
 'BS',
 'BT',
 'BU',
 'BV',
 'BW',
 'BX',
 'BY',
 'BZ',
 'B ',
 'CA',
 'CB',
 'CC',
 'CD',
 'CE',
 'CF',
 'CG',
 'CH',
 'CI',
 'CJ',
 'CK',
 'CL',
 'CM',
 'CN',
 'CO',
 'CP',
 'CQ',
 'CR',
 'CS',
 'CT',
 'CU',
 'CV',
 'CW',
 'CX',
 'CY',
 'CZ',
 'C ',
 'DA',
 'DB',
 'DC',
 'DD',
 'DE',
 'DF',
 'DG',
 'DH',
 'DI',
 'DJ',
 'DK',
 'DL',
 'DM',
 'DN',
 'DO',
 'DP',
 'DQ',
 'DR',
 'DS',
 'DT',
 'DU',
 'DV',
 'DW',
 'DX',
 'DY',
 'DZ',
 'D ',
 'EA',
 'EB',
 'EC',
 'ED',
 'EE',
 'EF',
 'EG',
 'EH',
 'EI',
 'EJ',
 'EK',
 'EL',
 'EM',
 'EN',
 'EO',
 'EP',
 'EQ',
 'ER',
 'ES',
 'ET',
 'EU',
 'EV',
 'EW',
 'EX',
 'EY',
 'EZ',
 'E ',
 'FA',
 'FB',
 'FC',
 'FD',
 'FE',
 'FF',
 'FG',
 'FH',

In [12]:
# Counts of the number of double letters in the book.
countsq = {s: text.count(s) for s in symsq}

In [13]:
# Example
countsq['EA']

5239

In [15]:
# Our eventual output.
output = 'T'

In [17]:
# The counts for keys beginning with the last letter.
condcounts = {s: countsq[output[-1] + s] for s in symbols}

# Show
condcounts

{'A': 2263,
 'B': 2,
 'C': 310,
 'D': 3,
 'E': 6382,
 'F': 52,
 'G': 1,
 'H': 18811,
 'I': 4190,
 'J': 0,
 'K': 0,
 'L': 1079,
 'M': 92,
 'N': 83,
 'O': 7190,
 'P': 0,
 'Q': 0,
 'R': 1926,
 'S': 1100,
 'T': 1368,
 'U': 1258,
 'V': 0,
 'W': 394,
 'X': 0,
 'Y': 836,
 'Z': 4,
 ' ': 15030}

In [18]:
# Randomly select the next symbol using the conditional weights.
random.choices(list(condcounts.keys()), weights=list(condcounts.values()), k=1)

['H']

In [28]:
for i in range(1,100):
    condcounts = {s: countsq[output[-1] + s] for s in symbols}
    next = random.choices(list(condcounts.keys()), weights=list(condcounts.values()), k=1)[0]
    output += next

# Show
output

'TI BE ILYOUL DE CLAT FEDELVEESTHIN SSOUN LITHE ABR THERISOOWNNCARAN THES MANG PLELERT COF THANSHAPERCCIDETRARSAXCOULLWONAMPPATOLKEIS F RESHESESOG R TEINGE TEN MEND STENGRELIASHERERTHIN S WHTLDINORYOCE RMED HIT DIN ANFE ATY SIKE WERED A F ISOTHILED T THERITHIRR OUSSELD D ANG ST MAKEPT THE HERD EN MPREALDE ISTHINHASUIT FLOMY TO R BT MYOMY LY TEROUS NI AN HEE ST PALOUBEST THER AN CE COGRANE BOF HE OCE A L HATTOWHEND EWN ACOLAWIRE M AFIRE SH CATTITTH FENOMYOLENTOSEEANTHEVE TSOR TINGHE WH WOUPE INSNGE S TADE ID ICHALAMY TI ARRY PRURECAI HA E OFRAVEAR ALLENTHO AGHAIDG TITIGLLOAS OOWANICTINTHAISEREROULLOBED BER ALYALASATHINGOUSH ISHEST E A ELY I IVI OUTHITHALEIASHENTROHINERANDE LET LTHEAMISESSTIS AND TOFECLINDELY AND REA AXESOF SULLECOR HENDOK THING COATAD INTHANE BETHEAJAND D WE RT ANOPL AN AWHEW QUTHE NNTMANFINSTU L HIAWATANASEXAUSUD MOWI ISHMPE THEART BULERINMBED WAYOFAPURSUTHISU ASO'