In [1]:
import sympy as sp
import numpy as np
import galois as gf
import pandoc
from math import *

np.set_printoptions(legacy='1.25')

# Homework 1

## Problem 1

In a finite field GF(p), we can divide a polynomial $a(x)$ by another polynomial $b(x)$ to obtain a quotient $q(x)$ and
a reminder $r(x)$ that satisfy:

(i) $a(x) \stackrel{p}{≡} b(x).q(x) + r(x),$

(ii) $deg(r(x)) < deg(b(x)).$

Here is an example of how it works. Let $a(x) = x^{3} + x + 2,$ $b(x) = 2x + 1$ and $p = 3.$ Note that for a polynomial $m(x),$ we denote the coefficient of $x^{k}$ by $m_{k}.$ For instance, we have ($b_{1}, b_{0}$) = (2, 1):
- Set $q(x) = 0;$
- Find the multiplicative inverse of $b_{1}$ in GF($3$), i.e., $2^{−1}$ $\stackrel{3}{≡} 2$.
- Find the difference between the degree of $a(x)$ and $b(x),$ which is $d = 3 − 1 = 2.$
- Set s(x) = $b^{−1}_{1} ∗ a_{3} ∗ x^{d} = 2x^{2}$ and update $q(x) ← q(x) + s(x) = 2x^{2}$.
- Update $a(x) ← a(x) − b(x) · s(x) = x^{3} + x + 2 − 4x^{3} − 2x^{2} \stackrel{3}{≡} x^{2} + x + 2.$
- Find the difference between the degree of $a(x)$ and $b(x)$, which is $d = 2 − 1 = 1.$
- Set $s(x) = b^{−1}_{1} ∗ a_{2} ∗ x^{d} = 2x$ and update $q(x) ← q(x) + s(x) = 2x^{2} + 2x.$
- Update $a(x) ← a(x) − b(x) · s(x) = (x^{2} + x + 2) − 4x^{2} − 2x \stackrel{3}{≡} 2x + 2.$
- Find the difference between the degree of $a(x)$ and $b(x),$ which is $d = 1 − 1 = 0.$
- Set $s(x) = b^{−1}_{1} ∗ a_{1} ∗ x^{d} = 1$ and update $q(x) ← q(x) + s(x) = 2x^{2} + 2x + 1.$
- Update $a(x) ← a(x) − b(x) · s(x) = (2x + 2) − 2x − 1 \stackrel{3}{≡} 1.$
- Set $r(x) = a(x) = 1,$ since $deg(a(x)) < deg(b(x)).$

Therefore, at the end, we get $q(x) = 2x^{2} + 2x + 1$ and $r(x) = 1.$

(a) Write a code that takes two polynomials $a(x)$ and $b(x),$ together with a prime number $p$ and returns $q(x)$ and $r(x).$ Note that polynomial $m(x) = m_{k}x^{k} + m_{k−1}x^{k−1} + · · · + m_{1}x + m_{0}$ can be represented in the form of the vector of coefficients, i.e., $[m_{k}, m_{k−1}, . . . , m_{2}, m_{1}, m_{0}].$



In [2]:

# Calculate a polynomial
def calc_polynomial(poly_a, input) :
    degree_a = len(poly_a)-1
    i=degree_a
    output = 0
    while i>=0 :
        output = poly_a[degree_a-i]*input**i + output
        i-=1
    return output

#Finds inverse in modulo. e.g. 2 inverse in mod 3 is 2 b/c 2*2 = 4(mod 3) = 1
#==Inputs== 
#input: value to find the inverse of
#modulo: value of the modulus
def find_modulo_inverse(input, modulo) :
    i=modulo
    while i>0 :
        if np.mod(input*i,modulo) == 1 :
            return i
        i-=1
    return 0

#Finds the degree of a polynomial vector by ignoring finding the first non-zero value.
def find_degree(polynomial) :
    shift = 0
    for i in polynomial :
        if i!=0 :
            return len(polynomial)-shift-1
        else : 
            shift +=1
    return 0

#Removes all zeros in a polynomial vector preceeding the first non-zero element.
#e.g. [0, 0, 0, 2, 0, 3, 0] --> [2, 0, 3, 0]
def remove_zeros(polynomial) :
    shift = 0
    for i in polynomial :
        if i!=0 :
            while shift>0 :
                polynomial.pop(0)
                shift-=1
        else : 
            shift +=1
    return polynomial


#Description:   follows the steps outlined in Problem 1 to divide a polynomial a(x) by another polynomial, b(x)
#               to get the quotient q(x) and the remainder r(x)
#==Inputs==
#a: Vector of polynomial coefficients starting at the highest degree. e.g. a(x) 3x^2 + 1 --> poly_a=[3 0 1]
#b: Vector of polynomial coefficients to divide into a(x), same format as a. Note: len(b)<len(a) must be true!
#GF: Galois Field order, e.g. GF(p) --> GF=p, where p must be prime.
#==Outputs
#divide_in_GF[0] = q(x)
#divide_in_GF[1] = r(x)
def divide_in_GF(poly_a, poly_b, GF):
    degree_a = find_degree(poly_a) # Get degree of each polynomial
    print(f"degree a(x): {degree_a}")
    degree_b = find_degree(poly_b)
    print(f"degree b(x): {degree_b}")
    poly_q = 0
    
    #Calculate degree difference between a(x) and b(x)
    if degree_a > degree_b :
        degree_diff = degree_a - degree_b
        print(f"degree difference: {degree_a} - {degree_b} = {degree_diff}")
    else :
        print("ERROR: degree of a < degree of b !")
        return 0

    bk_inverse = find_modulo_inverse(poly_b[0],GF) # find inverse of highest power of b
    print(f"b{degree_b} = {bk_inverse}")

    a_index = 0 #index of poly_a, to be looped over
    poly_s = [0] * (degree_a) #Create an empty matrix of n=degree_diff elements 
    poly_q = [0] * (degree_a)
    while a_index<degree_a :
        print('')
        print(f'STARTLOOP a_index={a_index}')
        print(f'degree_diff = {degree_diff}')


        sq_index = len(poly_s)-1-degree_diff #starts at the first non-zero element and decrements with degree_diff
        print(f'sq_index={sq_index}')
        print(f'a_index={a_index}')

        poly_s[sq_index] = np.mod((bk_inverse*poly_a[a_index]),GF)
        print(f's(x) = {poly_s}')

        poly_q =  np.mod(np.polyadd(poly_s,poly_q),GF)
        print(f'q(x) = {poly_q}')
        print(f's(x)*b(x)={np.mod(np.polymul(poly_s,poly_b),GF)}')

        poly_a = np.mod(np.polysub(poly_a,np.mod(np.polymul(poly_s,poly_b),GF)),GF)
        print(f'a(x) = {poly_a}')
 
        poly_s[sq_index]=0 #reset s(x) = 0 for next loop

        print('ENDLOOP')
        if degree_diff == 0 : #end loop condition.
            break

        #incrementing/decrementing values
        a_index+=1
        sq_index+=1
        degree_diff = find_degree(poly_a) - find_degree(poly_b)
        

    #remove all the preceeding zeros and output answers. 
    remainder = remove_zeros(poly_a.tolist())
    poly_q = remove_zeros(poly_q.tolist())

    return (poly_q,remainder)

#pretty simple function to print polynomial coefficient vectors readably
def print_polynomial(polynomial) :
    degree = len(polynomial)-1
    for i in polynomial :
        print(f'{i}x^{degree} +')
        degree-=1


In [3]:
#The Problem 1 Example
a = [1, 0, 1, 2]
b = [2, 1]
GF = 3

example1 = divide_in_GF(a,b,GF)

degree a(x): 3
degree b(x): 1
degree difference: 3 - 1 = 2
b1 = 2

STARTLOOP a_index=0
degree_diff = 2
sq_index=0
a_index=0
s(x) = [2, 0, 0]
q(x) = [2 0 0]
s(x)*b(x)=[1 2 0 0]
a(x) = [0 1 1 2]
ENDLOOP

STARTLOOP a_index=1
degree_diff = 1
sq_index=1
a_index=1
s(x) = [0, 2, 0]
q(x) = [2 2 0]
s(x)*b(x)=[1 2 0]
a(x) = [0 0 2 2]
ENDLOOP

STARTLOOP a_index=2
degree_diff = 0
sq_index=2
a_index=2
s(x) = [0, 0, 1]
q(x) = [2 2 1]
s(x)*b(x)=[2 1]
a(x) = [0 0 0 1]
ENDLOOP


Let's print out the answer to see if it's correct.

In [4]:
print('q(x) =')
print_polynomial(example1[0])
print('')
print('r(x) =')
print_polynomial(example1[1])

q(x) =
2x^2 +
2x^1 +
1x^0 +

r(x) =
1x^0 +


Yep! That looks like $q(x) = 2x^{2} + 2x + 1$ and $r(x) = 1.$ to me!

(b) Let $a(x) = 5x^{8} + 3x^{3} + 2x^{2} + 4$ and $b(x) = 4x^{3} + x^{2} + 6.$ Compute $q(x)$ and $r(x)$ in GF($7$) using your code.

In [5]:

a = [5, 0, 0, 0, 0, 3, 2, 0, 4]
b = [4, 1, 0, 6]
GF = 7

answer = divide_in_GF(a,b,GF)

degree a(x): 8
degree b(x): 3
degree difference: 8 - 3 = 5
b3 = 2

STARTLOOP a_index=0
degree_diff = 5
sq_index=2
a_index=0
s(x) = [0, 0, 3, 0, 0, 0, 0, 0]
q(x) = [0 0 3 0 0 0 0 0]
s(x)*b(x)=[5 3 0 4 0 0 0 0 0]
a(x) = [0 4 0 3 0 3 2 0 4]
ENDLOOP

STARTLOOP a_index=1
degree_diff = 4
sq_index=3
a_index=1
s(x) = [0, 0, 0, 1, 0, 0, 0, 0]
q(x) = [0 0 3 1 0 0 0 0]
s(x)*b(x)=[4 1 0 6 0 0 0 0]
a(x) = [0 0 6 3 1 3 2 0 4]
ENDLOOP

STARTLOOP a_index=2
degree_diff = 3
sq_index=4
a_index=2
s(x) = [0, 0, 0, 0, 5, 0, 0, 0]
q(x) = [0 0 3 1 5 0 0 0]
s(x)*b(x)=[6 5 0 2 0 0 0]
a(x) = [0 0 0 5 1 1 2 0 4]
ENDLOOP

STARTLOOP a_index=3
degree_diff = 2
sq_index=5
a_index=3
s(x) = [0, 0, 0, 0, 0, 3, 0, 0]
q(x) = [0 0 3 1 5 3 0 0]
s(x)*b(x)=[5 3 0 4 0 0]
a(x) = [0 0 0 0 5 1 5 0 4]
ENDLOOP

STARTLOOP a_index=4
degree_diff = 1
sq_index=6
a_index=4
s(x) = [0, 0, 0, 0, 0, 0, 3, 0]
q(x) = [0 0 3 1 5 3 3 0]
s(x)*b(x)=[5 3 0 4 0]
a(x) = [0 0 0 0 0 5 5 3 4]
ENDLOOP

STARTLOOP a_index=5
degree_diff = 0
sq_index=7
a_inde

In [6]:
print('q(x) =')
print_polynomial(answer[0])
print('')
print('r(x) =')
print_polynomial(answer[1])

q(x) =
3x^5 +
1x^4 +
5x^3 +
3x^2 +
3x^1 +
3x^0 +

r(x) =
2x^2 +
3x^1 +
0x^0 +


## Problem 2

A square matrix is invertible (or full-rank) if and only if its determinant is non-zero or, equivalently, its rows are
linearly independent. Let

 $ \begin{align} A = \begin{bmatrix} - & a_1 & - \\ - & a_2 & - \\ - & a_3 & - \\  & \vdots &  \\ - & a_n & - \end{bmatrix} , \end{align} $

be a **full-rank matrix** with its elements from GF($q$), where $a_{i}$ is the *i*th row of matrix A.

(a) How many choices for $a_{1}$ exists?

(b) Once you fix $a_{1}$, how many choices for $a_{2}$ we have, such that $a_{1}$ and $a_{2}$ are linearly independent?

(c) If $a_{1}$ and $a_{2}$ are linearly independent, how linear combination of $a_{1}$ and $a_{2}$ exixts?

(d) Once you fix $a_{1}, a_{2},$ how many choices for $a_{3}$ do we have, such that $a_{3}$ is linearly independent from $(a_{1}, a_{2})?$

(e) Given rows $a_{1}, . . . , a_{m−1},$ how many valid choices exist for a general row $a_{m}$ to have a full-rank matrix?

(f) In general, how many full-rank matrices of size $n$ in GF(q) exists?

(g) How many matrices (regardless of their rank) are in GF(q)?

(h) If you generate an $n × n$ matrix uniformly at random, what is the probability that it is full-rank?


## Problem 3

Write a computer program that takes a prime number $p$ and an irreducible polynomial $m(x)$ of degree $k$ in GF($p$) to generate GF($p^{k}$) and its operations. Let $α$ be a root of $m(x).$ Note that each element of GF($p^{k}$) is of the form of $b = b_{k−1}α^{k−1} + b_{k−2}α^{k−2} + · · · + b_{1α} + b_{0},$ which you can represent by $B = [b_{k−1}, b_{k−2}, . . . , b_{1}, b_{0}].$ Then, you need to implement two commands

• MyGFAdd($A,B,M,p$) that returns the summation of $a$ and $b$ in GF($p^{k}$);

• MyGFMult($A,B,M,p$) that returns the multiplication of $a$ and $b$ in GF($p^{k}$).

## Problem 4

In this problem, you are supposed to decode a ciphertext that is encrypted using the Vig\'enere cipher. You should return the key, the plaintext, and your codes.

You may write all the codes from scratch or use the provided MATLAB codes to simplify your task. The provided files are.

• ciphertext.txt in which the ciphertext is provided.

• EngAlphabetFrequency.mat in which the empirical frequency of the English alphabet is provided. Note that the first character is space, which is followed by 26 letters.

• find rep.m that finds all repeats of a given length in a given text. For instance:
> find rep(’this is the best and simplest scheme.’,4)

> {[14 27]}

as ’est ’ appears in positions 14 and 27 of the sentence.
> find rep(’this is the best and simplest scheme.’,5)

> 0×0 empty cell array

as there is no repeat of length 5 in the sentence.

• EmpiricalFrequency.m that returns the empirical frequency of letters in a text.
Also, plot the histograms of the ciphertext and the plaintext.

### EmpiricalFrequency.m

In [7]:
import csv

In [8]:
def ProcessFile(filename,type) :
    if type == 'file' :
        file = open(filename)
        readfile = file.read()
        input = readfile
    elif type == 'char' :
        input = filename
    else :
        return -1

    alphabet = ' abcdefghijklmnopqrstuvwxyz'
    input = input.lower()

    #Create Known 
    known = [False] * len(input)

    input = list(input)
    #print(input)

    #Replace all unknown characters with spaces
    known = [char if char in alphabet else ' ' for char in input]
    #Replace all double spaces with single spaces
    known_str = ''.join(known)
    known_str_single_space = ' '.join(known_str.split())
    output = list(known_str_single_space)
    return output


def EmpiricalFrequency(filename,type) :
    if type == 'file' :
        file = open(filename)
        readfile = file.read()
        input = readfile
    elif type == 'char' :
        input = filename
    else :
        return -1

    alphabet = ' abcdefghijklmnopqrstuvwxyz'
    input = input.lower()

    #Create Known 
    known = [False] * len(input)

    input = list(input)
    #print(input)

    #Replace all unknown characters with spaces
    known = [char if char in alphabet else ' ' for char in input]
    #Replace all double spaces with single spaces
    known_str = ''.join(known)
    known_str_single_space = ' '.join(known_str.split())

    #Initialize Frequency list
    na=len(alphabet)
    F = np.zeros(na, dtype=int)

    for char in known_str_single_space:
        if char in alphabet:
            ps = alphabet.index(char)  # Get the position of the character in the alphabet
            F[ps] += 1  # Increment the frequency count for this character

    # Normalize the frequency
    F_normalized = F / np.sum(F) if np.sum(F) > 0 else F  # Avoid division by zero
    return F_normalized


In [9]:
EmpiricalFrequency('abcdef','char')

array([0.        , 0.16666667, 0.16666667, 0.16666667, 0.16666667,
       0.16666667, 0.16666667, 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        ])

In [10]:
print(EmpiricalFrequency('ciphertext.txt', 'file'))

[0.02669762 0.05252467 0.02814858 0.03250145 0.02698781 0.0548462
 0.04962275 0.03627394 0.02669762 0.04149739 0.04526988 0.013639
 0.03685432 0.04178758 0.04962275 0.03366222 0.04091701 0.04062681
 0.0377249  0.05165409 0.0446895  0.04352873 0.02466628 0.03337203
 0.04265815 0.02553685 0.01799187]


### rep.m

In [11]:
def find_rep(A, k):
    n = len(A)
    #print(n)
    pos = []
    all_pos = set()  # Use a set to track all positions
    
    for i in range(0,n - k + 1):
        if i not in all_pos:  # Avoid double reporting
            pattern = A[i:i+k]  # Extract the substring
            occurrences = [j for j in range(n - k + 1) if A[j:j + k] == pattern]
            if len(occurrences) > 1:
                pos.append(occurrences)  # Store the positions
                all_pos.update(occurrences)  # Update the set with new positions
                
    return pos

# Example usage
result = find_rep('this is the best and simplest scheme.', 4)
print(result)  # Output: [[13, 26]] (Python indexes at 0, MATLAB indexes at 1)

[[13, 26]]


In [159]:
def ProcessFile(filename,type) :
    if type == 'file' :
        file = open(filename)
        readfile = file.read()
        input = readfile
    elif type == 'char' :
        input = filename
    else :
        return -1

    alphabet = ' abcdefghijklmnopqrstuvwxyz'
    input = input.lower()

    #Create Known 
    known = [False] * len(input)

    input = list(input)
    #print(input)

    #Replace all unknown characters with spaces
    known = [char if char in alphabet else ' ' for char in input]
    #Replace all double spaces with single spaces
    known_str = ''.join(known)
    known_str_single_space = ' '.join(known_str.split())
    output = list(known_str_single_space)
    return output


In [13]:
#test Case
print(ProcessFile('testcipher.txt','file'))
#input:     abc  ef,  g01!dab
# #1        abc  ef   g   dab
# #2        abc ef g dab

['a', 'b', 'c', ' ', 'e', 'f', ' ', 'g', ' ', 'd', 'a', 'b']


In [160]:
cipher_processed = ProcessFile('ciphertext.txt','file')

#Finds the length of a Vignere cipher key.
def find_key_length(input) :
    repeat_max = [0, []] #format of repeat_max = [<highest number of repeats>, <list of said repeats>]

    for i in find_rep(input,5) :
        if len(i)>repeat_max[0] :
            repeat_max[0] = len(i)
            repeat_max[1] = i

    #print(f'max={repeat_max}')

    #TODO: Find the difference between the last number, and the previous

    differences = [-1] * (len(repeat_max[1])-1)
    i=0
    while i<len(differences) :
        differences[i] = repeat_max[1][i+1]-repeat_max[1][i]
        i+=1

    #print(f'post_diff={differences}')
    key_length = gcd(*differences)
    print(f'Key Length = {key_length}')

    return key_length

find_key_length(cipher_processed)

#selected = [806, 1067, 1742, 2111, 2309, 2453, 2579, 3038, 3110]
#              261,  675,  369,  198,  144,  126,  459,  72,

#gcd(*[261, 675, 369, 198, 144, 126, 459, 72])

Key Length = 9


9

In [49]:
#Gather plaintext letters that are "added (mod 7)" by the same key letter.
#e.g. create: [ 
#               [1*key_length+0,            2*key_length+0,             ..., num_keys*key_length+0], 
#               [1*key_length+1,            2*key_length+1,             ..., num_keys*key_length+1],
#               [1*key_length+(key_length), 2*key_length+(key_length),  ..., num_keys*key_length+(key_length)] 
#             ]
#this would look like: [[1, 10, 19, 28...], [2, 11, 20, 29, ...]]

key_length = find_key_length(cipher_processed)
num_keys = (len(cipher_processed)//key_length)+1
sorted_numbers = [[] * num_keys] * (key_length) # The array that will house the "sets" of letters as shown above.
#print(f'sorted_numbers={sorted_numbers}')

for n in range(0, key_length) :
    temp = [-1] * num_keys
    for key in range(0, num_keys) :
        #print(f'key={key}')
        #print(f'n={n}')
        value = ((key*key_length)+n)
        if value < len(cipher_processed) :
            temp[key] = value
        else :
            break
        #print(f'temp={temp}')
        sorted_numbers[n] = temp
    #print(sorted_numbers)
    #print(f'n={n}')
    #print(f'sorted_numbers[{n}]={sorted_numbers[n]}')

cipher_processed_str = ''.join(cipher_processed)

cipher_processed[3445]

i=0
sorted_letters = [[] * num_keys] * (key_length)
for b in sorted_numbers :
    sorted_letters
    j=0
    temp = []
    for letter in b :
        temp.append(cipher_processed[letter])
    sorted_letters[i] = temp
    i+=1

print(sorted_letters)

# print(len(cipher_processed))

#TODO:  (1) Make that previous section have better named variables. DONE
#       (2) Use the values in each section to grab the equivalent letter from cipher_processed. DONE
#       (3) Do frequency analysis on just those letters
#       (4) Compare that to the frequency of the english language
#       (5) Decode each encoded letter
#       (6) Recombine the decoded letters into the original text.

Key Length = 9
[['f', 's', 'y', 'f', 'v', 'm', 'b', 'p', 'e', 'm', 'm', 'n', 'n', 'r', 'e', 'm', 'r', 'r', 'a', 'o', 'v', 'u', 'o', 'k', 'k', 'q', 'f', 'e', 'a', 'e', 'i', 'p', 'i', 'u', 'm', 'a', 'v', 'o', 'r', 'r', 'm', 'm', 'g', 'm', 'k', 'm', 'm', 'y', 'n', 'y', ' ', 'm', 'm', 'u', 'z', 'r', 'd', 'q', 'r', 'f', 'q', 'q', 'e', 'd', ' ', ' ', 'n', 'm', 'r', 'm', 'e', 'r', 'y', 'f', 'a', 'r', 'r', 'm', 'r', 'm', 'v', 'e', ' ', 't', 'v', 's', 'p', 'u', 'm', 'r', 'u', ' ', 'u', 'd', 'm', 'v', 'a', 'd', 'm', 'q', 'n', 'f', 'a', 'o', 'v', 'e', ' ', 'n', 'm', 'm', 'm', 'r', 'd', 'm', 'q', 'f', 'f', 'm', 'i', 'u', 'o', 'k', 'i', 'y', 'q', 'v', 'n', 'n', 'm', 'f', 'y', 'u', 'f', 'f', 'g', 'a', 'm', 'k', 'm', 'k', ' ', 'x', 'i', 'p', 't', 'm', 'm', 'x', 'o', 'v', 'h', 'f', 'r', 'm', ' ', 'u', 'r', 'm', 'm', 'g', 'a', 'v', ' ', 'r', 'v', 'h', 'm', 'r', 'm', 'e', 'e', 'r', 'v', 'h', 'm', 'e', 'i', 'q', 'a', 'r', 'i', 'a', 'a', 'r', 'r', 'd', 'm', 'm', 'n', 'k', 's', 'v', 'r', 'e', 'u', 'r', 'f'

In [50]:
#Check to see if this works as intended
for n in range(0,len(sorted_letters)) :
    print(f'sorted_letters[{n}]={sorted_letters[n]}')

# expected output:
# f s y ...
# q r i ...
# s e r ...
# e x t ...
# w u d ...
# u p j ...
# f k a ...
# j q q ...
# s d f ...
#CORRECT!

sorted_letters[0]=['f', 's', 'y', 'f', 'v', 'm', 'b', 'p', 'e', 'm', 'm', 'n', 'n', 'r', 'e', 'm', 'r', 'r', 'a', 'o', 'v', 'u', 'o', 'k', 'k', 'q', 'f', 'e', 'a', 'e', 'i', 'p', 'i', 'u', 'm', 'a', 'v', 'o', 'r', 'r', 'm', 'm', 'g', 'm', 'k', 'm', 'm', 'y', 'n', 'y', ' ', 'm', 'm', 'u', 'z', 'r', 'd', 'q', 'r', 'f', 'q', 'q', 'e', 'd', ' ', ' ', 'n', 'm', 'r', 'm', 'e', 'r', 'y', 'f', 'a', 'r', 'r', 'm', 'r', 'm', 'v', 'e', ' ', 't', 'v', 's', 'p', 'u', 'm', 'r', 'u', ' ', 'u', 'd', 'm', 'v', 'a', 'd', 'm', 'q', 'n', 'f', 'a', 'o', 'v', 'e', ' ', 'n', 'm', 'm', 'm', 'r', 'd', 'm', 'q', 'f', 'f', 'm', 'i', 'u', 'o', 'k', 'i', 'y', 'q', 'v', 'n', 'n', 'm', 'f', 'y', 'u', 'f', 'f', 'g', 'a', 'm', 'k', 'm', 'k', ' ', 'x', 'i', 'p', 't', 'm', 'm', 'x', 'o', 'v', 'h', 'f', 'r', 'm', ' ', 'u', 'r', 'm', 'm', 'g', 'a', 'v', ' ', 'r', 'v', 'h', 'm', 'r', 'm', 'e', 'e', 'r', 'v', 'h', 'm', 'e', 'i', 'q', 'a', 'r', 'i', 'a', 'a', 'r', 'r', 'd', 'm', 'm', 'n', 'k', 's', 'v', 'r', 'e', 'u', 'r', '

In [215]:
#combine all the letters in each pair into one continuous string
unseparated_sorted_letters = [[] * num_keys] * (key_length)
for groups in range(0,len(sorted_letters)):
    unseparated_sorted_letters[groups] = ''.join(sorted_letters[groups])


freq_encoded = [-1] * len(unseparated_sorted_letters)
# Frequency Analysis on Variables
index = 0
for groups in unseparated_sorted_letters :
    freq_encoded[index] = EmpiricalFrequency(groups, 'char')
    # print(EmpiricalFrequency(groups, 'char'))
    index+=1

alphabet = ' abcdefghijklmnopqrstuvwxyz'

paired_freq_encoded = [[[-1, '?'] for _ in range(len(alphabet))] for _ in range(key_length)]
#print(paired_freq_encoded)
i=0
for group in paired_freq_encoded :
    j=0
    #print(group)
    for letter_pair in group :
        paired_freq_encoded[i][j][0] = freq_encoded[i][j]  # Directly modify the original list
        paired_freq_encoded[i][j][1] = alphabet[j]         # Directly modify the origina
        j+=1
    i+=1

paired_alphabet_frequency = [[-1, '?'] for _ in range(len(alphabet))]

alphabet_frequency = [0.185057120049760, 0.0640553023616335, 0.0145802950629794, 0.0207519961563824, 0.0337670786805122, 0.0975032137896586, 0.0183885252227195, 0.0191935888894409, 0.0497240802504884, 0.0561516595676339, 0.00164143158421966, 0.00830311225500171, 0.0377937580685005, 0.0217067758050807, 0.0552268231153310, 0.0631141332567514, 0.0155745452887443, 0.000913948017250003, 0.0483235552918135, 0.0528674353448129, 0.0692048516144487, 0.0229922916685098, 0.00671544231885557, 0.0179876947073359, 0.000996291807337308, 0.016730760985920, 0.000734288838877702]

i=0
for group in paired_alphabet_frequency :
    group[0] = alphabet_frequency[i]
    group[1] = alphabet[i]
    i+=1

#sort based on frequency
paired_alphabet_frequency.sort(reverse=True)
for pairs in paired_freq_encoded :
    pairs.sort(reverse=True)




print(f'alphabet frequency = {paired_alphabet_frequency}')

key = [] * key_length

for x in paired_freq_encoded :
    key.append(x[0][1])
key = ''.join(key)
print(key)

alphabet frequency = [[0.18505712004976, ' '], [0.0975032137896586, 'e'], [0.0692048516144487, 't'], [0.0640553023616335, 'a'], [0.0631141332567514, 'o'], [0.0561516595676339, 'i'], [0.055226823115331, 'n'], [0.0528674353448129, 's'], [0.0497240802504884, 'h'], [0.0483235552918135, 'r'], [0.0377937580685005, 'l'], [0.0337670786805122, 'd'], [0.0229922916685098, 'u'], [0.0217067758050807, 'm'], [0.0207519961563824, 'c'], [0.0191935888894409, 'g'], [0.0183885252227195, 'f'], [0.0179876947073359, 'w'], [0.01673076098592, 'y'], [0.0155745452887443, 'p'], [0.0145802950629794, 'b'], [0.00830311225500171, 'k'], [0.00671544231885557, 'v'], [0.00164143158421966, 'j'], [0.000996291807337308, 'x'], [0.000913948017250003, 'q'], [0.000734288838877702, 'z']]
mineapols


In [207]:
def decode_cipher(key,filename,alphabet_array) :

    alphabet = alphabet_array[0]
    inv_alphabet_dict = alphabet_array[1]
    inv_alphabet_dict = alphabet_array[2]


    ciphertext = ProcessFile(filename,'file')
    cipher_length = len(ciphertext)
    plaintext = ['?'] * cipher_length
    #print(ciphertext)
    key_length = len(key)
    #print(f'key_length={key_length}')

    output = ciphertext

    #assign number values to letters

    #For each letter, subtract
    


    i=0
    for letter in key :

        for repeat in range(0,(cipher_length//key_length)+1) :
            if (9*repeat+i)<(cipher_length) : #don't leave index range
                cipher_index = 9*repeat+i
            #print(cipher_index)
            plaintext[cipher_index] = inv_alphabet_dict[np.mod((alphabet_dict[ciphertext[cipher_index]] - alphabet_dict[letter]),len(alphabet))]
        i+=1



In [217]:
mykey = 'mineapols'

my_alphabet= ' abcdefghijklmnopqrstuvwxyz'

my_alphabet_dict = {
    ' ': 0,
    'a': 1,
    'b': 2,
    'c': 3,
    'd': 4,
    'e': 5,
    'f': 6,
    'g': 7,
    'h': 8,
    'i': 9,
    'j': 10,
    'k': 11,
    'l': 12,
    'm': 13,
    'n': 14,
    'o': 15,
    'p': 16,
    'q': 17,
    'r': 18,
    's': 19,
    't': 20,
    'u': 21,
    'v': 22,
    'w': 23,
    'x': 24,
    'y': 25,
    'z': 26
}

my_inv_alphabet_dict = {v: k for k, v in my_alphabet_dict.items()}

my_alphabet_array = [my_alphabet, my_alphabet_dict, my_inv_alphabet_dict]


decoded_cipher = decode_cipher(mykey,'ciphertext.txt', my_alphabet_array)
print(''.join(decoded_cipher))


the very first well documented description of a polyalphabetic cipher was by leon battista alberti around year fourteen sixty seven and used a metal cipher disk to switch between cipher alphabets alberti s system only switched alphabets after several words and switches were indicated by writing the letter of the corresponding alphabet in the ciphertext later johannes trithemius in his work polygraphiae invented the tabula recta a critical component of the vigen re cipher the trithemius cipher however provided a progressive rather rigid and predictable system for switching between cipher alphabets in fifteen eighty six blaise de vigenere published a type of polyalphabetic cipher called an autokey cipher because its key is based on the original plaintext before the court of henry iii of france the cipher now known as the vigenere cipher however is based on that originally described by giovan battista bellaso in his book la cifra del sig giovan battista bellaso he built upon the tabula re