<a href="https://colab.research.google.com/github/TrueZaiCHiK/seti_lr2/blob/main/%D0%9B%D0%B0%D0%B1%D0%BE%D1%80%D0%B0%D1%82%D0%BE%D1%80%D0%BD%D0%B0%D1%8F2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Исходные данные

In [None]:
# Number of data bits placed in each block before parity bits are added;
# passed to encode() by the demo calls further down.
word_length = 98

Установка и вычисление контрольной суммы

In [None]:
!pip install crc64iso

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
from crc64iso.crc64iso import crc64
from math import ceil, log2
from random import randint

In [None]:
def checksum(text):
  """Return the checksum of *text* as computed by ``crc64`` from crc64iso.

  The result is passed through unchanged; callers only compare two
  checksums for equality.
  """
  return crc64(text)

def text_to_bits(text, encoding='utf-8', errors="ignore"):
    """Convert *text* to a string of '0'/'1' characters, 8 per encoded byte.

    Args:
        text: The string to convert.
        encoding: Codec used to turn the text into bytes.
        errors: Error handler passed to ``str.encode``.

    Returns:
        The bits of every encoded byte, most significant bit first, in byte
        order. An empty text is represented as a single zero byte
        (``'00000000'``) so the result is never empty.
    """
    # Format byte by byte instead of going through one big integer: the
    # integer route silently drops leading zero bytes (e.g. "\0a").
    raw = text.encode(encoding, errors) or b'\0'
    return ''.join(f'{byte:08b}' for byte in raw)
  
def bits_to_text(bits, encoding='utf-8', errors="ignore"):
    """Convert a string of '0'/'1' characters back into text.

    Args:
        bits: Binary digits, most significant bit first. If the length is
            not a multiple of 8 the value is padded with zeros on the left.
        encoding: Codec used to decode the bytes.
        errors: Error handler passed to ``bytes.decode``.

    Returns:
        The decoded text, or ``'\\0'`` if decoding yields an empty string.

    Raises:
        ValueError: If *bits* is empty or is not a valid base-2 literal.
    """
    n = int(bits, 2)
    # Size the buffer from the number of digits, not from n.bit_length(),
    # so that leading zero bytes survive the round trip.
    byte_count = (len(bits) + 7) // 8
    return n.to_bytes(byte_count, 'big').decode(encoding, errors) or '\0'

def encode_word(data):
    """Encode one block of data bits into a code word with parity bits.

    Args:
        data: String of '0'/'1' characters.

    Returns:
        The code word as a string whose first character is position 1;
        power-of-two positions hold parity bits, the rest hold the data
        bits in their original order.
    """
    parity_count = calcRedundantBits(len(data))
    # posRedundantBits reads its argument from the end, so hand it the
    # reversed block to keep the data bits in their original order.
    laid_out = posRedundantBits(data[::-1], parity_count)
    # calcParityBits addresses position j as arr[-j], hence the reversal
    # going in and the reversal back coming out.
    with_parity = calcParityBits(laid_out[::-1], parity_count)
    return with_parity[::-1]

def decode_word(word_data):
  """Return the data bits of *word_data* without attempting any correction.

  Simply strips the parity positions via ``remove_redundant_bits``.
  """
  return remove_redundant_bits(word_data)

def decode_word_error(word_data):
    """Decode one code word, flipping the bit the parity checks point at.

    Args:
        word_data: Code word as a string of '0'/'1' characters.

    Returns:
        A ``(data_bits, fixed)`` tuple where *fixed* is 1 if a bit was
        flipped before stripping the parity bits and 0 otherwise.
    """
    payload = remove_redundant_bits(word_data)
    parity_count = calcRedundantBits(len(payload))
    # detectError addresses position j as arr[-j], so it gets the reversed
    # word; its result is a 1-based position, 0 meaning "no error".
    flip_at = detectError(word_data[::-1], parity_count) - 1
    if 0 <= flip_at < len(word_data):
        repaired_bit = "0" if word_data[flip_at] == "1" else "1"
        repaired = word_data[:flip_at] + repaired_bit + word_data[flip_at + 1:]
        return remove_redundant_bits(repaired), 1
    # Either the checks all passed or they point outside the word.
    return payload, 0

def posRedundantBits(data, r):
    """Lay out data bits with '0' placeholders at power-of-two positions.

    Args:
        data: String of bits; it is consumed from its last character
            towards its first.
        r: Number of extra positions to add to ``len(data)``.

    Returns:
        A string of ``len(data) + r`` characters whose first character is
        position 1. Positions 1, 2, 4, 8, ... contain '0'; the remaining
        positions are filled with ``data[-1]``, ``data[-2]``, ...
    """
    total = len(data) + r
    layout = []
    next_parity_pos = 1
    consumed = 0
    for pos in range(1, total + 1):
        if pos == next_parity_pos:
            layout.append('0')
            next_parity_pos *= 2
        else:
            consumed += 1
            layout.append(data[-consumed])
    return ''.join(layout)

def calcParityBits(arr, r):
    """Fill in the parity bits of a laid-out code word.

    Args:
        arr: String of bits in which position j (1-based) is ``arr[-j]``.
        r: Number of parity bits to compute.

    Returns:
        A copy of *arr* where, for each i in ``range(r)``, position ``2**i``
        holds the XOR of all positions whose index has bit i set.
    """
    n = len(arr)
    for bit in range(r):
        mask = 2 ** bit
        parity = 0
        for pos in range(1, n + 1):
            if pos & mask == mask:
                parity ^= int(arr[-pos])
        # Position `mask` counted from the end is index n - mask.
        slot = n - mask
        arr = arr[:slot] + str(parity) + arr[slot + 1:]
    return arr

def calcRedundantBits(m):
    """Return the number of parity bits needed for *m* data bits.

    This is the smallest ``r`` with ``2**r >= m + r + 1``.

    Args:
        m: Number of data bits in the block.

    Returns:
        The parity-bit count (0 for an empty block).
    """
    # Search without an upper bound tied to m: for m < 4 the answer is
    # not below m (e.g. m=1 needs r=2), so a range(m) search finds nothing.
    r = 0
    while 2 ** r < m + r + 1:
        r += 1
    return r

def detectError(arr, nr):
    """Compute the parity-check syndrome of a code word.

    Args:
        arr: String of '0'/'1' characters in which position j (1-based)
            is ``arr[-j]``.
        nr: Number of parity checks to evaluate.

    Returns:
        An integer whose bit i is the XOR of all positions whose index has
        bit i set. 0 means every check passed; for a single flipped bit
        the value is that bit's 1-based position.
    """
    n = len(arr)
    syndrome = 0
    for i in range(nr):
        mask = 1 << i
        parity = 0
        for j in range(1, n + 1):
            if j & mask:
                parity ^= int(arr[-j])
        # Set bit i directly rather than assembling decimal digits and
        # re-parsing them as a binary literal.
        syndrome |= parity << i
    return syndrome


def remove_redundant_bits(data):
    """Strip the parity positions from a code word.

    Args:
        data: Code word as a string whose first character is position 1.

    Returns:
        The characters at every position that is not a power of two, in
        their original order.
    """
    # Every power-of-two position up to and including len(data) is a
    # parity slot. Stopping earlier leaves the last parity bit in place
    # when the word length is 2**k + 1 (e.g. 5 data bits -> 9-bit word).
    return "".join(
        bit
        for pos, bit in enumerate(data, start=1)
        if pos & (pos - 1)  # zero exactly when pos is a power of two
    )

In [None]:
def encode(text, word_length):
    """Split *text* into bit blocks and encode each block separately.

    Args:
        text: The string to encode.
        word_length: Number of data bits per block; the last block may be
            shorter.

    Returns:
        A ``(code_words, checksum)`` tuple: the list of encoded blocks and
        the checksum of the original text.
    """
    bits = text_to_bits(text)
    block_count = ceil(len(bits) / word_length)
    code_words = []
    for block_no in range(block_count):
        start = block_no * word_length
        code_words.append(encode_word(bits[start:start + word_length]))
    return code_words, checksum(text)


def decode(words_data):
    """Decode code words without attempting error correction.

    Args:
        words_data: Iterable of code words (bit strings).

    Returns:
        A ``(text, checksum)`` tuple for the decoded text.
    """
    payloads = [decode_word(word) for word in words_data]
    decoded_text = bits_to_text("".join(payloads))
    return decoded_text, checksum(decoded_text)


def decode_error(words_data):
    """Decode code words, correcting each one via ``decode_word_error``.

    Args:
        words_data: Iterable of code words (bit strings).

    Returns:
        A ``(text, errors, checksum)`` tuple where *errors* is the number
        of words in which a bit was flipped back.
    """
    payloads = []
    errors = 0
    for word in words_data:
        payload, corrected = decode_word_error(word)
        payloads.append(payload)
        errors += corrected
    decoded_text = bits_to_text("".join(payloads))
    return decoded_text, errors, checksum(decoded_text)

In [None]:
def place_error(words_data, word_num, bit_num):
    """Flip one bit of one code word in place.

    Args:
        words_data: List of code words (bit strings); it is modified.
        word_num: Index of the word to damage.
        bit_num: Index of the character to flip inside that word.

    Returns:
        The same *words_data* list, for convenience.
    """
    target = words_data[word_num]
    flipped = "0" if target[bit_num] == "1" else "1"
    words_data[word_num] = target[:bit_num] + flipped + target[bit_num + 1:]
    return words_data


def place_random_errors(words_data, max_errors_per_word):
    """Flip a random number of distinct bits in every code word.

    Args:
        words_data: List of code words (bit strings); it is modified in
            place by ``place_error``.
        max_errors_per_word: Upper bound on the number of bits flipped in
            a single word.

    Returns:
        A ``(words_data, broken_words, errors_info)`` tuple: the damaged
        list, the number of words that received at least one flip, and a
        list of ``[word_num, bit_index]`` pairs, one per flipped bit.
    """
    # Local import: the file's import cell only brings in randint.
    from random import sample

    errors_info = []
    broken_words = 0
    for word_num, word in enumerate(words_data):
        # A word cannot receive more distinct flips than it has bits; this
        # also keeps an empty word from raising inside the RNG calls.
        amount_of_errors = randint(0, min(max_errors_per_word, len(word)))
        if not amount_of_errors:
            continue
        broken_words += 1
        # Draw positions without replacement so two flips never hit the
        # same bit and cancel out while still being reported as errors.
        for error_idx in sample(range(len(word)), amount_of_errors):
            words_data = place_error(words_data, word_num, error_idx)
            errors_info.append([word_num, error_idx])
    return words_data, broken_words, errors_info

In [None]:
def main(text, word_length, max_broken_bits_per_word):
    """Run the encode / corrupt / decode demo for *text* and print a report.

    Args:
        text: The message to send through the pipeline.
        word_length: Number of data bits per block.
        max_broken_bits_per_word: Upper bound on random bit flips per word.
    """
    blocks, reference_sum = encode(text, word_length)

    blocks, damaged_words, flips = place_random_errors(blocks, max_broken_bits_per_word)
    print(f"Amount of broken words: {damaged_words}")
    print(f"Amount of broken bits: {len(flips)}")

    # First pass: strip parity bits without correcting anything.
    raw_text, raw_sum = decode(blocks)
    print(f"Decoded text: {raw_text}")
    print(f"Is the same? {raw_text == text}")

    # Second pass: let the parity checks repair each word first.
    repaired_text, repaired_words, repaired_sum = decode_error(blocks)
    print(f"Decoded text (fixing errors): {repaired_text}")
    print(f"Is the same? {repaired_text == text}")

    print(f"Found {repaired_words} broken words of {len(blocks)} total.")
    print(f"Found all errors? {len(flips) == repaired_words}")
    print(f"Decoding checksum: {reference_sum == raw_sum}\nDecoding with errors fixed checksum: {reference_sum == repaired_sum}")

In [None]:
text = "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum. Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum. Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum. Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum."
# Demo run on the Latin-only sample text, with at most one flipped bit per code word.
main(text, word_length, max_broken_bits_per_word=1)

Amount of broken words: 68
Amount of broken bits: 68
Decoded text: Lorem ipsum tolor sit amet, consectetur adipiscing elit$ sed do eiusmod"tempor incididunt ut labore et dolOre mawna aliqua. Ut enim ad"minim veniam, quis nostrud exercitation ullamco laboris"nisi ut aliuip ex ea commodo consequat. Duis aute irure dolor in(reprehenderit i. voluptate velit ess% cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cuidatat non proident, sunt in cqlpa qui officia deserunT mollit anim id est laborum. Lorem pqum dolor sit amet, consectetur adipiscing elit, sed do eiusmod Tempor incididunt ut labore et dolorE magna aliqua. Ut enim0ad minim veniam, quis nostrud exercitation ullamco labgris nisi(ut aliquip ex ea0commodk consequat. Duis aute irure do|or in repehendgrit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat$non proident,"sunt kn culpa qui oficia deserunt mollit anim id est labor}m. Lorem ipsum dolor sit amet, consectetur adipis

In [None]:
text = """Текст, по-видимому, является искажённым отрывком из философского трактата Марка Туллия Цицерона «О пределах добра и зла[en]», написанного в 45 году до н. э. на латинском языке. Обнаружение сходства приписывается Ричарду Макклинтоку[1]. Испорченный текст, вероятно, происходит от его издания в Loeb Classical Library 1914 года, в котором слово dolorem разбито переносом так, что страница 36 начинается с lorem ipsum… (do- осталось на предыдущей)[2].

Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.

В оригинале абзац выглядит так:
Sed ut perspiciatis, unde omnis iste natus error sit voluptatem accusantium doloremque laudantium, totam rem aperiam eaque ipsa, quae ab illo inventore veritatis et quasi architecto beatae vitae dicta sunt, explicabo. Nemo enim ipsam voluptatem, quia voluptas sit, aspernatur aut odit aut fugit, sed quia consequuntur magni dolores eos, qui ratione voluptatem sequi nesciunt, neque porro quisquam est, qui dolorem ipsum, quia dolor sit, amet, consectetur, adipisci velit, sed quia non numquam eius modi tempora incidunt, ut labore et dolore magnam aliquam quaerat voluptatem. Ut enim ad minima veniam, quis nostrum exercitationem ullam corporis suscipit laboriosam, nisi ut aliquid ex ea commodi consequatur? Quis autem vel eum iure reprehenderit, qui in ea voluptate velit esse, quam nihil molestiae consequatur, vel illum, qui dolorem eum fugiat, quo voluptas nulla pariatur? At vero eos et accusamus et iusto odio dignissimos ducimus, qui blanditiis praesentium voluptatum deleniti atque corrupti, quos dolores et quas molestias excepturi sint, obcaecati cupiditate non provident, similique sunt in culpa, qui officia deserunt mollitia animi, id est laborum et dolorum fuga. Et harum quidem rerum facilis est et expedita distinctio. Nam libero tempore, cum soluta nobis est eligendi optio, cumque nihil impedit, quo minus id, quod maxime placeat, facere possimus, omnis voluptas assumenda est, omnis dolor repellendus. Temporibus autem quibusdam et aut officiis debitis aut rerum necessitatibus saepe eveniet, ut et voluptates repudiandae sint et molestiae non recusandae. Itaque earum rerum hic tenetur a sapiente delectus, ut aut reiciendis voluptatibus maiores alias consequatur aut perferendis doloribus asperiores repellat."""
# Demo run on the mixed Cyrillic/Latin sample text (multi-byte UTF-8), with at most one flipped bit per code word.
main(text, word_length, max_broken_bits_per_word=1)

Amount of broken words: 137
Amount of broken bits: 137
Decoded text: Текст, по-видимому, является искажённым отрывком Из философского трактата Марка Туллия Цицермна «О прзделѰх добра и зла[en]», на?исанного в 45 году дп н. э/ на латиԽскоҼ языке. Обнаружение сходства!приPисывается Ричарду Макклинтоку[1]. Испорченный текст, верояно, происходиђ от его издания в Loeb Classical Libzary 1914 года,(в котPром слово"dolorem разбиуо пеѐеносом так, чт стр0ниіа 36 начинается с dorem i`sum… (do- осталсь на предыдущей){2].

Lorem ipsum dolor siT amet, consect%tur adipiscing elit, sed do eIusmod tempor inciDidunt ut laborE et dolore$magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco abors nisi ut aliuip ex ea commodo con3equat. Duis`aute irure dolor in rexrehenderit In voluptate velit!esse cillum do|ore eu fugiat julla pariatur. Excepte5r sint occaecat cupIdatat nol proident, sunt in cumpa qui officia deserunt mollit afim id est0laborum.

В оиги=але аСзац выглядит так:
Sed ut pe