<a href="https://colab.research.google.com/github/TrueZaiCHiK/seti_lr2/blob/main/%D0%9B%D0%B0%D0%B1%D0%BE%D1%80%D0%B0%D1%82%D0%BE%D1%80%D0%BD%D0%B0%D1%8F2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Исходные данные

In [None]:
word_length = 98

Установка и вычисление контрольной суммы

In [None]:
!pip install crc64iso

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
from crc64iso.crc64iso import crc64
from math import ceil, log2
from random import randint

In [None]:
def checksum(text):
  return crc64(text)

def text_to_bits(text, encoding='utf-8', errors="ignore"):
    bits = bin(int.from_bytes(text.encode(encoding, errors), 'big'))[2:]
    return bits.zfill(8 * ((len(bits) + 7) // 8))
  
def bits_to_text(bits, encoding='utf-8', errors="ignore"):
    n = int(bits, 2)
    return n.to_bytes((n.bit_length() + 7) // 8, 'big').decode(encoding, errors) or '\0'

def encode_word(data):
  length = len(data)
  red_count = calcRedundantBits(length)
  data_with_zeros = posRedundantBits(data[::-1], red_count)
  data_with_pars = calcParityBits(data_with_zeros[::-1], red_count)
  return data_with_pars[::-1]

def decode_word(word_data):
  return remove_redundant_bits(word_data)

def decode_word_error(word_data):
  decoded_word_data = remove_redundant_bits(word_data)
  r = calcRedundantBits(len(decoded_word_data))
  idx_error = detectError(word_data[::-1], r) - 1
  if idx_error == -1 or idx_error >= len(word_data):
    return decoded_word_data, 0
  word_data = word_data[:idx_error] + ("0" if word_data[idx_error] == "1" else "1") + word_data[idx_error+1:]
  return remove_redundant_bits(word_data), 1

def posRedundantBits(data, r):
	j = 0
	k = 1
	m = len(data)
	res = ''
	for i in range(1, m + r+1):
		if(i == 2**j):
			res = res + '0'
			j += 1
		else:
			res = res + data[-1 * k]
			k += 1
	return res

def calcParityBits(arr, r):
	n = len(arr)
	for i in range(r):
		val = 0
		for j in range(1, n + 1):
			if(j & (2**i) == (2**i)):
				val = val ^ int(arr[-1 * j])
		arr = arr[:n-(2**i)] + str(val) + arr[n-(2**i)+1:]
	return arr

def calcRedundantBits(m):
	for i in range(m):
		if(2**i >= m + i + 1):
			return i

def detectError(arr, nr):
	n = len(arr)
	res = 0
	for i in range(nr):
		val = 0
		for j in range(1, n + 1):
			if(j & (2**i) == (2**i)):
				val = val ^ int(arr[-1 * j])
		res = res + val*(10**i)
	return int(str(res), 2)


def remove_redundant_bits(data):
  r_idx = 1
  i_to_remove = []
  while r_idx+1 < len(data):
    i_to_remove.append(r_idx-1)
    r_idx *= 2
  i_to_remove = i_to_remove[::-1]
  for i in i_to_remove:
    data = data[:i] + data[i+1:]

  return data

In [None]:
def encode(text, word_length):
  data = text_to_bits(text)
  data_length = len(data)
  num_blocks = ceil(data_length / word_length)
  data_blocks = [
      encode_word(data[i*word_length:(i+1)*word_length])
      for i in range(num_blocks)
  ]

  return data_blocks, checksum(text)


def decode(words_data):
  decoded_data = "".join(
      decode_word(word_data)
      for word_data in words_data
  )
  decoded_text = bits_to_text(decoded_data)
  return decoded_text, checksum(decoded_text)


def decode_error(words_data):
  decoded_data = ""
  errors = 0
  for word_data in words_data:
    decoded_word, error = decode_word_error(word_data)
    decoded_data += decoded_word
    errors += error
  decoded_text = bits_to_text(decoded_data)
  return decoded_text, errors, checksum(decoded_text)

In [None]:
def place_error(words_data, word_num, bit_num):
  word_data = words_data[word_num]
  word_data = word_data[:bit_num] + ("0" if word_data[bit_num] == "1" else "1") + word_data[bit_num+1:]
  words_data[word_num] = word_data
  return words_data


def place_random_errors(words_data, max_errors_per_word):
  errors_info = []
  broken_words = 0
  for word_num in range(len(words_data)):
    amount_of_errors  = randint(0, max_errors_per_word)
    broken_words += 1 if amount_of_errors else 0
    for err in range(amount_of_errors):
      error_idx = randint(0, len(words_data[word_num])-1)
      words_data = place_error(words_data, word_num, error_idx)
      errors_info.append([word_num, error_idx])
  return words_data, broken_words, errors_info

In [None]:
def main(text, word_length, max_broken_bits_per_word):
  encoded_blocks, check_init = encode(text, word_length)

  encoded_blocks, broken_words, errors_info = place_random_errors(encoded_blocks, max_broken_bits_per_word)
  print(f"Amount of broken words: {broken_words}")
  print(f"Amount of broken bits: {len(errors_info)}")

  decoded_text, check_dec = decode(encoded_blocks)
  print(f"Decoded text: {decoded_text}")
  print(f"Is the same? {decoded_text == text}")

  decoded_text_err, found_error_words, check_err = decode_error(encoded_blocks)
  print(f"Decoded text (fixing errors): {decoded_text_err}")
  print(f"Is the same? {decoded_text_err == text}")

  print(f"Found {found_error_words} broken words of {len(encoded_blocks)} total.")
  print(f"Found all errors? {len(errors_info) == found_error_words}")
  print(f"Decoding checksum: {check_init == check_dec}\nDecoding with errors fixed checksum: {check_init == check_err}")

# Без ошибок

In [33]:
text = "Регистрация изображения - это процесс преобразования различных наборов данных в одну систему координат. Данные могут быть несколькими фотографиями, данными с разных датчиков, временами, глубинами или точками обзора. Он используется в компьютерном зрении , медицинской визуализации , военном автоматическом распознавании целей , а также в составлении и анализе изображений и данных со спутников. Регистрация необходима для того, чтобы иметь возможность сравнивать или интегрировать данные, полученные в результате этих различных измерений.Image registration is the process of transforming different sets of data into one coordinate system. Data may be multiple photographs, data from different sensors, times, depths, or viewpoints.[1] It is used in computer vision, medical imaging,[2] military automatic target recognition, and compiling and analyzing images and data from satellites. Registration is necessary in order to be able to compare or integrate the data obtained from these different measurements.Image registration has applications in remote sensing (cartography updating), and computer vision. Due to the vast range of applications to which image registration can be applied, it is impossible to develop a general method that is optimized for all uses.Medical image registration (for data of the same patient taken at different points in time such as change detection or tumor monitoring) often additionally involves elastic (also known as nonrigid) registration to cope with deformation of the subject (due to breathing, anatomical changes, and so forth).[18][19][20] Nonrigid registration of medical images can also be used to register a patient's data to an anatomical atlas, such as the Talairach atlas for neuroimaging.In astrophotography image alignment and stacking are often used to increase the signal to noise ratio for faint objects. Without stacking it may be used to produce a timelapse of events such as a planet's rotation of a transit across the Sun. Using control points (automatically or manually entered), the computer performs transformations on one image to make major features align with a second or multiple images. This technique may also be used for images of different sizes, to allow images taken through different telescopes or lenses to be combined.In cryo-TEM instability causes specimen drift and many fast acquisitions with accurate image registration is required to preserve high resolution and obtain high signal to noise images. For low SNR data, the best image registration is achieved by cross-correlating all permutations of images in an image stack.[21]Image registration is an essential part of panoramic image creation. There are many different techniques that can be implemented in real time and run on embedded devices like cameras and camera-phones."
main(text, word_length, max_broken_bits_per_word=0)

Amount of broken words: 0
Amount of broken bits: 0
Decoded text: Регистрация изображения - это процесс преобразования различных наборов данных в одну систему координат. Данные могут быть несколькими фотографиями, данными с разных датчиков, временами, глубинами или точками обзора. Он используется в компьютерном зрении , медицинской визуализации , военном автоматическом распознавании целей , а также в составлении и анализе изображений и данных со спутников. Регистрация необходима для того, чтобы иметь возможность сравнивать или интегрировать данные, полученные в результате этих различных измерений.Image registration is the process of transforming different sets of data into one coordinate system. Data may be multiple photographs, data from different sensors, times, depths, or viewpoints.[1] It is used in computer vision, medical imaging,[2] military automatic target recognition, and compiling and analyzing images and data from satellites. Registration is necessary in order to be able to 

# Не более 1 ошибки на слово

In [36]:
main(text, word_length, max_broken_bits_per_word=1)

Amount of broken words: 136
Amount of broken bits: 136
Decoded text: Ргистрацию изображееия - эQо процесс пфеобразования различԽых наборов даннՋх в одпу систему координбт. ДАнные моҳут быть несколькиѴи фотографиямШ, данными с ра7ных датиков, жременами, глѓбинPми или точкаи обзофа. Он испольуется в коМпьютернм$зрении , меДициской визуализаՆии , ТоеннОм автомаъичеком распознавѰнии целей , а"также в составлении и анализе Ҹзображений и данных со спутиков Регистрация необходиЬа для тпго, чтобы иметь возможность сраҲнивать или интегрироать данные, Pолученные в результате этиэ различных из<ерений.Image registration is th% process of transforming different sets of data into one comrdinate system. Data may be multiple photogpaphs, data fro} different sensors, times, depths, or viewpoints.[1] It is used In computer viion, medical imaging,[2] militaby autmatic target recognition, and compiling and analyzing imager and data brom satellites. Regisvration is necessary in order to be able to compare

#Не более двух ошибок на слово

In [35]:
main(text, word_length, max_broken_bits_per_word=2)

Amount of broken words: 178
Amount of broken bits: 263
Decoded text: Регитрация изображPия - это прпцесс преобраԷования различных наборв данных в ону сиЁтему кординат. Даные мܾгут быть несколькими$фотоУрафиями( данԽыми щ разных датчؘков, врменаШ,"глуұинми ил тPчкми жбзорР. Он кспользՃетсՏ в ккмпьютерном Зрееии , медицинскй визуакиацШظ , вонном автЮматическоѼ распознавании цеей , а такж5 в сосавлении и анлизе избраженкй и данных со ѡптников. Релитрация нзобходииа для уого, чтобћ имеуь ѲозмжнЮсть сравнивать илҸ и=тегтиров԰ть даؽные, полученные в ре7улѬтае этих рАзличPых измЕрзниع.Imagd registration isthe proc!ss of transfobming different rets og data into one coordinate"s}stem.`ata may be multm0le photographs, datA`from differen| sensobs, times, depths, or viewpointsn[1] It is(use$ in bomputer vision( medical imagi.g,[2]`military automapic target recognition, and compilino and ana,yzi.g imafes and taua from$satelnites. Registztion is n%cessry in order t be able to compare or integrate