<a href="https://colab.research.google.com/github/eliferdentr/Assignments/blob/main/errorcorrection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import os
import numpy as np
import glob

def calculate_error_rate(original_dir, corrupted_dir):
    total_original_size = 0
    total_corrupted_size = 0

    for file_name in glob.glob(os.path.join(original_dir, "*")):
        original_size = os.path.getsize(file_name)
        total_original_size += original_size
        corrupted_file_name = os.path.join(corrupted_dir, os.path.basename(file_name))
        if os.path.exists(corrupted_file_name):
            corrupted_size = os.path.getsize(corrupted_file_name)
            total_corrupted_size += corrupted_size
    error_rate = (total_corrupted_size / total_original_size) * 100

    return error_rate



In [None]:
import os
import numpy as np
import glob

def introduce_errors_to_file(file_name, error_rate):
    with open(file_name, "rb") as f:
        data = np.frombuffer(f.read(), dtype=np.uint8)

    num_errors = int(error_rate * len(data))
    error_indices = np.random.choice(len(data), size=num_errors, replace=False)

    corrupted_data = data.copy()
    corrupted_data[error_indices] = np.random.randint(0, 256, size=num_errors, dtype=np.uint8)

    new_file_name = os.path.join("/content/drive/MyDrive/openimages/compressed/corrupted_files", os.path.basename(file_name))
    with open(new_file_name, "wb") as f:
        f.write(corrupted_data.tobytes())

os.makedirs("/content/drive/MyDrive/openimages/compressed/corrupted_files", exist_ok=True)

total_error_rate = 0

for file_name in glob.glob("/content/drive/MyDrive/openimages/compressed/*.jpeg"):
    introduce_errors_to_file(file_name, 0.01)  # 1% error rate
    total_error_rate += 0.01


Average error correction rate: 1.00%


In [None]:

original_dir = "/content/drive/MyDrive/openimages/compressed"
corrupted_dir = "/content/drive/MyDrive/openimages/compressed/corrupted_files"

error_rate = calculate_error_rate(original_dir, corrupted_dir)
print("Error rate: {:.2f}%".format(error_rate))

Error rate: 99.96%


In [None]:
import os
import numpy as np
import glob
from PIL import Image

def introduce_errors_to_file(file_name, error_rate):
    image = Image.open(file_name)

    # Convert the image to RGB mode if it's not
    if image.mode != "RGB":
        image = image.convert("RGB")

    # Convert the image to a numpy array
    data = np.array(image)

    # Determine the number of pixels to corrupt based on the error rate
    num_errors = int(error_rate * data.size)

    # Randomly select the indices of pixels to corrupt
    error_indices = np.random.choice(data.size, size=num_errors, replace=False)

    # Introduce errors
    corrupted_data = data.flatten()
    corrupted_data[error_indices] = np.random.randint(0, 256, size=num_errors)

    # Reshape the corrupted data back to the original shape
    corrupted_data = corrupted_data.reshape(data.shape)
    os.makedirs("/content/drive/MyDrive/openimages/compressed/corrupted_files_webp", exist_ok=True)
    new_file_name = os.path.join("/content/drive/MyDrive/openimages/compressed/corrupted_files_webp", os.path.basename(file_name))
    corrupted_image = Image.fromarray(corrupted_data)
    corrupted_image.save(new_file_name)

for file_name in glob.glob("/content/drive/MyDrive/openimages/person/images/*.webp"):
    introduce_errors_to_file(file_name, 0.01)  # 1% error rate


Average error correction rate: 100.00%


In [None]:

original_dir = "/content/drive/MyDrive/openimages/person/images"
corrupted_dir = "/content/drive/MyDrive/openimages/compressed/corrupted_files_webp"

error_rate = calculate_error_rate(original_dir, corrupted_dir)
print("Error rate: {:.2f}%".format(error_rate))

Error rate: 33.95%


------------------------------------------------

In [None]:
!pip install pydub

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting pydub
  Downloading pydub-0.25.1-py2.py3-none-any.whl (32 kB)
Installing collected packages: pydub
Successfully installed pydub-0.25.1


In [None]:
import os
import numpy as np
import glob
import shutil
from pydub import AudioSegment

def introduce_errors_to_file(file_name, error_rate):
    # Load the audio file
    audio = AudioSegment.from_file(file_name)

    # Convert to mono if it's stereo
    if audio.channels == 2:
        audio = audio.set_channels(1)

    # Convert to 16-bit if it's not
    if audio.sample_width != 2:
        audio = audio.set_sample_width(2)

    # Get the raw audio data
    raw_data = audio.raw_data

    # Convert raw data to numpy array
    samples = np.frombuffer(raw_data, dtype=np.int16)

    # Determine the number of samples to corrupt based on the error rate
    num_errors = int(error_rate * len(samples))

    # Randomly select the indices of samples to corrupt
    error_indices = np.random.choice(len(samples), size=num_errors, replace=False)
    
    corrupted_samples = samples.copy()
    corrupted_samples[error_indices] = np.random.randint(-32768, 32767, size=num_errors, dtype=np.int16)
    os.makedirs("/content/drive/MyDrive/muzikler/mp3/corrupted_files", exist_ok=True)
    os.makedirs("/content/drive/MyDrive/muzikler/aac/corrupted_files", exist_ok=True)
    corrupted_raw_data = corrupted_samples.tobytes()
    if file_name.endswith(".mp3"):
        new_file_name = os.path.join("/content/drive/MyDrive/muzikler/mp3/corrupted_files", os.path.basename(file_name))
    else:
        new_file_name = os.path.join("/content/drive/MyDrive/muzikler/aac/corrupted_files", os.path.basename(file_name))

    corrupted_audio = AudioSegment(
        data=corrupted_raw_data,
        sample_width=audio.sample_width,
        frame_rate=audio.frame_rate,
        channels=audio.channels
    )
    corrupted_audio.export(new_file_name, format=file_name.split(".")[-1])

for file_name in glob.glob("/content/drive/MyDrive/muzikler/mp3/*.mp3"):
    introduce_errors_to_file(file_name, 0.01) 

for file_name in glob.glob("/content/drive/MyDrive/muzikler/aac/*.aac"):
    introduce_errors_to_file(file_name, 0.01)  

print("Corrupted MP3 files saved in: /content/drive/MyDrive/muzikler/mp3/corrupted_files")
print("Corrupted AAC files saved in: /content/drive/MyDrive/muzikler/aac/corrupted_files")



CouldntEncodeError: ignored

In [None]:
original_dir = "/content/drive/MyDrive/muzikler/mp3"
corrupted_dir = "/content/drive/MyDrive/muzikler/mp3/corrupted_files"

error_rate = calculate_error_rate(original_dir, corrupted_dir)
print("Error rate: {:.2f}%".format(error_rate))


Error rate: 100.00%


In [None]:
import os
import numpy as np
import glob
import shutil
from pydub import AudioSegment
import subprocess

def introduce_errors_to_file(file_name, error_rate):

    audio = AudioSegment.from_file(file_name)

    if audio.channels == 2:
        audio = audio.set_channels(1)

    if audio.sample_width != 2:
        audio = audio.set_sample_width(2)

    raw_data = audio.raw_data

    samples = np.frombuffer(raw_data, dtype=np.int16)

    num_errors = int(error_rate * len(samples))

    error_indices = np.random.choice(len(samples), size=num_errors, replace=False)

    corrupted_samples = samples.copy()
    corrupted_samples[error_indices] = np.random.randint(-32768, 32767, size=num_errors, dtype=np.int16)

    corrupted_raw_data = corrupted_samples.tobytes()

    os.makedirs("/content/drive/MyDrive/muzikler/aac/corrupted_files", exist_ok=True)

    if file_name.endswith(".mp3"):
        new_file_name = os.path.join("/content/drive/MyDrive/muzikler/mp3/corrupted_files", os.path.basename(file_name))
    else:
        new_file_name = os.path.join("/content/drive/MyDrive/muzikler/aac/corrupted_files", os.path.basename(file_name))

    tmp_wav_file = "/tmp/tmp.wav"
    tmp_aac_file = "/tmp/tmp.aac"

    audio.export(tmp_wav_file, format="wav")
    subprocess.run(["ffmpeg", "-y", "-i", tmp_wav_file, "-c:a", "aac", tmp_aac_file])
    shutil.move(tmp_aac_file, new_file_name)

    os.remove(tmp_wav_file)

for file_name in glob.glob("/content/drive/MyDrive/muzikler/aac/*.aac"):
    introduce_errors_to_file(file_name, 0.01)

print("Corrupted AAC files saved in: /content/drive/MyDrive/muzikler/aac/corrupted_files")


Corrupted AAC files saved in: /content/drive/MyDrive/muzikler/aac/corrupted_files


In [None]:
original_dir = "/content/drive/MyDrive/muzikler/aac"
corrupted_dir = "/content/drive/MyDrive/muzikler/aac/corrupted_files"

error_rate = calculate_error_rate(original_dir, corrupted_dir)
print("Error rate: {:.2f}%".format(error_rate))


Error rate: 54.87%


In [None]:
!pip install ffmpeg

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting ffmpeg
  Downloading ffmpeg-1.4.tar.gz (5.1 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: ffmpeg
  Building wheel for ffmpeg (setup.py) ... [?25l[?25hdone
  Created wheel for ffmpeg: filename=ffmpeg-1.4-py3-none-any.whl size=6083 sha256=ad54772b69081b46b516bb82a0cdc33a8bc387ea1c000f445a0edd72d2f4f81c
  Stored in directory: /root/.cache/pip/wheels/8e/7a/69/cd6aeb83b126a7f04cbe7c9d929028dc52a6e7d525ff56003a
Successfully built ffmpeg
Installing collected packages: ffmpeg
Successfully installed ffmpeg-1.4


In [None]:
import os
import numpy as np
import glob
import shutil
import subprocess

def introduce_errors_to_file(file_name, error_rate):
    os.makedirs("/content/drive/MyDrive/videos/264/corrupted_files", exist_ok=True)

    new_file_name = os.path.join("/content/drive/MyDrive/videos/264/corrupted_files", os.path.basename(file_name))

    command = f"ffmpeg -i {file_name} -c:v libx264 -b:v {int(error_rate * 100)}k {new_file_name}"

    subprocess.run(command, shell=True)

for file_name in glob.glob("/content/drive/MyDrive/videos/264/*.mp4"):
    introduce_errors_to_file(file_name, 0.01)  

print("Corrupted MP4 files saved in: /content/drive/MyDrive/videos/264/corrupted_files")


Corrupted MP4 files saved in: /content/drive/MyDrive/videos/264/corrupted_files


In [None]:
original_dir = "/content/drive/MyDrive/videos/264"
corrupted_dir = "/content/drive/MyDrive/videos/264/corrupted_files"

error_rate = calculate_error_rate(original_dir, corrupted_dir)
print("Error rate: {:.2f}%".format(error_rate))

Error rate: 0.77%


In [None]:
import os
import glob
import random
import shutil

def introduce_errors_to_file(file_name, error_rate):
    with open(file_name, "rb") as f:
        data = f.read()

    characters = list(data)

    num_errors = int(error_rate * len(characters))

    error_indices = random.sample(range(len(characters)), num_errors)

    for index in error_indices:
        characters[index] = random.choice(list(set(range(256)) - {characters[index]}))
    os.makedirs("/content/drive/MyDrive/textfiles/huffmancompressed/corrupted_files", exist_ok=True)
    os.makedirs("/content/drive/MyDrive/textfiles/lzwcompressed/corrupted_files", exist_ok=True)
    corrupted_file_name = os.path.join("/content/drive/MyDrive/textfiles/huffmancompressed/corrupted_files", os.path.basename(file_name))
    with open(corrupted_file_name, "wb") as f:
        f.write(bytes(characters))

    if file_name.endswith(".lzw"):
        corrupted_file_name = os.path.join("/content/drive/MyDrive/textfiles/lzwcompressed/corrupted_files", os.path.basename(file_name))
        with open(corrupted_file_name, "wb") as f:
            f.write(bytes(characters))
for file_name in glob.glob("/content/drive/MyDrive/textfiles/huffmancompressed/*.txt"):
    introduce_errors_to_file(file_name, 0.01) 

for file_name in glob.glob("/content/drive/MyDrive/textfiles/lzwcompressed/*.lzw"):
    introduce_errors_to_file(file_name, 0.01) 

print("Corrupted Huffman compressed files saved in: /content/drive/MyDrive/textfiles/huffmancompressed/corrupted_files")
print("Corrupted LZW compressed files saved in: /content/drive/MyDrive/textfiles/lzwcompressed/corrupted_files")


Corrupted Huffman compressed files saved in: /content/drive/MyDrive/textfiles/huffmancompressed/corrupted_files
Corrupted LZW compressed files saved in: /content/drive/MyDrive/textfiles/lzwcompressed/corrupted_files


In [None]:
original_dir = "/content/drive/MyDrive/textfiles/huffmancompressed"
corrupted_dir = "/content/drive/MyDrive/textfiles/huffmancompressed/corrupted_files"

error_rate = calculate_error_rate(original_dir, corrupted_dir)
print("Error rate: {:.2f}%".format(error_rate))

Error rate: 99.08%


In [None]:
original_dir = "/content/drive/MyDrive/textfiles/lzwcompressed"
corrupted_dir = "/content/drive/MyDrive/textfiles/lzwcompressed/corrupted_files"

error_rate = calculate_error_rate(original_dir, corrupted_dir)
print("Error rate: {:.2f}%".format(error_rate))

Error rate: 94.12%


In [None]:

for file_name in glob.glob("/content/drive/MyDrive/textfiles/lzwcompressed/*.lzw"):
    introduce_errors_to_file(file_name, 0.01)
print("Corrupted LZW compressed files saved in: /content/drive/MyDrive/textfiles/lzwcompressed/corrupted_files")

Corrupted LZW compressed files saved in: /content/drive/MyDrive/textfiles/lzwcompressed/corrupted_files


In [None]:

for file_name in glob.glob("/content/drive/MyDrive/textfiles/huffmancompressed/*.txt"):
    introduce_errors_to_file(file_name, 0.01) 


--------------------------------------------------------------------------------------------------