<a href="https://colab.research.google.com/github/fatimasood/Data-Augmentation/blob/main/Data_Augmentation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# important libraries
import requests
from PIL import Image, ImageEnhance
import numpy as np
from pathlib import Path
from io import BytesIO
import random

In [2]:
# Function to download an image from the internet
def download_image(url, filename, timeout=30):
    """Download an image, convert it to RGB, save it to disk and return it.

    Args:
        url: HTTP(S) address of the image to fetch.
        filename: Path the RGB image is written to via ``Image.save``.
        timeout: Seconds to wait for the server before giving up. Requests
            applies no timeout unless one is passed, so a stalled connection
            would otherwise hang the cell indefinitely.

    Returns:
        The downloaded picture as a PIL ``Image`` in RGB mode.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If the server does not respond within ``timeout``.
    """
    response = requests.get(url, timeout=timeout)
    # Fail on HTTP errors right here; otherwise the error page body would be
    # handed to PIL and surface as an unrelated image-decoding failure.
    response.raise_for_status()
    img = Image.open(BytesIO(response.content)).convert("RGB")
    img.save(filename)
    return img

In [3]:
# Source photos fetched by the download step, in processing order
cat_url = "https://images.unsplash.com/photo-1518791841217-8f162f1e1131"
landscape_url = "https://images.unsplash.com/photo-1501785888041-af3ef285b470"
image_urls = [cat_url, landscape_url]

In [4]:
# Output folder: the augmented images produced below are saved into it.
out = Path("aug_outputs")
# exist_ok=True keeps this cell re-runnable once the folder already exists.
out.mkdir(exist_ok=True)

In [5]:
# Augmentation functions

# Rotation
def rotate(im, angle):
    """Return ``im`` rotated by ``angle``.

    ``expand=True`` is passed to ``im.rotate`` so the output canvas is sized
    to hold the whole rotated picture.
    """
    rotated = im.rotate(angle, expand=True)
    return rotated

# Horizontal flip
def flip_horizontal(im):
    """Return a left-right mirrored copy of ``im``."""
    mirror_op = Image.FLIP_LEFT_RIGHT
    return im.transpose(mirror_op)

# Brightness
def brightness(im, factor):
    """Return ``im`` with its brightness scaled by ``factor``.

    The work is delegated to ``ImageEnhance.Brightness(im).enhance(factor)``.
    """
    return ImageEnhance.Brightness(im).enhance(factor)

# Gaussian noise
def gaussian_noise(im, sigma=10, rng=None):
    """Return a copy of ``im`` with zero-mean Gaussian noise added to every value.

    Args:
        im: Input image; anything ``np.array`` can convert (e.g. a PIL image).
        sigma: Standard deviation of the noise, in pixel-intensity units.
        rng: Optional ``np.random.Generator`` used to draw the noise, which
            makes the result reproducible. When omitted, NumPy's global
            random state is used, exactly as before.

    Returns:
        A new PIL ``Image`` built from the noisy ``uint8`` array.
    """
    arr = np.array(im).astype(np.float32)
    sampler = np.random if rng is None else rng
    noise = sampler.normal(0, sigma, arr.shape)
    # Round before the uint8 cast: a bare astype() truncates toward zero,
    # which shifts the image darker by about half a level on average.
    noisy = np.clip(np.rint(arr + noise), 0, 255).astype(np.uint8)
    return Image.fromarray(noisy)


In [6]:
# Download + apply augmentations

# NOTE(review): gaussian_noise is defined in the cell above but is not applied
# here — confirm whether leaving it out is intentional.
for i, source_url in enumerate(image_urls, start=1):
    print(f"Downloading Image {i}...")
    # download_image also writes the fetched picture to img{i}.jpg
    original = download_image(source_url, f"img{i}.jpg")

    # Keep an untouched copy next to the augmented variants
    original.save(out / f"img{i}_orig.jpg")

    # Rotation by 25 degrees
    rotate(original, 25).save(out / f"img{i}_rot25.jpg")

    # Left-right mirror
    flip_horizontal(original).save(out / f"img{i}_hflip.jpg")

    # Brightness scaled by 1.5
    brightness(original, 1.5).save(out / f"img{i}_bright15.jpg")

print("All augmentations applied!")


Downloading Image 1...
Downloading Image 2...
All augmentations applied!


In [7]:
import pandas as pd
import random

# Toy dataset: five rows, two numeric features and a binary class label
data_q3 = {
    'ID': [1, 2, 3, 4, 5],
    'Feature1': [2.1, 2.4, 5.2, 5.6, 5.9],
    'Feature2': [1.3, 1.1, 4.1, 4.3, 4.0],
    'Class': [0, 0, 1, 1, 1]
}
df_q3 = pd.DataFrame(data_q3)
print("Original Dataset:")
print(df_q3)

# Settings shared by both draws so they differ only in the `replace` flag
q3_columns = ['ID', 'Feature1', 'Feature2', 'Class']
q3_sample_size = 3
q3_seed = 42

# Draw rows with replacement: the same row may be picked more than once
print("\n Sampling WITH Replacement:")
with_replacement_sample = df_q3.sample(
    n=q3_sample_size, replace=True, random_state=q3_seed
)
with_replacement_view = with_replacement_sample[q3_columns]
print(with_replacement_view.reset_index(drop=True))

# Draw rows without replacement: every picked row is distinct
print("\n Sampling WITHOUT Replacement:")
without_replacement_sample = df_q3.sample(
    n=q3_sample_size, replace=False, random_state=q3_seed
)
without_replacement_view = without_replacement_sample[q3_columns]
print(without_replacement_view.reset_index(drop=True))

Original Dataset:
   ID  Feature1  Feature2  Class
0   1       2.1       1.3      0
1   2       2.4       1.1      0
2   3       5.2       4.1      1
3   4       5.6       4.3      1
4   5       5.9       4.0      1

 Sampling WITH Replacement:
   ID  Feature1  Feature2  Class
0   4       5.6       4.3      1
1   5       5.9       4.0      1
2   3       5.2       4.1      1

 Sampling WITHOUT Replacement:
   ID  Feature1  Feature2  Class
0   2       2.4       1.1      0
1   5       5.9       4.0      1
2   3       5.2       4.1      1


In [10]:
# Imbalanced toy dataset: two Class-0 rows against four Class-1 rows
data_q4 = {
    'ID': ['A1', 'A2', 'B1', 'B2', 'B3', 'B4'],
    'Feature1': [2.2, 2.5, 5.0, 5.3, 5.7, 5.8],
    'Feature2': [1.0, 1.2, 4.0, 4.1, 4.2, 4.3],
    'Class': [0, 0, 1, 1, 1, 1]
}

df_q4 = pd.DataFrame(data_q4)
print("Original Dataset:")
print(df_q4)

# Split by label once; both resampling strategies reuse these subsets
class_0 = df_q4.loc[df_q4['Class'].eq(0)]
class_1 = df_q4.loc[df_q4['Class'].eq(1)]

# Oversampling: draw Class-0 rows with replacement until they match Class 1
print("\nOversampling WITH Replacement:")
oversampled_class_0 = class_0.sample(n=class_1.shape[0], replace=True, random_state=42)
oversampled_parts = [oversampled_class_0, class_1]
oversampled_dataset = pd.concat(oversampled_parts).sort_values('ID')
print(oversampled_dataset.reset_index(drop=True))

# Undersampling: keep only as many Class-1 rows as there are Class-0 rows
print("\nUndersampling (select 2 samples from Class 1):")
undersampled_class_1 = class_1.sample(n=class_0.shape[0], replace=False, random_state=42)
undersampled_parts = [class_0, undersampled_class_1]
undersampled_dataset = pd.concat(undersampled_parts).sort_values('ID')
print(undersampled_dataset.reset_index(drop=True))



Original Dataset:
   ID  Feature1  Feature2  Class
0  A1       2.2       1.0      0
1  A2       2.5       1.2      0
2  B1       5.0       4.0      1
3  B2       5.3       4.1      1
4  B3       5.7       4.2      1
5  B4       5.8       4.3      1

Oversampling WITH Replacement:
   ID  Feature1  Feature2  Class
0  A1       2.2       1.0      0
1  A1       2.2       1.0      0
2  A1       2.2       1.0      0
3  A2       2.5       1.2      0
4  B1       5.0       4.0      1
5  B2       5.3       4.1      1
6  B3       5.7       4.2      1
7  B4       5.8       4.3      1

Undersampling (select 2 samples from Class 1):
   ID  Feature1  Feature2  Class
0  A1       2.2       1.0      0
1  A2       2.5       1.2      0
2  B2       5.3       4.1      1
3  B4       5.8       4.3      1


In [17]:
import cv2
import numpy as np
import requests
from PIL import Image
from io import BytesIO

# Function to download an image from the internet
def download_image(url, timeout=30):
    """Download an image and return it as an RGB NumPy array.

    NOTE: this redefines the ``download_image(url, filename)`` from the
    earlier cell with a different signature and return type (array instead of
    PIL image, nothing written to disk). Re-running the earlier
    download/augmentation cell after this one will therefore fail until the
    original definition is executed again.

    Args:
        url: HTTP(S) address of the image to fetch.
        timeout: Seconds to wait for the server before giving up. Requests
            applies no timeout unless one is passed, so a stalled connection
            would otherwise hang the cell indefinitely.

    Returns:
        ``np.ndarray`` holding the decoded image converted to RGB.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If the server does not respond within ``timeout``.
    """
    response = requests.get(url, timeout=timeout)
    # Fail on HTTP errors right here; otherwise the error page body would be
    # handed to PIL and surface as an unrelated image-decoding failure.
    response.raise_for_status()
    img = Image.open(BytesIO(response.content)).convert("RGB")
    return np.array(img)

# Image URLs (note: this rebinds the `image_urls` list from the earlier cell)
image_urls = [
    "https://images.unsplash.com/photo-1518791841217-8f162f1e1131?w=300",   # Cat
    "https://images.unsplash.com/photo-1501785888041-af3ef285b470?w=300"    # Landscape
]

# Fetch both pictures, in list order
print("Downloading images...")
img1, img2 = (download_image(link) for link in image_urls)

# Bring both images to one common size so they can be combined element-wise
target_size = (300, 300)
img1 = cv2.resize(img1, target_size)
img2 = cv2.resize(img2, target_size)

# Keep the resized inputs on disk for comparison with the mixed results
for pixels, path in ((img1, "original_cat.jpg"), (img2, "original_landscape.jpg")):
    Image.fromarray(pixels).save(path)
print("Original images saved!")

# MixUp Function
def apply_mixup(img1, img2, lam=0.5):
    """Blend two images element-wise as ``lam * img1 + (1 - lam) * img2``.

    Args:
        img1: First image array.
        img2: Second image array, combined element-wise with ``img1``.
        lam: Weight given to ``img1``; ``img2`` gets ``1 - lam``. A fixed
            value is used (rather than a random draw) so the effect is
            clearly visible.

    Returns:
        ``np.ndarray`` of dtype ``uint8`` holding the blended image.
    """
    blended = lam * img1.astype(np.float32) + (1 - lam) * img2.astype(np.float32)
    # Round to the nearest level and clip before casting: a bare astype()
    # truncates toward zero (0.7 -> 0) and wraps around for values outside
    # 0..255, which can happen when lam lies outside [0, 1].
    mixed_img = np.clip(np.rint(blended), 0, 255).astype(np.uint8)
    return mixed_img

# Blend the two inputs at two different mixing weights
print("\nApplying MixUp...")
mixup_1 = apply_mixup(img1, img2, lam=0.7)  # 70% cat, 30% landscape
mixup_2 = apply_mixup(img1, img2, lam=0.3)  # 30% cat, 70% landscape

# Write each blend to its own file, in the order they were computed
for blended, target in ((mixup_1, "mixup_70_30.jpg"), (mixup_2, "mixup_30_70.jpg")):
    Image.fromarray(blended).save(target)
print("MixUp results saved!")

# CutMix Function
def apply_cutmix(img1, img2, box_frac=0.5):
    """Paste a centred rectangular patch of ``img2`` into a copy of ``img1``.

    The patch position is fixed (not random) so results are consistent
    between runs.

    Args:
        img1: Base image array; it is copied, not modified.
        img2: Image array the patch is taken from, at the same coordinates.
        box_frac: Patch width/height as a fraction of ``img1``'s width/height.
            The default of 0.5 reproduces the original fixed 50% box.

    Returns:
        A new array equal to ``img1`` outside the box and ``img2`` inside it.

    Raises:
        ValueError: If ``box_frac`` is not in the range (0, 1].
    """
    if not 0 < box_frac <= 1:
        raise ValueError("box_frac must be in the range (0, 1]")

    h, w = img1.shape[:2]

    # Box size, then the offset that leaves an equal margin on both sides
    r_w, r_h = int(w * box_frac), int(h * box_frac)
    r_x, r_y = int(w * (1 - box_frac) / 2), int(h * (1 - box_frac) / 2)

    # Work on a copy so the caller's img1 stays untouched
    mixed = img1.copy()
    mixed[r_y:r_y + r_h, r_x:r_x + r_w] = img2[r_y:r_y + r_h, r_x:r_x + r_w]

    return mixed

# Paste the patch from the second image into the first and store the result
print("Applying CutMix...")
cutmix_result = apply_cutmix(img1, img2)
cutmix_image = Image.fromarray(cutmix_result)
cutmix_image.save("cutmix_result.jpg")
print("✓ CutMix result saved!")

Downloading images...
Original images saved!

Applying MixUp...
MixUp results saved!
Applying CutMix...
✓ CutMix result saved!
