In [1]:
from data_utils.foreground_loader import IP102Dataset, ForegroundBlur, ForegroundRotate
from data_utils.background_loader import PaddyDiseaseClassificationDataset, RiceLeafsDataset, BackgroundRandomCrop
import torchvision.transforms as T
from tqdm import tqdm
import numpy as np


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Root directory of the IP102 detection data used as pest foregrounds.
ip102_dir = 'D:/git/pestvision_data/pestvision_data/foreground_data/Detection_IP102' # change this path appropriately

# Size ranges forwarded to the foreground datasets (and reused by the blending cells below).
source_range_big = (40, 60)
source_range_small = (60, 80)

# Foreground augmentation pipeline: ForegroundBlur followed by ForegroundRotate,
# configured with blur_prob=0.35 and rotation_prob=0.35 respectively.
transform_BlurRotate = T.Compose([
    ForegroundBlur(blur_prob=0.35),
    ForegroundRotate(rotation_prob=0.35),
])

# One IP102 dataset per split (built in the order train, val, test), all sharing
# the same size ranges and transform.
foreground_datasets = {
    split_name: IP102Dataset(
        dataset_dir=ip102_dir,
        split=split_name,
        source_image_range_big=source_range_big,
        source_image_range_small=source_range_small,
        transform=transform_BlurRotate,
    )
    for split_name in ('train', 'val', 'test')
}

# Keep the per-split names available for any cell that refers to them directly.
ip102_dataset_train = foreground_datasets['train']
ip102_dataset_val = foreground_datasets['val']
ip102_dataset_test = foreground_datasets['test']

# Report the number of samples in each split, one line per split.
print('\n'.join(
    f'IP102 {split_name} size: {len(split_dataset)}'
    for split_name, split_dataset in foreground_datasets.items()
))

IP102 train size: 12104
IP102 val size: 3069
IP102 test size: 3796


In [3]:
# Root directory of the paddy disease classification images used as backgrounds.
paddy_disease_classification_dir = 'D:/git/pestvision_data/pestvision_data/background_data/paddy_disease_classification' # change this path appropriately

# Background augmentation: BackgroundRandomCrop configured with crop_prob=0.6.
background_transform = BackgroundRandomCrop(crop_prob=0.6)

# One background dataset per split (built in the order train, val, test), all
# sharing the same transform and target_size.
background_datasets = {
    split_name: PaddyDiseaseClassificationDataset(
        dataset_dir=paddy_disease_classification_dir,
        split=split_name,
        transform=background_transform,
        target_size=512,
    )
    for split_name in ("train", "val", "test")
}

# Keep the per-split names available for any cell that refers to them directly.
paddy_disease_classification_dataset_train = background_datasets['train']
paddy_disease_classification_dataset_val = background_datasets['val']
paddy_disease_classification_dataset_test = background_datasets['test']

# Report the number of samples in each split, one line per split.
print('\n'.join(
    f'Paddy Disease Classification {split_name} size: {len(split_dataset)}'
    for split_name, split_dataset in background_datasets.items()
))

Paddy Disease Classification train size: 6762
Paddy Disease Classification val size: 1559
Paddy Disease Classification test size: 2086


In [8]:
import torch
import torch.optim as optim
from PIL import Image
from skimage.io import imsave
from utils_deep_image_blending import compute_gt_gradient, make_canvas_mask, numpy2tensor, laplacian_filter_tensor, MeanShift, Vgg16, gram_matrix
import os
from data_utils.foreground_loader import AbstractForegroundPestDataset
from data_utils.background_loader import AbstractBackgroundDataset
import numpy as np
from abc import ABC, abstractmethod
from tqdm import tqdm
from typing import Tuple

# Abstract class for synthetic data generation: pest blending
# Concrete classes for different pest blending methods

class AbstractPestBlending(ABC):
    """Common base for pest-blending synthetic data generators.

    Holds the output locations and the settings shared by every blending
    method. Concrete subclasses supply `generate_blended_image`.
    """

    def __init__(self,
                 outputImagesDir: str,
                 outputLabelsDir: str,
                 outputMetadataDir: str,
                 device: str,
                 max_pests_per_image: int):
        """
        Parameters:

            outputImagesDir (str): directory for generated images.
            outputLabelsDir (str): directory for generated label files.
            outputMetadataDir (str): directory for generated metadata.
            device (str): device identifier, stored as `self.device`.
            max_pests_per_image (int): stored as `self.max_pests`.
        """
        # Shared run-time settings.
        self.device = device
        self.max_pests = max_pests_per_image

        # Output locations, stored exactly as given (nothing is created here).
        (self.outputImagesDir,
         self.outputLabelsDir,
         self.outputMetadataDir) = (outputImagesDir,
                                    outputLabelsDir,
                                    outputMetadataDir)

    @abstractmethod
    def generate_blended_image(self,
                               foreground_dataset: AbstractForegroundPestDataset,
                               N_foreground: int,
                               background_dataset: AbstractBackgroundDataset,
                               N_background: int,
                               split: str,
                               file_save_index: int):
        """Generate one synthetic image; must be implemented by subclasses.

        Parameters:

            foreground_dataset (AbstractForegroundPestDataset): dataset of foreground images.
            N_foreground (int): number of samples in the foreground dataset.
            background_dataset (AbstractBackgroundDataset): dataset of background images.
            N_background (int): number of samples in the background dataset.
            split (str): split of the dataset.
            file_save_index (int): index to save the file.
        """


class DeepImageBlending(AbstractPestBlending):
    """Blend pest crops onto a background by optimising the pasted pixels.

    For every pest, the pixels under the pest mask are optimised with L-BFGS
    against a weighted sum of gradient, style, content and total-variation
    losses. Only this first pass is run (there is no second refinement pass).
    """

    def __init__(self,
                 outputImagesDir: str,
                 outputLabelsDir: str,
                 outputMetadataDir: str,
                 device: str,
                 max_pests_per_image: int = 10,
                 target_image_size: int = 512,
                 source_image_range_big: Tuple[int, int] = (40, 60),
                 source_image_range_small: Tuple[int, int] = (60, 80),
                 num_steps1: int = 1000):
        """
        Parameters:

            outputImagesDir (str): path to the directory where the output images will be saved.
            outputLabelsDir (str): path to the directory where the output labels will be saved.
            outputMetadataDir (str): path to the directory where the output metadata will be saved.
            device (str): device to run the model on.
            max_pests_per_image (int): maximum number of pests per image.
            target_image_size (int): size of the target image.
            source_image_range_big (Tuple[int, int]): range for big source images.
            source_image_range_small (Tuple[int, int]): range for small source images.
            num_steps1 (int): number of steps for the first pass of deep image blending.
            (Note: num_steps2 = 0)

        Side effects:
            Creates `train`, `val` and `test` sub-directories under each of the
            three output directories if they do not exist yet.
        """

        super().__init__(outputImagesDir, outputLabelsDir,
                         outputMetadataDir, device, max_pests_per_image)

        self.source_image_range_small = source_image_range_small
        self.source_image_range_big = source_image_range_big
        self.ts = target_image_size
        self.num_steps1 = num_steps1

        for split in ['train', 'val', 'test']:
            for output_root in (self.outputImagesDir, self.outputLabelsDir, self.outputMetadataDir):
                os.makedirs(os.path.join(output_root, split), exist_ok=True)

    def generate_blended_image(self,
                               foreground_dataset: AbstractForegroundPestDataset,
                               N_foreground: int,
                               background_dataset: AbstractBackgroundDataset,
                               N_background: int,
                               split: str,
                               file_save_index: int):
        """
        Generate blended image using deep image blending.

        A random background is drawn and between 0 and `max_pests` (inclusive)
        random foreground pests are blended into it one after another. After
        every pest the image is saved, one metadata line is appended to
        `<outputMetadataDir>/<split>/metadata.txt` and one label line
        (`pest_class_id y_start/ts x_start/ts ss/ts ss/ts`) is written to
        `<outputLabelsDir>/<split>/<file_save_index>.txt`. The label file is
        started afresh for every call, so re-using an index replaces the old
        labels instead of appending to them. With zero pests only the
        background image and a metadata line are written, and a label file
        left over from an earlier call with the same index is removed.

        If `num_steps1 <= 0` no optimisation is run and nothing is written for
        the pests.

        Parameters:

                foreground_dataset (AbstractForegroundPestDataset): dataset of foreground images.
                N_foreground (int): number of samples in the foreground dataset.
                background_dataset (AbstractBackgroundDataset): dataset of background images.
                N_background (int): number of samples in the background dataset.
                split (str): split of the dataset.
                file_save_index (int): index to save the file.
        """

        background_sample = background_dataset[np.random.randint(N_background)]
        target_img = np.array(background_sample['image'])
        target_filename = background_sample['image_filename']
        no_pests = np.random.randint(low=0, high=self.max_pests + 1)

        # Every output of this call goes to one of these three files.
        image_path = os.path.join(self.outputImagesDir, split, f'{file_save_index}.png')
        label_path = os.path.join(self.outputLabelsDir, split, f'{file_save_index}.txt')
        metadata_path = os.path.join(self.outputMetadataDir, split, "metadata.txt")

        if no_pests == 0:  # image and metadata only, no labels
            imsave(image_path, target_img.astype(np.uint8))

            # A label file from an earlier run with this index would describe
            # pests that are no longer in the (overwritten) image.
            if os.path.exists(label_path):
                os.remove(label_path)

            with open(metadata_path, 'a') as f:
                f.write(f"{file_save_index}.png {0}/{no_pests} {target_filename}\n")
            return

        centers = np.random.randint(
            low=self.source_image_range_small[1], high=self.ts - self.source_image_range_small[1], size=(no_pests, 2))

        # First Pass loss weights
        grad_weight = 1e4
        style_weight = 1e4
        content_weight = 1
        tv_weight = 1e-6

        # Loop-invariant helpers: build them once per call, not once per pest.
        mse = torch.nn.MSELoss()
        mean_shift = MeanShift(self.device)
        vgg = Vgg16().to(self.device)

        for j in tqdm(range(no_pests), desc=" Iterating over no_pests", leave=False):

            foreground_sample = foreground_dataset[np.random.randint(N_foreground)]

            source_img = np.array(foreground_sample["source_img_resized"])
            source_filename = foreground_sample['source_filename']
            mask_img = np.array(foreground_sample["mask_img_resized"])
            mask_img[mask_img > 0] = 1
            pest_class_id = foreground_sample["pest_class_id"]
            ss = foreground_sample["source_size"]

            # Plain ints keep the metadata text independent of the numpy version.
            # x_start indexes tensor axis 2 and y_start axis 3 in the crop below.
            x_start = int(centers[j][0])
            y_start = int(centers[j][1])

            canvas_mask = make_canvas_mask(x_start, y_start, target_img, mask_img)
            canvas_mask = numpy2tensor(canvas_mask, self.device)
            canvas_mask = canvas_mask.squeeze(0).repeat(3, 1).view(3, self.ts, self.ts).unsqueeze(0)

            gt_gradient = compute_gt_gradient(x_start, y_start, source_img, target_img, mask_img, self.device)

            # Keep the numpy background so it can be restored if nothing is saved.
            target_np = target_img

            source_img = torch.from_numpy(source_img).unsqueeze(0).transpose(1, 3).transpose(2, 3).float().to(self.device)
            target_img = torch.from_numpy(target_img).unsqueeze(0).transpose(1, 3).transpose(2, 3).float().to(self.device)
            input_img = torch.randn(target_img.shape).to(self.device)

            mask_img = numpy2tensor(mask_img, self.device)
            mask_img = mask_img.squeeze(0).repeat(3, 1).view(3, ss, ss).unsqueeze(0)

            optimizer = optim.LBFGS([input_img.requires_grad_()])

            # Counts closure evaluations; a list so the closure can mutate it.
            run = [0]

            def closure():
                # Optimised pixels inside the mask, untouched background outside it.
                blend_img = input_img*canvas_mask + target_img*(canvas_mask-1)*(-1)

                pred_gradient = laplacian_filter_tensor(blend_img, self.device)

                grad_loss = 0
                for c in range(len(pred_gradient)):
                    grad_loss += mse(pred_gradient[c], gt_gradient[c])
                grad_loss /= len(pred_gradient)
                grad_loss *= grad_weight

                target_features_style = vgg(mean_shift(target_img))
                target_gram_style = [gram_matrix(y) for y in target_features_style]

                blend_features_style = vgg(mean_shift(input_img))
                blend_gram_style = [gram_matrix(y) for y in blend_features_style]

                style_loss = 0
                for layer in range(len(blend_gram_style)):
                    style_loss += mse(blend_gram_style[layer], target_gram_style[layer])
                style_loss /= len(blend_gram_style)
                style_loss *= style_weight

                blend_obj = blend_img[:, :, int(x_start-source_img.shape[2]*0.5):int(x_start+source_img.shape[2]*0.5), int(
                    y_start-source_img.shape[3]*0.5):int(y_start+source_img.shape[3]*0.5)]
                source_object_features = vgg(mean_shift(source_img*mask_img))
                blend_object_features = vgg(mean_shift(blend_obj*mask_img))
                # The weight is applied exactly once.
                content_loss = content_weight * mse(blend_object_features.relu2_2, source_object_features.relu2_2)

                tv_loss = torch.sum(torch.abs(blend_img[:, :, :, :-1] - blend_img[:, :, :, 1:])) + \
                    torch.sum(torch.abs(blend_img[:, :, :-1, :] - blend_img[:, :, 1:, :]))
                tv_loss *= tv_weight

                loss = grad_loss + style_loss + content_loss + tv_loss
                optimizer.zero_grad()
                loss.backward()

                run[0] += 1
                return loss

            while run[0] < self.num_steps1:
                optimizer.step(closure)

            input_img.data.clamp_(0, 255)

            blend_img = input_img*canvas_mask + target_img*(canvas_mask-1)*(-1)
            blend_img_np = blend_img.transpose(1, 3).transpose(1, 2).cpu().data.numpy()[0]

            if self.num_steps1 > 0:
                imsave(image_path, blend_img_np.astype(np.uint8))

                str_metadata = f"{file_save_index}.png {j+1}/{no_pests} {target_filename} {source_filename} {(y_start, x_start)}\n"
                with open(metadata_path, 'a') as f:
                    f.write(str_metadata)

                # The first pest starts a fresh label file; later pests append.
                with open(label_path, 'w' if j == 0 else 'a') as f:
                    f.write(f"{pest_class_id} {y_start/self.ts} {x_start/self.ts} {ss/self.ts} {ss/self.ts}\n")

                # The saved image becomes the background for the next pest.
                target_img = np.array(Image.open(image_path).convert('RGB').resize((self.ts, self.ts)))
            else:
                # Nothing was saved: carry on with the unmodified numpy background.
                target_img = target_np


# Number of synthetic images to generate in this run.
N_generate = 1

deep_image_blending = DeepImageBlending(
    outputImagesDir='D:/git/PestVisionChallenge/synthetic_data/test_dib/images',
    outputLabelsDir='D:/git/PestVisionChallenge/synthetic_data/test_dib/labels',
    outputMetadataDir='D:/git/PestVisionChallenge/synthetic_data/test_dib/metadata',
    device='cuda:0',
    max_pests_per_image=5,
    target_image_size=512,
    source_image_range_big=source_range_big,
    source_image_range_small=source_range_small,
    num_steps1=1000,
)

for i in tqdm(range(N_generate)):

    # One uniform draw picks the split: [0, 0.65) train, [0.65, 0.80) val, else test.
    split_prob = np.random.rand()
    split = 'train' if split_prob < 0.65 else ('val' if split_prob < 0.80 else 'test')

    # Foregrounds and backgrounds always come from the same split.
    foreground_dataset = foreground_datasets[split]
    background_dataset = background_datasets[split]

    N_foreground, N_background = len(foreground_dataset), len(background_dataset)

    deep_image_blending.generate_blended_image(
        foreground_dataset=foreground_dataset,
        background_dataset=background_dataset,
        N_foreground=N_foreground,
        N_background=N_background,
        split=split,
        file_save_index=i,
    )


100%|██████████| 1/1 [03:11<00:00, 191.44s/it]


In [15]:
import sympy as sp
import random
import numpy as np
import matplotlib.pyplot as plt

def generate_random_polynomial(degree, variable, coefficient_range):
    """
    Build a polynomial with random integer coefficients.

    One coefficient is drawn with `random.randint` for every power from 0 up
    to `degree`. Both ends of `coefficient_range` are included, so if the
    range contains 0 the leading coefficient can come out as 0 and the
    result then has a lower degree than requested.

    Parameters:
    degree (int): Highest power of the polynomial
    variable (sympy.Symbol): The variable for the polynomial
    coefficient_range (tuple): Range of coefficients as (min, max)

    Returns:
    sympy.Poly: The generated polynomial
    """
    expression = 0
    for power in range(degree + 1):
        # Coefficients are drawn in order of increasing power.
        coefficient = random.randint(*coefficient_range)
        expression = expression + coefficient * variable**power
    return sp.Poly(expression, variable)

def is_invertible(polynomial):
    """
    Check if the polynomial is invertible as a real function.

    A polynomial has a single-valued inverse on the whole real line exactly
    when it is strictly monotonic, i.e. when its derivative never changes
    sign. The derivative changes sign only at real roots of odd
    multiplicity, so the polynomial is invertible when every real root of
    its derivative has even multiplicity (or when there are none).

    Checking the leading coefficient cannot decide this: it is non-zero for
    every non-zero polynomial.

    Parameters:
    polynomial (sympy.Poly): The univariate polynomial to check
        (assumes exact integer/rational coefficients, as produced by
        generate_random_polynomial -- TODO confirm for other domains)

    Returns:
    bool: True if invertible, False otherwise
    """
    degree = polynomial.degree()

    # Constant (and zero) polynomials map everything to one value.
    if degree < 1:
        return False

    # A non-constant linear polynomial is always strictly monotonic.
    if degree == 1:
        return True

    derivative = polynomial.diff()

    # real_roots(multiple=False) yields (root, multiplicity) pairs.
    return all(multiplicity % 2 == 0
               for _, multiplicity in derivative.real_roots(multiple=False))

def plot_polynomial_and_inverse(polynomial):
    """
    Plot the polynomial and its inverse function.

    The equation polynomial(x) = y is solved symbolically for x. If the
    solver returns exactly one solution, the polynomial, that inverse and the
    line y = x are drawn on [-10, 10]; otherwise a message is printed and
    nothing is plotted.

    Parameters:
    polynomial (sympy.Poly): The polynomial to plot
    """
    # Use the polynomial's own generator rather than assuming it is named 'x',
    # and a Dummy for the output so it can never clash with that generator.
    x = polynomial.gen
    y = sp.Dummy('y')
    func = polynomial.as_expr()

    # Solve func(x) = y for x
    inverse_branches = sp.solve(sp.Eq(func, y), x)

    # NOTE(review): the solver returns every symbolic solution it finds, so
    # this guard only passes when there is exactly one.
    if len(inverse_branches) != 1:
        print("The inverse function is not single-valued, indicating the polynomial is not invertible.")
        return

    inverse_func = inverse_branches[0]

    # Generate data points for plotting
    x_vals = np.linspace(-10, 10, 400)
    y_vals = np.array([func.subs(x, val) for val in x_vals], dtype=float)
    y_vals_inverse = np.linspace(-10, 10, 400)
    x_vals_inverse = np.array([inverse_func.subs(y, val) for val in y_vals_inverse], dtype=float)

    # Plot the polynomial and its inverse
    plt.figure(figsize=(10, 6))
    plt.plot(x_vals, y_vals, label=f'{func}')
    plt.plot(y_vals_inverse, x_vals_inverse, label=f'Inverse of {func}', linestyle='dashed')
    plt.plot(x_vals, x_vals, label='y = x', linestyle='dotted', color='gray')
    plt.xlabel('x')
    plt.ylabel('y')
    plt.legend()
    plt.title('Polynomial and its Inverse')
    plt.grid(True)
    plt.show()

# Example usage: draw one random cubic and, when it passes the invertibility
# check, plot it together with its inverse.
x = sp.symbols('x')
degree = 3
coefficient_range = (-10, 10)

# Draw the polynomial, then test it.
poly = generate_random_polynomial(degree, x, coefficient_range)
invertible = is_invertible(poly)

# Report the polynomial and the verdict.
print(f"Random Polynomial: {poly.as_expr()}")
print(f"Invertible: {invertible}")

# Plot only when the check passed.
if not invertible:
    print("The polynomial is not invertible.")
else:
    plot_polynomial_and_inverse(poly)


Random Polynomial: -5*x**3 - 2*x**2 - x + 1
Invertible: True
The inverse function is not single-valued, indicating the polynomial is not invertible.


In [4]:
# from pest_blending import DeepImageBlending

# Number of synthetic images to generate in this run.
N_generate = 5

#change the paths appropriately
deep_image_blending = DeepImageBlending(
    outputImagesDir='D:/git/PestVisionChallenge/synthetic_data/test_dib/images',
    outputLabelsDir='D:/git/PestVisionChallenge/synthetic_data/test_dib/labels',
    outputMetadataDir='D:/git/PestVisionChallenge/synthetic_data/test_dib/metadata',
    device='cuda:0',
    max_pests_per_image=5,
    target_image_size=512,
    source_image_range_big=source_range_big,
    source_image_range_small=source_range_small,
    num_steps1=1000,
)

for i in tqdm(range(N_generate)):

    # One uniform draw picks the split: [0, 0.65) train, [0.65, 0.80) val, else test.
    split_prob = np.random.rand()
    split = 'train' if split_prob < 0.65 else ('val' if split_prob < 0.80 else 'test')

    # Foregrounds and backgrounds always come from the same split.
    foreground_dataset = foreground_datasets[split]
    background_dataset = background_datasets[split]

    N_foreground, N_background = len(foreground_dataset), len(background_dataset)

    deep_image_blending.generate_blended_image(
        foreground_dataset=foreground_dataset,
        background_dataset=background_dataset,
        N_foreground=N_foreground,
        N_background=N_background,
        split=split,
        file_save_index=i,
    )

   

  0%|          | 0/5 [00:34<?, ?it/s]


KeyboardInterrupt: 

In [None]:
from pest_blending import LibcomImageHarmonization

# Number of synthetic images to generate in this run.
N_generate = 10

# All four output directories live under the same test_ih folder. The images
# path previously pointed at a differently named root
# ("PestVisionChallengeChallenge"), which would have separated the images from
# their labels and metadata.
libcom_image_harmonization = LibcomImageHarmonization(
    outputImagesDir='/home/siddhibrahmbhatt/code_siddhi/PestVisionChallenge/synthetic_data_generation/test_ih/images',
    outputLabelsDir='/home/siddhibrahmbhatt/code_siddhi/PestVisionChallenge/synthetic_data_generation/test_ih/labels',
    outputMetadataDir='/home/siddhibrahmbhatt/code_siddhi/PestVisionChallenge/synthetic_data_generation/test_ih/metadata',
    outputTempDir='/home/siddhibrahmbhatt/code_siddhi/PestVisionChallenge/synthetic_data_generation/test_ih/temp_libcom',
    device=0,
    max_pests_per_image=5,
    target_image_size=512,
    source_image_range_big=source_range_big,
    source_image_range_small=source_range_small,
    model_type="PCTNet",
)

for i in tqdm(range(N_generate)):

    # One uniform draw picks the split: [0, 0.65) train, [0.65, 0.80) val, else test.
    split_prob = np.random.rand()

    if split_prob < 0.65:
        split = 'train'
    elif split_prob < 0.80:
        split = 'val'
    else:
        split = 'test'

    # Foregrounds and backgrounds always come from the same split.
    foreground_dataset = foreground_datasets[split]
    background_dataset = background_datasets[split]

    N_foreground, N_background = len(foreground_dataset), len(background_dataset)

    libcom_image_harmonization.generate_blended_image(foreground_dataset=foreground_dataset,
                                                      background_dataset=background_dataset,
                                                      N_foreground=N_foreground,
                                                      N_background=N_background,
                                                      split=split,
                                                      file_save_index=i)