In [13]:
import random
from collections import deque

class GenerateImages(object):
    """
    Simulate paired microscope and dye-sensor images of parasites.

    Every image is a list of [x, y] cells collected by a breadth-first flood
    fill on a shared grid, where x indexes rows (0 <= x < grid_height) and y
    indexes columns (0 <= y < grid_width). For each parasite a microscope
    image (the body) and a sensor image (the dye) are grown from the same
    start cell; `detect_cancer` then compares the two areas.

    NOTE: the grid is materialised as nested lists, so memory grows with
    grid_height * grid_width. The default 100000 x 100000 size means 10**10
    list cells, so pass explicit, much smaller dimensions in practice.
    """

    def __init__(self, num_images, grid_height = 100000, grid_width = 100000, 
                 cancer_probability = 0.001, cancer_area_threshold = 0.1, 
                 parasite_occupied_area_percentage = 0.25):
        """
        num_images: number of (microscope, sensor) image pairs to generate.
        grid_height, grid_width: number of rows / columns of the grid.
        cancer_probability: chance that a sensor image is generated as cancerous.
        cancer_area_threshold: used as a fraction of the whole grid area when
            growing sensor images, and as the relative dye-vs-body excess that
            `detect_cancer` requires.
        parasite_occupied_area_percentage: fraction of the grid area a parasite
            must cover before its growth is allowed to stop.
        """
        self.num_images = num_images
        self.grid_height = grid_height
        self.grid_width = grid_width
        self.parasite_occupied_area_percentage = parasite_occupied_area_percentage
        self.cancer_area_threshold = cancer_area_threshold
        self.cancer_probability = cancer_probability

        # grid[x][y]: grid_height rows (x) each holding grid_width columns (y)
        self.grid = self._blank_grid()

        self.microscope_images = []
        self.sensor_images = []
        self.cancerous_parasites_list = []

    def _blank_grid(self):
        """Return a fresh grid_height x grid_width grid of zeros (0 = never labelled)."""
        return [[0] * self.grid_width for _ in range(self.grid_height)]

    def bfs_helper(self, grid, start_x, start_y, parasite_num=1, microscope=True):
        """
        Flood-fill `grid` breadth-first from (start_x, start_y), labelling every
        visited cell with `parasite_num`, and stop at a randomised area.

        microscope is True when generating a microscope image and False when
        generating a sensor image. `grid` is modified in place.

        Returns the list of visited cells as [x, y] pairs, in visiting order.
        """
        queue = deque([(start_x, start_y)])
        area = 0

        # Both limits are fractions of the WHOLE grid area.
        min_parasite_area = self.parasite_occupied_area_percentage*self.grid_height*self.grid_width
        extra_sensor_area = self.cancer_area_threshold*self.grid_height*self.grid_width

        # One draw per call decides whether this parasite is cancerous; the flag
        # only influences the sensor branch below.
        cancer = 1 if random.random() >= (1 - self.cancer_probability) else 0

        image = []

        while queue:
            x, y = queue.popleft()
            if grid[x][y] == parasite_num:
                # a cell can be queued by several neighbours; skip repeats
                continue

            grid[x][y] = parasite_num
            image.append([x, y])
            area += 1

            if microscope:
                # Once the minimum area is reached, every further cell ends the
                # growth with probability 0.5.
                if area >= min_parasite_area and random.random() >= 0.5:
                    break
            else:
                # Sensor image: stop at the first cell where
                #   area >= min_parasite_area + (cancer + r) * extra_sensor_area
                # with r a fresh uniform draw in [0, 1) for every visited cell.
                # Non-cancerous (cancer=0): the final area stays below
                # min_parasite_area + extra_sensor_area (plus one cell).
                # Cancerous (cancer=1): the final area is at least
                # min_parasite_area + extra_sensor_area.
                # The random term keeps the images from all having the same area.
                if area >= min_parasite_area + (cancer + random.random())*extra_sensor_area:
                    break

            # Neighbours (up, down, left, right); shuffled for sensor images only
            neighbours = [(x + 1, y), (x - 1, y), (x, y + 1), (x, y - 1)]
            if not microscope:
                random.shuffle(neighbours)

            # Queue in-bounds neighbours that do not carry this label yet
            for nx, ny in neighbours:
                if 0 <= nx < self.grid_height and 0 <= ny < self.grid_width and grid[nx][ny] != parasite_num:
                    queue.append((nx, ny))

        return image

    def generate_images(self):
        """
        Generate `num_images` microscope/sensor image pairs and store them in
        `microscope_images` and `sensor_images`.

        Each call starts from a blank grid and empty result lists, so calling
        it again regenerates the images instead of reusing stale cell labels
        or appending to the previous results.
        """
        self.grid = self._blank_grid()
        self.microscope_images = []
        self.sensor_images = []
        self.cancerous_parasites_list = []

        grid = self.grid
        for i in range(1, self.num_images + 1):
            start_x = random.randint(0, self.grid_height - 1)  # x is along the rows
            start_y = random.randint(0, self.grid_width - 1)   # y is along the columns

            # Label i marks the microscope pass and -i the sensor pass, so the two
            # fills from the same start cell do not block each other.
            microscope_image = self.bfs_helper(grid, start_x, start_y, i, microscope=True)
            sensor_image = self.bfs_helper(grid, start_x, start_y, -i, microscope=False)

            self.microscope_images.append(microscope_image)
            self.sensor_images.append(sensor_image)

    def detect_cancer(self):
        """
        Flag every image whose dye area is at least (1 + cancer_area_threshold)
        times its parasite area.

        The result is rebuilt on every call, stored in
        `cancerous_parasites_list` and returned as a list of 0-based image
        indices. It is empty when no images have been generated.
        """
        ratio = 1 + self.cancer_area_threshold
        self.cancerous_parasites_list = [
            idx
            for idx, (parasite_cells, dye_cells)
            in enumerate(zip(self.microscope_images, self.sensor_images))
            if len(dye_cells) >= ratio * len(parasite_cells)
        ]
        return self.cancerous_parasites_list

In [14]:
# Simulation settings
num_images = 1000
grid_height = 100
grid_width = 100
cancer_probability = 0.001 # each parasite is generated as cancerous with probability 0.1%
cancer_area_threshold = 0.1 # flagged as cancer when dye area is at least 10% larger than parasite area
parasite_occupied_area_percentage = 0.25 # minimum fraction of the grid area occupied by a parasite

# Pass every setting by keyword so that editing a value above actually takes
# effect (previously the last two were defined but never handed to the class).
# No random seed is set in this cell, so results vary from run to run.
obj = GenerateImages(
    num_images,
    grid_height=grid_height,
    grid_width=grid_width,
    cancer_probability=cancer_probability,
    cancer_area_threshold=cancer_area_threshold,
    parasite_occupied_area_percentage=parasite_occupied_area_percentage,
)
obj.generate_images()
obj.detect_cancer();  # trailing ';' keeps the cell output empty; results are shown in the next cell

In [15]:
# 0-based indices (positions in obj.microscope_images / obj.sensor_images) of the
# images that detect_cancer() flagged; an empty list means none met the threshold.
obj.cancerous_parasites_list

[]