In [1]:
import json

import pandas as pd

from shapely.geometry import Point
from shapely.geometry import Polygon

from CCAgT_utils.converters import CCAgT

In [2]:
# Toy annotation set: three images spread over two slides ('A' and 'B').
# Each row is (image_name, geometry, category_id); category 3 rows start out
# as single points, and the one category 1 row is a triangle.
columns = ('image_name', 'geometry', 'category_id')
rows = [
    ('A_xxx', Point(1, 1), 3),
    ('A_yyy', Point(10, 10), 3),
    ('A_yyy', Polygon([(40, 40), (50, 50), (50, 40)]), 1),
    ('B_yyy', Point(10, 10), 3),
    ('B_yyy', Point(2000, 2000), 3),
]

# Build one dict per annotation so the DataFrame columns follow `columns` order.
data = [dict(zip(columns, row)) for row in rows]

df = pd.DataFrame(data)
df

Unnamed: 0,image_name,geometry,category_id
0,A_xxx,POINT (1 1),3
1,A_yyy,POINT (10 10),3
2,A_yyy,"POLYGON ((40 40, 50 50, 50 40, 40 40))",1
3,B_yyy,POINT (10 10),3
4,B_yyy,POINT (2000 2000),3


In [3]:
# Wrap the annotations frame in the CCAgT annotations helper.
# NOTE(review): a later cell displays `ccagt_ann.df` with the columns that are
# assigned to `df` after this point, so the helper appears to keep a reference
# to this same DataFrame rather than a copy -- confirm against the library.
ccagt_ann = CCAgT.CCAgT_Annotations(df)

In [4]:
# Store a slide identifier for every annotation as a new column.
# In the rendered output the ids are 'A' / 'B', i.e. they match the part of
# `image_name` before the underscore (presumably how the slide is encoded in
# the file name -- verify against the library).
# The column is assigned on `df` itself (not on a copy) so the frame that
# `ccagt_ann` works on sees it as well.
df['slide_id'] = ccagt_ann.get_slide_id()
df

Unnamed: 0,image_name,geometry,category_id,slide_id
0,A_xxx,POINT (1 1),3,A
1,A_yyy,POINT (10 10),3,A
2,A_yyy,"POLYGON ((40 40, 50 50, 50 40, 40 40))",1,A
3,B_yyy,POINT (10 10),3,B
4,B_yyy,POINT (2000 2000),3,B


In [5]:
# Record the geometry type of each annotation as a string column
# ('Point' / 'Polygon' in the rendered output). The next cell uses this column
# to select the point annotations.
df['geo_type'] = ccagt_ann.geometries_type()
df

Unnamed: 0,image_name,geometry,category_id,slide_id,geo_type
0,A_xxx,POINT (1 1),3,A,Point
1,A_yyy,POINT (10 10),3,A,Point
2,A_yyy,"POLYGON ((40 40, 50 50, 50 40, 40 40))",1,A,Polygon
3,B_yyy,POINT (10 10),3,B,Point
4,B_yyy,POINT (2000 2000),3,B,Point


In [6]:
# Select the category 3 annotations (named 'Satellite' in the categories
# helper shown further down) that are still stored as single points.
satellite_points_mask = (df['category_id'] == 3) & (df['geo_type'] == 'Point')
sat_series = df.loc[satellite_points_mask, 'geometry']

# Replace each selected point with the polygon produced by the helper. The
# write goes through `.loc` on `df` itself so the frame is updated in place.
df.loc[satellite_points_mask, 'geometry'] = ccagt_ann.satellite_point_to_polygon(sat_series)

# The geometries changed, so refresh the type column as well.
df['geo_type'] = ccagt_ann.geometries_type()

df

Unnamed: 0,image_name,geometry,category_id,slide_id,geo_type
0,A_xxx,"POLYGON ((6.046265044040321 1, 5.6621409898160...",3,A,Polygon
1,A_yyy,"POLYGON ((15.04626504404032 10, 14.66214098981...",3,A,Polygon
2,A_yyy,"POLYGON ((40 40, 50 50, 50 40, 40 40))",1,A,Polygon
3,B_yyy,"POLYGON ((15.04626504404032 10, 14.66214098981...",3,B,Polygon
4,B_yyy,"POLYGON ((2005.04626504404 2000, 2004.66214098...",3,B,Polygon


In [7]:
# Replace every geometry with its version fitted to the image boundary.
# In the rendered output the polygon around (1, 1) is cut off at coordinate 0,
# and the polygon around (2000, 2000) comes back as a missing value --
# presumably because it lies completely outside the image (the image size is
# not visible in this notebook; verify in the library).
df['geometry'] = ccagt_ann.fit_geometries_to_image_boundary()
df

Unnamed: 0,image_name,geometry,category_id,slide_id,geo_type
0,A_xxx,"POLYGON ((5.847352676660663 0, 0 0, 0 5.847352...",3,A,Polygon
1,A_yyy,"POLYGON ((15.04626504404032 10, 14.66214098981...",3,A,Polygon
2,A_yyy,"POLYGON ((40 40, 50 50, 50 40, 40 40))",1,A,Polygon
3,B_yyy,"POLYGON ((15.04626504404032 10, 14.66214098981...",3,B,Polygon
4,B_yyy,,3,B,Polygon


In [8]:
# Drop the annotations whose geometry became missing in the previous step.
# `inplace=True` is deliberate: `ccagt_ann.df`, displayed a few cells below,
# no longer contains the dropped row, so the helper appears to share this
# DataFrame object. Rebinding (`df = df.dropna(...)`) would leave the helper
# working on the old frame that still has the missing geometry.
df.dropna(axis=0, subset=['geometry'], inplace=True)


In [9]:
# Store the area of each geometry as a new column. This runs after the rows
# with a missing geometry were dropped, so every remaining row gets a value.
df['area'] = ccagt_ann.geometries_area()
df

Unnamed: 0,image_name,geometry,category_id,slide_id,geo_type,area
0,A_xxx,"POLYGON ((5.847352676660663 0, 0 0, 0 5.847352...",3,A,Polygon,30.383525
1,A_yyy,"POLYGON ((15.04626504404032 10, 14.66214098981...",3,A,Polygon,77.959629
2,A_yyy,"POLYGON ((40 40, 50 50, 50 40, 40 40))",1,A,Polygon,50.0
3,B_yyy,"POLYGON ((15.04626504404032 10, 14.66214098981...",3,B,Polygon,77.959629


In [10]:
# Assign an integer id to each image. In the rendered output, rows sharing an
# `image_name` share an id (A_xxx -> 1, A_yyy -> 2, B_yyy -> 3).
df['image_id'] = ccagt_ann.generate_ids(df['image_name'])
df

Unnamed: 0,image_name,geometry,category_id,slide_id,geo_type,area,image_id
0,A_xxx,"POLYGON ((5.847352676660663 0, 0 0, 0 5.847352...",3,A,Polygon,30.383525,1
1,A_yyy,"POLYGON ((15.04626504404032 10, 14.66214098981...",3,A,Polygon,77.959629,2
2,A_yyy,"POLYGON ((40 40, 50 50, 50 40, 40 40))",1,A,Polygon,50.0,2
3,B_yyy,"POLYGON ((15.04626504404032 10, 14.66214098981...",3,B,Polygon,77.959629,3


In [11]:
# Show the frame held by the helper: it displays the same rows and columns as
# `df` after all of the steps above.
ccagt_ann.df

Unnamed: 0,image_name,geometry,category_id,slide_id,geo_type,area,image_id
0,A_xxx,"POLYGON ((5.847352676660663 0, 0 0, 0 5.847352...",3,A,Polygon,30.383525,1
1,A_yyy,"POLYGON ((15.04626504404032 10, 14.66214098981...",3,A,Polygon,77.959629,2
2,A_yyy,"POLYGON ((40 40, 50 50, 50 40, 40 40))",1,A,Polygon,50.0,2
3,B_yyy,"POLYGON ((15.04626504404032 10, 14.66214098981...",3,B,Polygon,77.959629,3


In [12]:
# Load the per-category metadata (a JSON list with one dict per category; the
# first entry and the list length are displayed below).
helper_path = '../../data/samples/categories_helper.json'

# JSON text is UTF-8 by specification. Pass the encoding explicitly so the read
# does not depend on the platform's default locale encoding.
with open(helper_path, 'r', encoding='utf-8') as hf:
    # The variable name (including its spelling) is kept as-is because the
    # following cell passes it to `CCAgT.Categories_Helper`.
    categories_helpper = json.load(hf)

# Peek at the first category entry and the total number of categories.
categories_helpper[0], len(categories_helpper)


({'color': [21, 62, 125],
  'name': 'Nucleus',
  'id': 1,
  'labelbox_schemaId': '<Unique ID for category Nucleus>',
  'minimal_area': 500},
 7)

In [13]:
# Wrap the raw category list loaded from JSON in the CCAgT categories helper,
# which is queried in the next cell and passed to `delete_by_area` later on.
ccagt_helper = CCAgT.Categories_Helper(categories_helpper)

In [14]:
# Display the category-id -> category-name mapping exposed by the helper
# (for example, id 3 is 'Satellite' and id 1 is 'Nucleus').
ccagt_helper.name_by_category_id

{1: 'Nucleus',
 2: 'Cluster',
 3: 'Satellite',
 4: 'Nucleus_out_of_focus',
 5: 'Overlapped_Nuclei',
 6: 'non-viable_nucleus',
 7: 'Leukocyte_Nucleus'}

In [15]:
# Filter the annotations by area using the categories helper, skipping
# category 3 ('Satellite'). In the rendered output the category 1 row with
# area 50.0 is gone; the helper data lists a `minimal_area` of 500 for that
# category, so rows below their category's minimum are presumably what gets
# removed -- verify in the library.
# NOTE(review): `df` is rebound to the returned frame here. Whether
# `ccagt_ann.df` is updated too cannot be seen from this notebook.
df = ccagt_ann.delete_by_area(ccagt_helper, ignore_categories={3})
df

Unnamed: 0,image_name,geometry,category_id,slide_id,geo_type,area,image_id
0,A_xxx,"POLYGON ((5.847352676660663 0, 0 0, 0 5.847352...",3,A,Polygon,30.383525,1
1,A_yyy,"POLYGON ((15.04626504404032 10, 14.66214098981...",3,A,Polygon,77.959629,2
3,B_yyy,"POLYGON ((15.04626504404032 10, 14.66214098981...",3,B,Polygon,77.959629,3
