In [1]:
from itertools import combinations

import pandas as pd
from shapely import intersection
from geopandas import GeoDataFrame, sjoin
from colocationpatterns.sample_data import generate_sample_data

In [2]:
class ColocationMiner:

    def __init__(
        self,
        data: GeoDataFrame,
        feature_type_column: str,
        feature_type_unique_id_column: str,
        neighbourhood: float
    ):

        self.data = data # all events instances (id, event tpy, location, other attributes)
        self.feature_type_column = feature_type_column # column to recognize event type
        self.feature_type_unique_id_column = feature_type_unique_id_column # column with id unique within event type
        self.ET = set(data[feature_type_column].unique()) # set of event types
        self.K = data[feature_type_column].nunique() # number of event types
        self.R = neighbourhood # radius to set euclidean, neighbourhood relation
        self.tables = {k: {} for k in range(2, self.K+1)} # structure to store tables based on size-k colocation
        self.statistics = None
        self.colocations = None

    def calculate_participation_ratio(self, candidate_table, feature_type):

        all_feature_types = len(self.get_elementary_table(feature_type))
        features_in_colocation = candidate_table[feature_type].nunique()

        return features_in_colocation/all_feature_types

    def calculate_participation_index(self, participation_ratios):

        return min(participation_ratios)

    def calculate_conditional_probalility(self):

        pass

    def get_elementary_table(self, feature_type):

        return GeoDataFrame([{
            feature_type: getattr(event, self.feature_type_unique_id_column),
            'geometry': event.geometry
        } for event in self.data[self.data[self.feature_type_column]==feature_type].itertuples()])

    def merge_by_neighbourhood(self, tables_id:tuple):
    
        result = self.get_elementary_table(tables_id[0])    
        result['geometry'] = result['geometry'].buffer(self.R)
        
        for table_id in tables_id[1::]:

            table = self.get_elementary_table(table_id)
            merged = sjoin(result, table)
            if merged.empty: return None
            merged.drop(columns='index_right', inplace=True)
            merged = pd.merge(merged, table, left_on=table_id, right_on=table_id)
            merged['geometry_y'] = merged['geometry_y'].buffer(self.R)
            merged['geometry'] = merged.apply(lambda row: intersection(row['geometry_x'], row['geometry_y']), axis=1)
            result = merged.drop(columns=['geometry_x', 'geometry_y'])
    
        return result
    
    
    def mine(self, min_participation_index: float, store_higher_lvl_tables:bool=False, verbose:bool=False):

        statistics = {}

        # Generate co-location candidates and compute statistics
        for k in range(2, self.K+1):
            
            for colocation in combinations(self.ET, k):

                if verbose: print(f'Creating table k_level = {k} for tables {colocation}')
                table = self.merge_by_neighbourhood(colocation)
                if table is None:
                    if verbose: print(f'No points for co-location candidate {colocation} to match neighbourhood definition R={self.R}')
                    continue
                if store_higher_lvl_tables:
                    self.tables[k][colocation] = table
                
                statistics[colocation] = {'participation_ratios': {}}
                for feature_type in colocation:
                    pr = self.calculate_participation_ratio(table, feature_type)
                    statistics[colocation]['participation_ratios'][feature_type] = pr
                    if verbose: print(f'\tParticipation ratio for {feature_type} in colocation {colocation} is {pr}')
                
                pi = self.calculate_participation_index(statistics[colocation]['participation_ratios'].values())
                statistics[colocation]['participation_index'] = pi
                if verbose: print(f'\tParticipation index for colocation {colocation} is {pi}')

                statistics[colocation]['colocation'] = pi >= min_participation_index
    
        self.statistics = statistics
        self.colocations = list(filter(lambda key: self.statistics[key]['colocation'], self.statistics.keys()))

        return self.colocations

In [3]:
data = generate_sample_data()

In [24]:
cm = ColocationMiner(data, 'spatial_feature_type', 'instance_id', 4)

In [25]:
cm.mine(0.5, store_higher_lvl_tables=True)

[('C', 'A'), ('C', 'B'), ('A', 'B'), ('C', 'A', 'B')]

In [26]:
cm.tables

{2: {('C',
   'A'):    C  A                                           geometry
  0  1  3  POLYGON ((4.82224 2.16114, 4.95448 2.53073, 5....
  1  2  1  POLYGON ((4.08074 2.90793, 4.02314 2.51964, 3....,
  ('C',
   'B'):    C  B                                           geometry
  0  1  2  POLYGON ((12.63074 0.60793, 12.57314 0.21964, ...
  1  1  4  POLYGON ((6.76441 -2.52769, 6.42772 -2.32588, ...
  2  2  1  POLYGON ((4.08074 2.90793, 4.02314 2.51964, 3....
  3  3  3  POLYGON ((17.52588 3.27772, 17.29204 2.96243, ...
  4  3  5  POLYGON ((18.18074 5.10793, 18.12314 4.71964, ...,
  ('A',
   'B'):    A  B                                           geometry
  0  1  1  POLYGON ((5.73074 1.35793, 5.67314 0.96964, 5....
  1  2  4  POLYGON ((9.43074 3.35793, 9.37314 2.96964, 9....
  2  3  2  POLYGON ((12.48074 2.85793, 12.42314 2.46964, ...
  3  3  4  POLYGON ((9.66114 -0.57776, 9.28036 -0.67314, ...},
 3: {('C',
   'A',
   'B'):    C  A  B                                           geometry
  0 

In [17]:
cm.statistics

{('C', 'A'): {'participation_ratios': {'C': 0.6666666666666666, 'A': 0.5},
  'participation_index': 0.5,
  'colocation': True},
 ('C', 'B'): {'participation_ratios': {'C': 1.0, 'B': 1.0},
  'participation_index': 1.0,
  'colocation': True},
 ('A', 'B'): {'participation_ratios': {'A': 0.75, 'B': 0.6},
  'participation_index': 0.6,
  'colocation': True},
 ('C',
  'A',
  'B'): {'participation_ratios': {'C': 0.6666666666666666,
   'A': 0.5,
   'B': 0.6}, 'participation_index': 0.5, 'colocation': True}}