In [1]:
# g++ -O3 -shared -o c_functions.so -fPIC c_functions.cpp
try:
    import pandas as pd
    import numpy as np
    import math
    import csv
    import ctypes
    import os
    import geopandas as gpd
    from rtree import index
    from itertools import combinations
    import time
except:
    !pip install geopandas



In [2]:
library = ctypes.CDLL('./functions.so')

In [3]:
import time
start = time.time()

In [4]:
#s = time.time()
library.subregion_main()
#e = time.time()
#print("ENTIRE SUBREGION_MAIN PROCESSING:", e - s)

Degree 2 Prevalent Patterns for Sub-Region 0:
(0, 1)
(0, 2)
(0, 3)
(0, 6)
(1, 2)
(1, 3)
(1, 6)
(2, 3)
(2, 6)
(3, 6)
Degree 3 Prevalent Patterns for Sub-Region 0: 
(0, 1, 2)
(0, 2, 3)
(0, 2, 6)
(1, 2, 3)
Degree 2 Prevalent Patterns for Sub-Region 1:
(0, 1)
(0, 2)
(0, 3)
(0, 6)
(1, 2)
(1, 3)
(2, 3)
(2, 6)
(3, 6)
Degree 3 Prevalent Patterns for Sub-Region 1: 
(0, 1, 2)
(0, 2, 3)
(0, 2, 6)
(0, 3, 6)
(1, 2, 3)
(2, 3, 6)
Degree 4 Prevalent Patterns for Sub-Region 1: 
(0, 2, 3, 6)


0

In [None]:
#s = time.time()
# Parameters: These are the only things that need to be changed
# parameters: distance_threshold, prevalance_threshold
distance_threshold = 16
prevalence_threshold = 0.55    # set the prevalence threshold
# shapefile path to determine border points
shapefile_path = '/Data/Region6/shapefile'
directory_path = '/Data/Region6'
subregions = []
dataframes = []
offsets = [0]
number_subregions = 0
for filename in os.listdir(directory_path):
    if filename.endswith(".csv"):
        file_path = os.path.join(directory_path, filename)
        df = pd.read_csv(file_path)
        df.index = range(offsets[number_subregions], offsets[number_subregions] + len(df))  # apply the offset
        offsets.append(df.shape[number_subregions])
        dataframes.append(df)
        number_subregions += 1
        
offsets.pop()
#e = time.time()
#print("PROCESSING (SEC):", e - s)

5

In [7]:
# This class holds the information pertaining to the border region
class Border:
    def __init__(self):
        self.combined_df = pd.DataFrame()
        self.border_df = pd.DataFrame()

In [8]:
#s = time.time()
border = Border()
border.combined_df = pd.concat(dataframes, ignore_index=True)
border.combined_df['ID'] = border.combined_df.index

# sort the df by featureType
border.combined_df = border.combined_df.sort_values(by='featureType', ignore_index=True)

In [9]:
shapefile = gpd.read_file(shapefile_path)
points = np.array(border.combined_df[['latitude', 'longitude']])
points_gdf = gpd.GeoDataFrame(geometry=gpd.points_from_xy(points[:, 1], points[:, 0]))
points_gdf = points_gdf.set_crs("EPSG:4326")
shapefile = shapefile.to_crs("EPSG:4326")

In [10]:
featureType = []
x = []
y = []
ID = []
curr_index = 0
for point in points_gdf.geometry:
    # Convert radius from kilometers to degrees (assuming a spherical Earth)
    radius_deg = distance_threshold / 111.32  # Approximately 111.32 km per degree of latitude
    # Create a circle geometry
    circle = point.buffer(radius_deg)
    
    # Find the borders that the point intersects
    intersected_borders = [border for border in shapefile['geometry'] if circle.intersects(border)]
    
    if len(intersected_borders) >= 2:
        featureType.append(border.combined_df['featureType'].iloc[curr_index])
        x.append(border.combined_df['xCoordinate'].iloc[curr_index])
        y.append(border.combined_df['yCoordinate'].iloc[curr_index])
        ID.append(border.combined_df['ID'].iloc[curr_index])
    curr_index += 1

In [11]:
border.border_df = pd.DataFrame({
    'featureType': featureType,
    'xCoordinate': x,
    'yCoordinate': y,
    'ID': ID})

In [12]:
ids = border.border_df['ID'].to_list()

In [None]:
border.border_df.to_csv("/IntermediateData/border.csv", index=False)
#e = time.time()
#print("BORDER REGION CALCULATION (SEC): ", e - s)

In [14]:
#s = time.time()
number_borders = 1
library.border_main(ctypes.c_int(number_borders))
#e = time.time()
#print("ENTIRE BORDER MAIN:", e - s)

Degree 2 Prevalent Patterns for Sub-Region 0:
(0, 1)
(0, 2)
(0, 3)
(0, 6)


0

(1, 2)
(1, 3)
(2, 3)
(2, 6)


In [15]:
#s = time.time()
arr_len = len(ids)
arr_type = ctypes.c_int * arr_len
arr_c = arr_type(*ids)
border_number = 0
library.update_border_info.argtypes = (ctypes.POINTER(ctypes.c_int), ctypes.c_int, ctypes.c_int)
library.update_border_info(arr_c, arr_len, border_number)

0

In [16]:
library.combine_hashmaps.argtypes = (ctypes.c_int, ctypes.c_int)
library.combine_hashmaps(number_subregions, number_borders)

0

In [17]:
library.combine_instance_tables.argtypes = (ctypes.c_int, ctypes.c_int)
library.combine_instance_tables(number_subregions, number_borders)
#e = time.time()
#print("UPDATE BORDER INFO / COMBINE INSTANCE TABLE / HASHMAP (SEC):", e - s)

0

In [18]:
#s = time.time()
library.region_main()
#e = time.time()
#print("ENTIRE REGION_MAIN (SEC):", e - s)

Degree 2 Prevalent Patterns for Entire Region:
(0, 1)
(0, 3)
(0, 6)
(1, 3)
(2, 3)
(3, 6)
(1, 2)
(0, 2)
(2, 6)
Degree 3 Prevalent Patterns for Entire Region:
(0, 1, 2)
(0, 2, 3)
(0, 2, 6)
(0, 3, 6)
(1, 2, 3)
(2, 3, 6)
Degree 4 Prevalent Patterns for Entire Region:
(0, 2, 3, 6)


0

In [None]:
end = time.time()