In [1]:
# import libraries
import pandas as pd
import geopandas as gpd
import numpy as np
import matplotlib.pyplot as plt
import shapely.geometry as geom

In [2]:
# dataframe

In [3]:
# Create a DataFrame with box names and values
box_names = [chr(65 + i) for i in range(16)]
index = list(range(0, 16))
values = list(range(1, 17))

In [4]:
df = pd.DataFrame({'FIPS': box_names,'Index': index, 'Value': values})

# Sorting the DataFrame based on the 'rate' column :::: Values are needed to be sorted in ascending order
df.sort_values(by='Value', inplace=True)

# Adding a new column 'new_ID' with ID values starting from zero
df['sortedID'] = range(len(df))

# Function to calculate square coordinates for a given name
def calculate_square_coordinates(row):
    value = row['Index']
    # Assuming each smaller square has a side length of 1 unit
    x = value % 4
    y = value // 4
    # Return square coordinates as a Shapely polygon
    return geom.Polygon([(x, y), (x+1, y), (x+1, y+1), (x, y+1)])

# Calculate square coordinates for each row and create a geometry column
df['geometry'] = df.apply(calculate_square_coordinates, axis=1)

In [5]:
df

Unnamed: 0,FIPS,Index,Value,sortedID,geometry
0,A,0,1,0,"POLYGON ((0 0, 1 0, 1 1, 0 1, 0 0))"
1,B,1,2,1,"POLYGON ((1 0, 2 0, 2 1, 1 1, 1 0))"
2,C,2,3,2,"POLYGON ((2 0, 3 0, 3 1, 2 1, 2 0))"
3,D,3,4,3,"POLYGON ((3 0, 4 0, 4 1, 3 1, 3 0))"
4,E,4,5,4,"POLYGON ((0 1, 1 1, 1 2, 0 2, 0 1))"
5,F,5,6,5,"POLYGON ((1 1, 2 1, 2 2, 1 2, 1 1))"
6,G,6,7,6,"POLYGON ((2 1, 3 1, 3 2, 2 2, 2 1))"
7,H,7,8,7,"POLYGON ((3 1, 4 1, 4 2, 3 2, 3 1))"
8,I,8,9,8,"POLYGON ((0 2, 1 2, 1 3, 0 3, 0 2))"
9,J,9,10,9,"POLYGON ((1 2, 2 2, 2 3, 1 3, 1 2))"


In [6]:
# shuffling the values
np.random.seed(0)
np.random.shuffle(values)
values

[2, 7, 9, 10, 14, 5, 3, 15, 11, 8, 16, 12, 4, 1, 6, 13]

In [7]:
# Adding shuffled values to the DataFrame
df['Value2'] = values

In [8]:
# shuffling the values
np.random.seed(0)
np.random.shuffle(values)
values

# Adding shuffled values to the DataFrame
df['Value3'] = values

In [9]:
values

[7, 3, 11, 8, 1, 14, 9, 6, 16, 15, 13, 12, 10, 2, 5, 4]

In [10]:
# Convert the DataFrame to a GeoDataFrame
gdf = gpd.GeoDataFrame(df, geometry='geometry')

# Set the CRS to a simple Cartesian coordinate system
gdf.crs = "EPSG:3395"  # This is a commonly used projected CRS

gdf = gdf[['FIPS','Value','Value2','Value3', 'geometry']]  

In [11]:
from adjacency_simplex import AdjacencySimplex  # Import the class

In [12]:
# Initialize the AdjacencySimplex class
adj_simplex = AdjacencySimplex(gdf, 'Value3', threshold = None, filter_method = 'down')

# Filter the GeoDataFrame
filtered_df,gdf_id = adj_simplex.filter_sort_gdf()

# Calculate the adjacent countries
adj_simplex.calculate_adjacent_countries()

# Form the simplicial complex
simplices = adj_simplex.form_simplicial_complex()

In [13]:
gdf.shape

(16, 5)

In [14]:
filtered_df.head()

Unnamed: 0,FIPS,Value,Value2,Value3,geometry,sortedID
8,I,9,11,16,"POLYGON ((0 2, 1 2, 1 3, 0 3, 0 2))",0
9,J,10,8,15,"POLYGON ((1 2, 2 2, 2 3, 1 3, 1 2))",1
5,F,6,5,14,"POLYGON ((1 1, 2 1, 2 2, 1 2, 1 1))",2
10,K,11,16,13,"POLYGON ((2 2, 3 2, 3 3, 2 3, 2 2))",3
11,L,12,12,12,"POLYGON ((3 2, 4 2, 4 3, 3 3, 3 2))",4


In [15]:
simplices

[[0],
 [1],
 [0, 1],
 [2],
 [0, 2],
 [1, 2],
 [0, 1, 2],
 [3],
 [1, 3],
 [2, 3],
 [1, 2, 3],
 [4],
 [3, 4],
 [5],
 [2, 5],
 [6],
 [0, 6],
 [1, 6],
 [0, 1, 6],
 [7],
 [1, 7],
 [2, 7],
 [1, 2, 7],
 [3, 7],
 [1, 3, 7],
 [2, 3, 7],
 [4, 7],
 [3, 4, 7],
 [5, 7],
 [2, 5, 7],
 [8],
 [5, 8],
 [7, 8],
 [5, 7, 8],
 [9],
 [2, 9],
 [10],
 [3, 10],
 [4, 10],
 [3, 4, 10],
 [5, 10],
 [7, 10],
 [3, 7, 10],
 [4, 7, 10],
 [5, 7, 10],
 [8, 10],
 [5, 8, 10],
 [7, 8, 10],
 [11],
 [1, 11],
 [3, 11],
 [1, 3, 11],
 [4, 11],
 [3, 4, 11],
 [12],
 [3, 12],
 [4, 12],
 [3, 4, 12],
 [11, 12],
 [3, 11, 12],
 [4, 11, 12],
 [13],
 [2, 13],
 [5, 13],
 [2, 5, 13],
 [7, 13],
 [2, 7, 13],
 [5, 7, 13],
 [9, 13],
 [2, 9, 13],
 [14],
 [0, 14],
 [1, 14],
 [0, 1, 14],
 [3, 14],
 [1, 3, 14],
 [6, 14],
 [0, 6, 14],
 [1, 6, 14],
 [11, 14],
 [1, 11, 14],
 [3, 11, 14],
 [15],
 [0, 15],
 [1, 15],
 [0, 1, 15],
 [2, 15],
 [0, 2, 15],
 [1, 2, 15],
 [9, 15],
 [2, 9, 15],
 [13, 15],
 [9, 13, 15],
 [2, 13, 15]]

In [35]:
filtered_df

Unnamed: 0,FIPS,Value,Value2,Value3,geometry,sortedID
8,I,9,11,16,"POLYGON ((0 2, 1 2, 1 3, 0 3, 0 2))",0
9,J,10,8,15,"POLYGON ((1 2, 2 2, 2 3, 1 3, 1 2))",1
5,F,6,5,14,"POLYGON ((1 1, 2 1, 2 2, 1 2, 1 1))",2
10,K,11,16,13,"POLYGON ((2 2, 3 2, 3 3, 2 3, 2 2))",3
11,L,12,12,12,"POLYGON ((3 2, 4 2, 4 3, 3 3, 3 2))",4
2,C,3,9,11,"POLYGON ((2 0, 3 0, 3 1, 2 1, 2 0))",5
12,M,13,4,10,"POLYGON ((0 3, 1 3, 1 4, 0 4, 0 3))",6
6,G,7,3,9,"POLYGON ((2 1, 3 1, 3 2, 2 2, 2 1))",7
3,D,4,10,8,"POLYGON ((3 0, 4 0, 4 1, 3 1, 3 0))",8
0,A,1,2,7,"POLYGON ((0 0, 1 0, 1 1, 0 1, 0 0))",9


Plotting

In [16]:
from plot_utills import plot_simplicial_complex_gif  # Import the function

In [17]:
import io
from PIL import Image
import matplotlib.pyplot as plt
import numpy as np

In [18]:
list_gif = plot_simplicial_complex_gif(dataframe=gdf_id, simplices=simplices, variable='Value3')
list_gif[0].save('test_Value3.gif', save_all=True,append_images=list_gif[1:],optimize=False,duration=200,loop=0) 

TDA Summary calaculation code is given below. 

In [19]:
import gudhi
import numpy as np

In [20]:
variable_name = 'Value3'

In [None]:
st = gudhi.SimplexTree()
st.set_dimension(2)

In [None]:
for simplex in simplices:
    if len(simplex) == 1:
        st.insert([simplex[0]], filtration=0.0)

In [38]:
for simplex in simplices:
    if len(simplex) == 2:
        print(simplex)
        last_simplex = simplex[-1]
        first_simplex = simplex[0]
        print(last_simplex)
        filtration_value = filtered_df.loc[filtered_df['sortedID'] == last_simplex, variable_name].values[0]

        FIRST_VALUE = filtered_df.loc[filtered_df['sortedID'] == first_simplex, variable_name].values[0]
        print(f'first value:  - {FIRST_VALUE}')

        print(filtration_value)

        st.insert(simplex, filtration=filtration_value)

        # break


[0, 1]
1
first value:  - 16
15
[0, 2]
2
first value:  - 16
14
[1, 2]
2
first value:  - 15
14
[1, 3]
3
first value:  - 15
13
[2, 3]
3
first value:  - 14
13
[3, 4]
4
first value:  - 13
12
[2, 5]
5
first value:  - 14
11
[0, 6]
6
first value:  - 16
10
[1, 6]
6
first value:  - 15
10
[1, 7]
7
first value:  - 15
9
[2, 7]
7
first value:  - 14
9
[3, 7]
7
first value:  - 13
9
[4, 7]
7
first value:  - 12
9
[5, 7]
7
first value:  - 11
9
[5, 8]
8
first value:  - 11
8
[7, 8]
8
first value:  - 9
8
[2, 9]
9
first value:  - 14
7
[3, 10]
10
first value:  - 13
6
[4, 10]
10
first value:  - 12
6
[5, 10]
10
first value:  - 11
6
[7, 10]
10
first value:  - 9
6
[8, 10]
10
first value:  - 8
6
[1, 11]
11
first value:  - 15
5
[3, 11]
11
first value:  - 13
5
[4, 11]
11
first value:  - 12
5
[3, 12]
12
first value:  - 13
4
[4, 12]
12
first value:  - 12
4
[11, 12]
12
first value:  - 5
4
[2, 13]
13
first value:  - 14
3
[5, 13]
13
first value:  - 11
3
[7, 13]
13
first value:  - 9
3
[9, 13]
13
first value:  - 7
3
[0, 14

In [None]:
for simplex in simplices:
    if len(simplex) == 3:
        last_simplex = simplex[-1]
        filtration_value = filtered_df.loc[filtered_df['sortedID'] == last_simplex, variable_name].values[0]
        st.insert(simplex, filtration=filtration_value)

In [22]:
st.compute_persistence()
persistence = st.persistence()

In [23]:
# intervals_dim1 = st.persistence_intervals_in_dimension(1)
intervals_dim0 = st.persistence_intervals_in_dimension(0)

In [24]:
intervals_dim0

array([[ 0.,  1.],
       [ 0.,  1.],
       [ 0.,  1.],
       [ 0.,  1.],
       [ 0.,  1.],
       [ 0.,  2.],
       [ 0.,  2.],
       [ 0.,  2.],
       [ 0.,  2.],
       [ 0.,  3.],
       [ 0.,  3.],
       [ 0.,  4.],
       [ 0.,  4.],
       [ 0.,  6.],
       [ 0.,  6.],
       [ 0., inf]])

In [25]:
# get the max value of the filtered_df to replace inf
max_value = filtered_df[variable_name].max()
print(f'max value: {max_value}')



max value: 16


In [26]:
# replace inf with a large number   #this needs to be fixed : previously used 16
intervals_dim0[:, 1][np.isinf(intervals_dim0[:, 1])] = max_value

In [27]:
intervals_dim0

array([[ 0.,  1.],
       [ 0.,  1.],
       [ 0.,  1.],
       [ 0.,  1.],
       [ 0.,  1.],
       [ 0.,  2.],
       [ 0.,  2.],
       [ 0.,  2.],
       [ 0.,  2.],
       [ 0.,  3.],
       [ 0.,  3.],
       [ 0.,  4.],
       [ 0.,  4.],
       [ 0.,  6.],
       [ 0.,  6.],
       [ 0., 16.]])

In [28]:
# calculate topological summaries for dimension 1
H0_data_points = len(intervals_dim0)

print(f'Number of H0 data points: {H0_data_points}')

Number of H0 data points: 16


In [29]:
TL = 0
for interval in intervals_dim0:
        TL += interval[1] - interval[0]

print(f'Total length of H0 intervals: {TL}')

Total length of H0 intervals: 55.0


In [30]:
TML = 0
for interval in intervals_dim0:
    TML += (interval[1] + interval[0])/2

print(f'Total mean length of H0 intervals: {TML}')

Total mean length of H0 intervals: 27.5


In [31]:
AL = TL/len(intervals_dim0)
AML = TML/len(intervals_dim0)

In [32]:
TL/2

27.5

In [33]:
# TL: 151.0 for Value
# TL: 159.0 for Value2
# TL: 165.0 for Value3
