### Multiple filteraton level plotting using adjacency method

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import geopandas as gpd
import numpy as np
import itertools
from itertools import combinations
from scipy import spatial
import pickle as pickle
import gudhi
from pylab import *
from mpl_toolkits.mplot3d import Axes3D
%matplotlib inline
import io
from tqdm import tqdm
from PIL import Image, ImageDraw, ImageChops, ImageFont
import shapely.geometry as geom
from shapely.ops import unary_union
import warnings

import invr

# Ignore FutureWarnings
warnings.simplefilter(action='ignore', category=FutureWarning)

#### Creating the test dataset

In [2]:
Washington_Arlington_Alexandria_DC_VA_MD_WV_Counties = gpd.read_file('./data/DCMetroArea/DC_Metro_counties/Washington_Arlington_Alexandria_DC_VA_MD_WV_Counties.shp')

In [3]:
county_fips_list = Washington_Arlington_Alexandria_DC_VA_MD_WV_Counties['GEOID'].tolist()

In [4]:
us_svi = gpd.read_file('./data/DCMetroArea/SVI2020_US_tract.gdb')

In [5]:
dcmetro_svi = us_svi[us_svi['STCNTY'].isin(county_fips_list)]

In [6]:
dcmetro_svi.reset_index(drop=True)

Unnamed: 0,ST,STATE,ST_ABBR,STCNTY,COUNTY,FIPS,LOCATION,AREA_SQMI,E_TOTPOP,M_TOTPOP,...,MP_AIAN,EP_NHPI,MP_NHPI,EP_TWOMORE,MP_TWOMORE,EP_OTHERRACE,MP_OTHERRACE,Shape_Length,Shape_Area,geometry
0,11,District of Columbia,DC,11001,District of Columbia,11001000101,"Census Tract 1.01, District of Columbia, Distr...",0.077134,1250,238,...,3.1,0.0,3.1,0.2,3.3,1.1,1.8,0.021812,0.000021,"MULTIPOLYGON (((-77.05714 38.91054, -77.05184 ..."
1,11,District of Columbia,DC,11001,District of Columbia,11001000102,"Census Tract 1.02, District of Columbia, Distr...",0.658877,3318,442,...,1.2,0.0,1.2,1.1,1.6,0.4,0.6,0.093168,0.000231,"MULTIPOLYGON (((-77.06871 38.90102, -77.06280 ..."
2,11,District of Columbia,DC,11001,District of Columbia,11001000201,"Census Tract 2.01, District of Columbia, Distr...",0.194983,3972,474,...,1.1,0.0,1.0,4.1,2.2,0.0,1.0,0.032884,0.000050,"MULTIPOLYGON (((-77.07902 38.91268, -77.07433 ..."
3,11,District of Columbia,DC,11001,District of Columbia,11001000202,"Census Tract 2.02, District of Columbia, Distr...",0.299783,4665,740,...,0.9,0.4,0.5,2.9,2.3,0.0,0.9,0.069562,0.000131,"MULTIPOLYGON (((-77.07941 38.90564, -77.07831 ..."
4,11,District of Columbia,DC,11001,District of Columbia,11001000300,"Census Tract 3, District of Columbia, District...",0.402379,6504,705,...,0.6,0.0,0.6,1.2,0.9,0.6,0.9,0.040779,0.000109,"MULTIPOLYGON (((-77.08256 38.92150, -77.08201 ..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1481,54,West Virginia,WV,54037,Jefferson,54037972601,"Census Tract 9726.01, Jefferson County, West V...",8.494988,2126,315,...,1.6,0.0,1.6,4.1,3.1,0.5,0.6,0.319876,0.002495,"MULTIPOLYGON (((-77.82781 39.26186, -77.82441 ..."
1482,54,West Virginia,WV,54037,Jefferson,54037972602,"Census Tract 9726.02, Jefferson County, West V...",12.707738,4488,432,...,0.8,0.0,0.8,1.5,1.7,0.0,0.8,0.308355,0.003436,"MULTIPOLYGON (((-77.83519 39.32711, -77.83279 ..."
1483,54,West Virginia,WV,54037,Jefferson,54037972701,"Census Tract 9727.01, Jefferson County, West V...",7.256292,3075,420,...,1.7,0.0,1.1,1.2,1.1,0.0,1.1,0.295866,0.002096,"MULTIPOLYGON (((-77.81533 39.25442, -77.81478 ..."
1484,54,West Virginia,WV,54037,Jefferson,54037972702,"Census Tract 9727.02, Jefferson County, West V...",17.131470,4115,374,...,0.8,0.0,0.8,5.3,3.4,0.0,0.8,0.447210,0.004788,"MULTIPOLYGON (((-77.86088 39.15345, -77.85821 ..."


In [7]:
dcmetro_svi['RPL_THEMES'] = dcmetro_svi['RPL_THEMES'].replace(-999.00, 0)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)


In [8]:
df_less = dcmetro_svi[['COUNTY','FIPS','LOCATION','RPL_THEMES','geometry']]

In [9]:
# Sorting the DataFrame based on the 'rate' column
df_less.sort_values(by='RPL_THEMES', inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_less.sort_values(by='RPL_THEMES', inplace=True)


In [10]:
# Adding a new column 'new_ID' with ID values starting from zero
df_less['sortedID'] = range(len(df_less))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)


In [11]:
df_less = df_less[['FIPS', 'sortedID', 'RPL_THEMES','geometry']]

In [12]:
# Convert the DataFrame to a GeoDataFrame
gdf = gpd.GeoDataFrame(df_less, geometry='geometry')

In [13]:
# Set the CRS to a simple Cartesian coordinate system
gdf.crs = "EPSG:3395"  # This is a commonly used projected CRS

In [14]:
gdf.sort_values(by='sortedID', inplace=True)

In [15]:
gdf.head(20)

Unnamed: 0,FIPS,sortedID,RPL_THEMES,geometry
78551,51059980100,0,0.0,"MULTIPOLYGON (((-77.216 38.755, -77.212 38.756..."
78064,51013980200,1,0.0,"MULTIPOLYGON (((-77.050 38.850, -77.048 38.852..."
15897,11001000201,2,0.0,"MULTIPOLYGON (((-77.079 38.913, -77.074 38.913..."
78909,51107980100,3,0.0,"MULTIPOLYGON (((-77.519 38.946, -77.516 38.952..."
16065,11001009511,4,0.0,"MULTIPOLYGON (((-77.008 38.942, -77.006 38.941..."
79446,51510980000,5,0.0,"MULTIPOLYGON (((-77.048 38.841, -77.044 38.840..."
78553,51059980300,6,0.0,"MULTIPOLYGON (((-77.157 38.959, -77.159 38.960..."
35162,24017990000,7,0.0,"MULTIPOLYGON (((-77.134 38.660, -77.133 38.674..."
78371,51059440504,8,0.0,"MULTIPOLYGON (((-77.316 38.846, -77.315 38.845..."
79141,51153980100,9,0.0,"MULTIPOLYGON (((-77.564 38.614, -77.562 38.614..."


#### Generate adjacent counties


In [16]:
def generate_adjacent_counties(dataframe,filtration_threshold):
    # filtered_df = dataframe[dataframe['Value_2'] < filtration_threshold]
    filtered_df = dataframe

    # Perform a spatial join to find adjacent precincts
    adjacent_counties = gpd.sjoin(filtered_df, filtered_df, predicate='intersects', how='left')

    # Filter the results to include only the adjacent states
    adjacent_counties = adjacent_counties.query('sortedID_left != sortedID_right')

    # Group the resulting dataframe by the original precinct Name and create a list of adjacent precinct Name
    adjacent_counties = adjacent_counties.groupby('sortedID_left')['sortedID_right'].apply(list).reset_index()

    adjacent_counties.rename(columns={'sortedID_left': 'county', 'sortedID_right': 'adjacent'}, inplace=True)

    adjacencies_list = adjacent_counties['adjacent'].tolist()

    merged_df = pd.merge(adjacent_counties, dataframe, left_on='county',right_on='sortedID', how='left')
    merged_df = gpd.GeoDataFrame(merged_df, geometry='geometry')

    return adjacencies_list,merged_df



In [17]:
adjacencies_list,adjacent_counties_df = generate_adjacent_counties(gdf,17)

In [18]:
adjacent_counties_df

Unnamed: 0,county,adjacent,FIPS,sortedID,RPL_THEMES,geometry
0,0,"[598, 102, 1018, 1197, 404]",51059980100,0,0.0000,"MULTIPOLYGON (((-77.216 38.755, -77.212 38.756..."
1,1,"[390, 5, 829, 489, 270, 12, 10]",51013980200,1,0.0000,"MULTIPOLYGON (((-77.050 38.850, -77.048 38.852..."
2,2,"[549, 154, 180]",11001000201,2,0.0000,"MULTIPOLYGON (((-77.079 38.913, -77.074 38.913..."
3,3,"[1138, 506, 11, 520, 289, 359, 552, 630]",51107980100,3,0.0000,"MULTIPOLYGON (((-77.519 38.946, -77.516 38.952..."
4,4,"[666, 1065, 1121, 1232, 1466]",11001009511,4,0.0000,"MULTIPOLYGON (((-77.008 38.942, -77.006 38.941..."
...,...,...,...,...,...,...
1481,1481,"[1033, 1462, 1281, 991, 775, 1440, 1159]",24033807409,1481,0.9769,"MULTIPOLYGON (((-76.962 39.031, -76.958 39.038..."
1482,1482,"[1465, 1182, 1317, 1327, 1337, 1392, 1240]",24033804001,1482,0.9790,"MULTIPOLYGON (((-76.919 38.947, -76.918 38.948..."
1483,1483,"[1428, 1270, 1266, 1310, 1371, 1008, 1465, 1275]",11001009601,1483,0.9817,"MULTIPOLYGON (((-76.958 38.906, -76.954 38.911..."
1484,1484,"[829, 1353, 1351, 117, 1201, 1354, 1422, 1430,...",11001007401,1484,0.9838,"MULTIPOLYGON (((-77.003 38.871, -77.001 38.871..."


In [19]:
def form_simplicial_complex(adjacent_county_list):
    max_dimension = 3

    V = []
    V = invr.incremental_vr(V, adjacent_county_list, max_dimension)

    return V
    

In [20]:
V = form_simplicial_complex(adjacencies_list)

In [21]:
def fig2img(fig):
     #convert matplot fig to image and return it

     buf = io.BytesIO()
     fig.savefig(buf)
     buf.seek(0)
     img = Image.open(buf)
     return img

In [34]:
def plot_simplicial_complex(dataframe,V,list_gif):

    #city centroids
    city_coordinates = {city.sortedID: np.array((city.geometry.centroid.x, city.geometry.centroid.y)) for _, city in dataframe.iterrows()}

    # Create a figure and axis
    fig, ax = plt.subplots(figsize=(20, 20))
    ax.set_axis_off() 

    # Plot the "wyoming_svi" DataFrame
    dataframe.plot(ax=ax, edgecolor='black', linewidth=0.3, color="white")

    # Plot the centroid of the large square with values
    # for i, row in dataframe.iterrows():
        # centroid = row['geometry'].centroid
        # text_to_display = f"FIPS: {row['FIPS']}\nFilteration: {row['RPL_THEMES']}"
        # plt.text(centroid.x, centroid.y, str(row['FIPS']), fontsize=15, ha='center', color="black")
        # plt.text(centroid.x, centroid.y, text_to_display, fontsize=15, ha='center', color="black")
    i=0
    for edge_or_traingle in V:
        
        if len(edge_or_traingle) == 2:
            # Plot an edge
            ax.plot(*zip(*[city_coordinates[vertex] for vertex in edge_or_traingle]), color='red', linewidth=1)
            # img = fig2img(fig)
            plt.savefig(f'resullts/test/frame_{i}.png') 
            # list_gif.append(img)
            i=i+1
        elif len(edge_or_traingle) == 3:
            # Plot a triangle
            ax.add_patch(plt.Polygon([city_coordinates[vertex] for vertex in edge_or_traingle], color='green', alpha=0.2))
            img = fig2img(fig)
            # list_gif.append(img)
            plt.savefig(f'resullts/test/frame_{i}.png') 
            i=i+1
        
    plt.close()

    # return list_gif


In [35]:
list_gif = []

In [36]:
plot_simplicial_complex(adjacent_counties_df,V,list_gif)

In [None]:
# list_gif = plot_simplicial_complex(adjacent_counties_df,V,list_gif)

In [None]:
# list_gif[0].save('DCMetroArea.gif',
#                  save_all=True,append_images=list_gif[1:],optimize=False,duration=25,loop=0)