In [1]:
import json
import pandas as pd
from Levenshtein import distance
import geopandas as gpd
pd.set_option('display.max_rows', None)

In [2]:
def extract_names_from_geojson(geojson_file):
    with open(geojson_file, 'r') as file:
        data = json.load(file)
        names = []
        if data['type'] == 'FeatureCollection':
            for feature in data['features']:
                if 'name' in feature['properties']:
                    names.append(feature['properties']['name'])
        elif data['type'] == 'Feature':
            if 'name' in data['properties']:
                names.append(data['properties']['name'])
        return names

def get_match(str1:str, values:list[str]):
    closest_distance = float('inf')
    closest_value = None
    for value2 in values:
        if value2 == "SBIF":
            continue
        if ((False not in [word.lower() in value2.lower() for word in str1.split(' ')]) or 
            (False not in [word.lower() in str1.lower() for word in value2.split(' ')])):
            return value2
        dist = distance(str1.lower(), value2.lower())
        if dist < closest_distance:
            closest_distance = dist
            closest_value = value2
    return closest_value

def find_closest_match(list1:list[str], list2:list[str]):    
    limbo = []

    # Find closest match for each element in col1
    closest_mapping = {}
    closest_mapping_swapped = {None: []}

    for TIF in list1:
        closest_value = get_match(TIF, list2)
        
        # if (closest_value in closest_mapping_swapped):
        #     limbo.append((TIF, []))
        
        # while (limbo):
        #     new_key = limbo[0][0]
        #     tries = limbo[0][1]
        #     value = get_match(TIF, [x for x in list1 if x not in tries])
        #     set_key = closest_mapping_swapped[value] if value in closest_mapping_swapped else None

        #     if not value:
        #         closest_mapping[new_key] = value
        #         closest_mapping_swapped[value].append(new_key)
        #         limbo.pop()
        #     elif (set_key == None):
        #         closest_mapping[new_key] = value
        #         closest_mapping_swapped[value] = new_key
        #         limbo.pop()
        #     elif (distance(set_key, value) > distance(new_key, value)):
        #         del closest_mapping[set_key]
        #         del closest_mapping_swapped[value]
        #         closest_mapping[new_key] = value
        #         closest_mapping_swapped[value] = new_key
        #         limbo.pop()
        #         limbo.append((set_key, []))
        #     else:
        #         limbo[0][1].append(value)

        closest_mapping[TIF] = closest_value
        closest_mapping_swapped[closest_value] = TIF

    new_df = pd.DataFrame([(k,v, distance(k,v), 1-distance(k,v)/len(v)) for k,v in closest_mapping.items()], columns = (["1", "2", "distance", "accuracy"]))

    return new_df, closest_mapping_swapped[None]


In [3]:
geojson_file = 'Boundaries - Tax Increment Financing Districts.geojson'
json_tifs = extract_names_from_geojson(geojson_file)
print(pd.Series(json_tifs))

0                                         116th/Avenue O
1                                     Bryn Mawr/Broadway
2                                     51st and Lake Park
3                                              Lakefront
4                                           Madden/Wells
5                                            Ohio/Wabash
6      Stony Island Avenue Commercial and Burnside In...
7                                       Homan-Arthington
8                                            Wilson Yard
9                                     Montrose/Clarendon
10                                Diversey/Chicago River
11                                          Goose Island
12                                       79th and Cicero
13                                     Edgewater/Ashland
14                                     Chicago/Kingsbury
15                                       Belmont/Central
16                                            River West
17                             

In [6]:
# tif_transfers = pd.DataFrame(columns=["TIF", "Year", "Amount"])
tif_transfers = pd.DataFrame(columns=["TIF", "Year"])

for i in range(2017, 2027):
    df = pd.read_csv(f"DataGovCSVs/TIF_Transfers_{i}_modified.csv")
    for index, row in df.iterrows():
        if not (row["TIF1"] in list(tif_transfers["TIF"])):
            tif_transfers = tif_transfers.append({
                "TIF": row["TIF1"],
                "Year": i,
                "Amount": int(row["Amount"])
            }, ignore_index=True)
        else:
            tif_transfers.loc[tif_transfers["TIF"] == row["TIF1"], "Amount"] += int(row["Amount"])

tif_transfers = tif_transfers.sort_values(by="TIF")  # Assign the sorted DataFrame back to tifs
print(tif_transfers)

  tif_transfers = tif_transfers.append({
  tif_transfers = tif_transfers.append({
  tif_transfers = tif_transfers.append({
  tif_transfers = tif_transfers.append({
  tif_transfers = tif_transfers.append({
  tif_transfers = tif_transfers.append({
  tif_transfers = tif_transfers.append({
  tif_transfers = tif_transfers.append({
  tif_transfers = tif_transfers.append({
  tif_transfers = tif_transfers.append({
  tif_transfers = tif_transfers.append({
  tif_transfers = tif_transfers.append({
  tif_transfers = tif_transfers.append({
  tif_transfers = tif_transfers.append({
  tif_transfers = tif_transfers.append({
  tif_transfers = tif_transfers.append({
  tif_transfers = tif_transfers.append({
  tif_transfers = tif_transfers.append({
  tif_transfers = tif_transfers.append({
  tif_transfers = tif_transfers.append({
  tif_transfers = tif_transfers.append({
  tif_transfers = tif_transfers.append({
  tif_transfers = tif_transfers.append({
  tif_transfers = tif_transfers.append({
  tif_transfers 

                                                   TIF  Year       Amount
35                                     105th/Vincennes  2017   -3379500.0
2                                        107th/Halsted  2017    5800000.0
28        111th Street/Kedzie Avenue Business District  2017   -2000000.0
94                                         116th Ave O  2021    9600000.0
95                                   119th and Halsted  2022   -4500100.0
54                                          119th/I-57  2017   -8251500.0
23                                  126th and Torrence  2017     400000.0
31                                       24th/Michigan  2017   21310000.0
87                                    35th and Wallace  2020   -6000000.0
76                                        35th/Halsted  2019  -19591500.0
96                                          35th/State  2022    6000000.0
97                                  43rd/Cottage Grove  2022   20000000.0
107                                   

In [5]:
matches, nones = find_closest_match(tif_transfers["TIF"].unique(), json_tifs)
# matches

In [6]:
# matches[matches["accuracy"] < 1]

In [7]:
matches.loc[matches['1'] == "105th/Vincennes", '2'] = "105th Street and Vincennes Avenue"
matches.loc[matches['1'] == "43rd/Cottage Grove", '2'] = "43rd Street/Cottage Grove Avenue"
matches.loc[matches['1'] == "47th/King Drive", '2'] = "47th and King Drive"
matches.loc[matches['1'] == "Commercial Avenue", '2'] = "Commercial Avenue"
matches.loc[matches['1'] == "Kinzie Industrial Corridor", '2'] = "Kinzie Industrial Conservation Area"
matches.loc[matches['1'] == "Woodlawn", '2'] = "Woodlawn"
matches.loc[matches['1'] == "Red Purple Transit", '2'] = "Red and Purple Modernization Phase One Project"

In [8]:
outdated_TIFS = ["126th and Torrence", 
                 "35th and Wallace",
                 "60th and Western",
                 "Calumet Avenue/Cermak Road",
                 "Clark Street and Ridge Avenue",
                 "Devon/Western",
                 "Irving Park/Elston",
                 "Jefferson Park Business District",
                 "North Branch (North)",
                 "North Branch (South)",
                 "North-Cicero",
                 "Portage Park",
                 "Read-Dunning"]
matches = matches[~matches['1'].isin(outdated_TIFS)]
matches = matches[["1", "2"]]
matches

Unnamed: 0,1,2
0,105th/Vincennes,105th Street and Vincennes Avenue
1,107th/Halsted,107th/Halsted
2,111th Street/Kedzie Avenue Business District,111th Street/Kedzie Avenue Business District
3,116th Ave O,116th/Avenue O
4,119th and Halsted,119th and Halsted
5,119th/I-57,119th Street/I-57
7,24th/Michigan,24th/Michigan
9,35th/Halsted,35th/Halsted
10,35th/State,35th/State
11,43rd/Cottage Grove,43rd Street/Cottage Grove Avenue


In [9]:
conversions = {row["1"]:row["2"] for index, row in matches.iterrows()}
tif_transfers["TIF"] = tif_transfers["TIF"].replace(conversions)
tif_transfers = tif_transfers[~tif_transfers["TIF"].isin(outdated_TIFS)]

In [10]:
tif_bounds = gpd.read_file(geojson_file)
tif_bounds.rename(columns={"name":"TIF"},inplace=True)
tif_bounds = tif_bounds[tif_bounds["TIF"].isin(tif_transfers["TIF"])]
tif_bounds.sample()

Unnamed: 0,sbif,TIF,shape_area,show,objectid_1,name_trim,wards_2023,ref,approval_d,objectid,...,shape_leng,comm_area,objectid_2,use,repealed_d,type,shape_le_1,ind,expiration,geometry
45,Y,Roosevelt/Cicero,24319335.8214,1,48,Roosevelt/Cicero,2429,T- 38,2/5/1998,5216,...,45943.7620531,252629,39,Industrial,,Existing,45943.7620531,Industrial,12/31/2034,"MULTIPOLYGON (((-87.73986 41.86940, -87.73985 ..."


In [11]:
[row["TIF"] for index, row in tif_transfers.iterrows() if not row["TIF"] in list(tif_bounds["TIF"])]

[]

In [12]:
tif_transfers.to_csv("DataGovCSVs/TIF_Transfers_merged.csv")