In [1]:
from pathlib import Path

import geopandas
import git
import pandas as pd

In [2]:
# Resolve the repository root so every path below is independent of the
# directory the notebook is launched from.
repo = git.Repo('.', search_parent_directories=True)
repo_path = repo.working_tree_dir

# Raw eviction records (input) and the enriched copy written by the last cell (output).
raw_data = f"{repo_path}/data/raw/hillsborough_county_evictions_geocoded.csv"
output_file = f"{repo_path}/data/_volunteer_created_datasets/_hallacy/hillsborough_county_evictions_geocoded.csv"

# District boundary archives, addressed with the zip:// scheme.
state_senate_districts_dataset = f"zip://{repo_path}/data/geo/florida_sdist_2021.zip"
us_rep_districts_dataset = f"zip://{repo_path}/data/geo/florida_usdist_2021.zip"

In [3]:
# Load the district boundary layers.
# to_crs(4326) reprojects each layer to EPSG:4326 (longitude/latitude), the
# coordinate system the eviction points are tagged with further down.
# The eviction CSV itself is read in the following cell, so it is not loaded here.
state_senate_gdf = geopandas.read_file(state_senate_districts_dataset).to_crs(4326)
us_gdf = geopandas.read_file(us_rep_districts_dataset).to_crs(4326)

In [4]:
# Read the raw eviction records and remember their column names so the export
# step can restrict the output to the original schema.
input_dataset = pd.read_csv(raw_data)
original_schema = input_dataset.columns.tolist()
input_dataset

Unnamed: 0,Court Type,BusinessName,LastName,FirstName,MiddleName,Suffix,Party Connection Type,Uniform Case Number,Uniform Traffic Citation,Case Type,...,match_indicator,match_type,matched_address,lon_lat,tiger_line_id,side,state_code,county_code,tract_code,block_code
0,County Court,,Moreno,Ismael,,JR,Defendant,292016CC040626A001HC,,"LT Delinquent Tenant $0.00 - $15,000",...,Match,Exact,"8214 SOLANO BAY LOOP, TAMPA, FL, 33635","-82.640305,28.02787",104614264.0,L,12.0,57.0,11709.0,1000.0
1,County Court,,ECHEVARRIA,PEDRO,,,Defendant,292016CC040089A001HC,,"LT Delinquent Tenant $0.00 - $15,000",...,Match,Exact,"1362 FOUR SEASONS BLVD, TAMPA, FL, 33613","-82.44465,28.076885",104512179.0,L,12.0,57.0,10817.0,1005.0
2,County Court,,VIERA,SANTOS,,,Defendant,292016CC039352A001HC,,"LT Removal of Tenant $0.00 - $15,000",...,Match,Non_Exact,"14521 FALL CIR, TAMPA, FL, 33613","-82.44785,28.07888",104512169.0,L,12.0,57.0,10817.0,1012.0
3,County Court,,GREENE,MENTORIA,,,Defendant,292016CC040397A001HC,,"LT Delinquent Tenant $0.00 - $15,000",...,Match,Exact,"5224 PICADOR CT, TAMPA, FL, 33617","-82.398155,28.026003",104532732.0,L,12.0,57.0,902.0,2001.0
4,County Court,,Morrison,Lauren,,,Defendant,292016CC040683A001HC,,"LT Delinquent Tenant $0.00 - $15,000",...,Match,Non_Exact,"6548 OSPREY LAKE CIR, RIVERVIEW, FL, 33578","-82.32808,27.89233",104641386.0,L,12.0,57.0,13410.0,2003.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
38215,County Court,,AUSTIN,QUANISHA,,,Defendant,292019CC053474A001HC,,"LT Delinquent Tenant $0.00 - $15,000",...,Match,Exact,"7163 FAIRVIEW PARK DR, TAMPA, FL, 33619","-82.37599,27.97992",104618370.0,R,12.0,57.0,12002.0,1007.0
38216,County Court,,Avila,Katty,,,Defendant,292019CC059569A001HC,,"LT Removal of Tenant $0.00 - $15,000",...,Match,Exact,"8414 AIKEN CT, TAMPA, FL, 33615","-82.5741,28.026194",104556093.0,L,12.0,57.0,11610.0,3009.0
38217,County Court,,AVILES,HECTOR,,,Defendant,292019CC058211A001HC,,"LT Delinquent Tenant $0.00 - $15,000",...,No_Match,,,,,,,,,
38218,County Court,,AYCOCK,ALYSIA,D,,Defendant,292019CC045071A001HC,,"LT Delinquent Tenant $0.00 - $15,000",...,Tie,,,,,,,,,


In [5]:
# Grab lat long for joining.
# lon_lat holds "<lon>,<lat>" text. Split on the first comma only (so the result
# never exceeds two columns) and convert to numbers explicitly instead of
# leaving strings for downstream code to coerce. Rows without a geocoding
# match have no lon_lat value and end up as NaN in both columns.
lon_lat_parts = input_dataset['lon_lat'].str.split(',', n=1, expand=True)
input_dataset[['lon', 'lat']] = lon_lat_parts.apply(pd.to_numeric)
input_dataset[['lon_lat', 'lon', 'lat']]


Unnamed: 0,lon_lat,lon,lat
0,"-82.640305,28.02787",-82.640305,28.02787
1,"-82.44465,28.076885",-82.44465,28.076885
2,"-82.44785,28.07888",-82.44785,28.07888
3,"-82.398155,28.026003",-82.398155,28.026003
4,"-82.32808,27.89233",-82.32808,27.89233
...,...,...,...
38215,"-82.37599,27.97992",-82.37599,27.97992
38216,"-82.5741,28.026194",-82.5741,28.026194
38217,,,
38218,,,


In [6]:
# Turn the eviction records into a GeoDataFrame: build one point per row from
# the lon/lat columns, tagged with the same CRS as the state senate layer so
# the spatial joins compare like with like.
eviction_points = geopandas.points_from_xy(
    input_dataset.lon,
    input_dataset.lat,
    crs=state_senate_gdf.crs,
)
input_dataset_gdf = geopandas.GeoDataFrame(input_dataset, geometry=eviction_points)
input_dataset_gdf['geometry']


0        POINT (-82.64030 28.02787)
1        POINT (-82.44465 28.07689)
2        POINT (-82.44785 28.07888)
3        POINT (-82.39816 28.02600)
4        POINT (-82.32808 27.89233)
                    ...            
38215    POINT (-82.37599 27.97992)
38216    POINT (-82.57410 28.02619)
38217               POINT (nan nan)
38218               POINT (nan nan)
38219    POINT (-82.31802 27.97739)
Name: geometry, Length: 38220, dtype: geometry

In [7]:
# Join with state senate data.
# how='left' keeps every eviction row; rows whose point falls within no
# district polygon get NaN in the district columns.
# `predicate` is the current name of the spatial-test keyword; the older `op`
# spelling is deprecated in geopandas and rejected by newer releases.
jdf = geopandas.sjoin(input_dataset_gdf, state_senate_gdf, how='left', predicate='within')
jdf

Unnamed: 0,Court Type,BusinessName,LastName,FirstName,MiddleName,Suffix,Party Connection Type,Uniform Case Number,Uniform Traffic Citation,Case Type,...,index_right,DISTRICT,SENATOR,PARTY,SESSION_,DESCRIPT,FGDLAQDATE,AUTOID,SHAPE_AREA,SHAPE_LEN
0,County Court,,Moreno,Ismael,,JR,Defendant,292016CC040626A001HC,,"LT Delinquent Tenant $0.00 - $15,000",...,17.0,018,"CRUZ, JANET",DEMOCRATIC,2021,SENATE DISTRICT 18,2021-01-03,18.0,6.824771e+08,132135.026018
1,County Court,,ECHEVARRIA,PEDRO,,,Defendant,292016CC040089A001HC,,"LT Delinquent Tenant $0.00 - $15,000",...,19.0,020,"LEE, TOM",REPUBLICAN,2021,SENATE DISTRICT 20,2021-01-03,20.0,1.846512e+09,248073.856350
2,County Court,,VIERA,SANTOS,,,Defendant,292016CC039352A001HC,,"LT Removal of Tenant $0.00 - $15,000",...,19.0,020,"LEE, TOM",REPUBLICAN,2021,SENATE DISTRICT 20,2021-01-03,20.0,1.846512e+09,248073.856350
3,County Court,,GREENE,MENTORIA,,,Defendant,292016CC040397A001HC,,"LT Delinquent Tenant $0.00 - $15,000",...,18.0,019,"ROUSON, DARRYL ERVIN",DEMOCRATIC,2021,SENATE DISTRICT 19,2021-01-03,19.0,9.283215e+08,200602.861590
4,County Court,,Morrison,Lauren,,,Defendant,292016CC040683A001HC,,"LT Delinquent Tenant $0.00 - $15,000",...,18.0,019,"ROUSON, DARRYL ERVIN",DEMOCRATIC,2021,SENATE DISTRICT 19,2021-01-03,19.0,9.283215e+08,200602.861590
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
38215,County Court,,AUSTIN,QUANISHA,,,Defendant,292019CC053474A001HC,,"LT Delinquent Tenant $0.00 - $15,000",...,18.0,019,"ROUSON, DARRYL ERVIN",DEMOCRATIC,2021,SENATE DISTRICT 19,2021-01-03,19.0,9.283215e+08,200602.861590
38216,County Court,,Avila,Katty,,,Defendant,292019CC059569A001HC,,"LT Removal of Tenant $0.00 - $15,000",...,17.0,018,"CRUZ, JANET",DEMOCRATIC,2021,SENATE DISTRICT 18,2021-01-03,18.0,6.824771e+08,132135.026018
38217,County Court,,AVILES,HECTOR,,,Defendant,292019CC058211A001HC,,"LT Delinquent Tenant $0.00 - $15,000",...,,,,,,,,,,
38218,County Court,,AYCOCK,ALYSIA,D,,Defendant,292019CC045071A001HC,,"LT Delinquent Tenant $0.00 - $15,000",...,,,,,,,,,,


In [8]:
# The US district layer also has a DISTRICT column, so give the state senate
# one its final name before the second join brings in another.
state_senate_name = "state_senate_district"
jdf = jdf.rename({"DISTRICT": state_senate_name}, axis="columns")

In [9]:
# index_right is a leftover of the first spatial join; the next join raises
# an exception if the column is still present.
jdf = jdf.drop(columns='index_right')

In [10]:
# Join in US district data.
# Same left/within join as for the state senate layer. `predicate` is the
# current name of the spatial-test keyword; the older `op` spelling is
# deprecated in geopandas and rejected by newer releases.
jdf = geopandas.sjoin(jdf, us_gdf, how='left', predicate='within')

In [11]:
# Give the US district column its final name, then list the columns to keep:
# everything from the raw file plus the two district identifiers.
us_district_name = "us_district"
jdf = jdf.rename({"DISTRICT": us_district_name}, axis="columns")

columns_to_get = [*original_schema, state_senate_name, us_district_name]
columns_to_get

['Court Type',
 'BusinessName',
 'LastName',
 'FirstName',
 'MiddleName',
 'Suffix',
 'Party Connection Type',
 'Uniform Case Number',
 'Uniform Traffic Citation',
 'Case Type',
 'Division',
 'Judge Name',
 'Date Filed',
 'Current Status',
 'Current Status Date',
 'Sex/Gender',
 'Party Address Line 1',
 'Party Address Line 2',
 'Party Address City',
 'Party Address State',
 'Party Address Zip Code',
 'Race',
 'Date of Birth',
 'Count Number',
 'Count Level and Degree',
 'Statute Violation',
 'Charge Description',
 'Offense Date',
 'Disposition Code',
 'Disposition Description',
 'Disposition Date',
 'Law Enforcement Agency Name',
 'Law Enforcement Officer Name',
 'Driver License Number',
 'Driver License State',
 'Commercial Vehicle',
 'Blood Alcohol Level',
 'Posted Speed',
 'Actual Speed',
 'Amount Paid',
 'Date Paid',
 'Defensive Driving School (DDS)',
 'DDS Court Ordered',
 'DDS Elected (Regular)',
 'DDS Elected (Advanced)',
 'Tag Number',
 'Tag State',
 'School Certificate Due Dat

In [12]:
# Export the original columns plus the two district columns.
final_dataset = jdf[columns_to_get]

# Create the destination folder if it is missing so the write does not fail
# on a checkout that lacks the per-volunteer directory.
Path(output_file).parent.mkdir(parents=True, exist_ok=True)

# NOTE(review): the row index is written as an unnamed leading column, which
# the raw file does not have. Pass index=False if consumers expect exactly
# `columns_to_get` — confirm before changing.
final_dataset.to_csv(output_file)