# Setup

In [6]:
import contextily as cx
import figure_utilities
import constants
from stats_utilities import produce_summary_statistics, select_controls, test_balance
import geopandas as gpd
import matplotlib.pyplot as plt
from panel_utilities import get_value_variable_names, prepare_df_for_DiD
import numpy as np
plt.rcParams['savefig.dpi'] = 300
import os
import pandas as pd

In [7]:
# Store paths.
# All paths are relative, so they resolve against the notebook's working directory.
# Inputs:
INPUT_DATA_PANEL = "../../data/03_cleaned/crime_analysis_monthly.parquet"  # Parquet panel loaded with pd.read_parquet
INPUT_DATA_TRACTS = "../../data/02_intermediate/tracts.csv"  # CSV read for 'tract_geoid' and 'poor_share2010'
INPUT_DATA_BOSTON_TRACTS_SHAPEFILE = "../../data/01_raw/Census_2010_Tracts"  # Tract boundaries read with gpd.read_file
# Outputs:
OUTPUT_TABLES = "../../output/final_paper/tables"  # Destination for exported LaTeX (.tex) tables
OUTPUT_FIGURES = "../../output/final_paper/figures"  # Destination for saved .png figures

# Summary Statistics

## Crime Trends 

In [8]:
# Load the full (unrestricted) monthly panel.
df = pd.read_parquet(INPUT_DATA_PANEL)
df = df.reset_index()  # Expose 'case_number' as an ordinary column

# Go from one crime-count column per month to one row per (case, month).
triplet = get_value_variable_names(df, f"group_0_crimes_{constants.Analysis.MAIN_RESULTS_RADIUS}m")
weekly_value_vars_crime, month_to_int_dictionary, _ = triplet
df = pd.melt(df,
             id_vars=['case_number', 'judgment_for_plaintiff', 'latest_docket_month'],
             value_vars=weekly_value_vars_crime,
             var_name='month')
df.loc[:, 'month'] = df['month'].str[:7]  # Keep only the leading month label; the outcome-name suffix is discarded

# Swap month labels for their integer codes so that months can be subtracted.
month_columns = ['latest_docket_month', 'month']
df.loc[:, month_columns] = df[month_columns].replace(month_to_int_dictionary)

# Mean crime level in each month relative to treatment, separately for the treatment and control groups.
df.loc[:, 'treatment_relative_month'] = df['month'] - df['latest_docket_month']
df = df.groupby(['judgment_for_plaintiff', 'treatment_relative_month'])['value'].agg(['mean'])

# Both groups are restricted to the same window of relative months.
event_window = slice(constants.Analysis.MINIMUM_PRE_PERIOD, constants.Analysis.MAXIMUM_POST_PERIOD)
control_crime_trend = df.loc[(0, event_window), :]
treatment_crime_trend = df.loc[(1, event_window), :]

# One black line per group: dashed for judgment_for_plaintiff == 0, solid for == 1.
fig, ax = plt.subplots()
series_to_draw = [
    (control_crime_trend, '--', "Properties where Tenant Won Eviction Case"),
    (treatment_crime_trend, '-', "Properties where Plaintiff Won Eviction Case"),
]
for trend, line_style, legend_label in series_to_draw:
    ax.plot(trend.index.get_level_values(1),
            trend['mean'],
            color='black',
            linestyle=line_style,
            label=legend_label)
ax.set_xlabel("Month Relative to Latest Docket Date")
ax.set_ylabel(f"Crime Incidents within {constants.Analysis.MAIN_RESULTS_RADIUS} Meters")
ax.legend()

figure_utilities.save_figure_and_close(fig, os.path.join(OUTPUT_FIGURES, "crime_trends.png"))


## Map of Evictions over Census Tracts Colored by Poverty Rate

In [20]:
# Read unrestricted dataset into memory.
df = pd.read_parquet(INPUT_DATA_PANEL)
df = df.reset_index()  # So we can use 'case_number' like a column

# Every layer drawn on the map must share one CRS. The basemap below is requested in
# Web Mercator, so the points and the tract polygons are both brought into it.
map_crs = "EPSG:3857"

# Create spatial data: one point per case. The coordinates are declared as EPSG:4326
# and then projected to the map CRS.
unrestricted_gdf = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df['Longitude'], df['Latitude']))
unrestricted_gdf = unrestricted_gdf.set_crs("EPSG:4326")
unrestricted_gdf = unrestricted_gdf.to_crs(map_crs)

# Plot the cases as small black dots on a frame without ticks (an empty tick list also
# removes the tick labels).
fig, ax = plt.subplots(figsize=(8, 10))
ax.set_xticks([])
ax.set_yticks([])
unrestricted_gdf.plot(ax=ax,
                      color='black',
                      markersize=0.05)
cx.add_basemap(ax=ax, crs=map_crs, source=cx.providers.CartoDB.Positron)

# Color census tracts by poverty rate.
boston_tracts_gdf = gpd.read_file(INPUT_DATA_BOSTON_TRACTS_SHAPEFILE)[['GEOID10', 'geometry']].set_index('GEOID10')
# Reproject the tract layer so it overlays the points correctly whatever CRS the shapefile
# was saved in. A layer with no declared CRS cannot be reprojected and is plotted as-is.
if boston_tracts_gdf.crs is not None:
    boston_tracts_gdf = boston_tracts_gdf.to_crs(map_crs)
boston_tracts_gdf.index = boston_tracts_gdf.index.astype(int)  # Integer ids so they align with 'tract_geoid' below
tract_poverty_rates_df = pd.read_csv(INPUT_DATA_TRACTS, usecols=['tract_geoid', 'poor_share2010'],
                                     index_col='tract_geoid')
# Align geometry and poverty rate on the tract id; keep only tracts that have both.
# NOTE(review): tract 25025990101 is dropped explicitly; the reason is not visible here — confirm.
boston_tracts_gdf = pd.concat([boston_tracts_gdf, tract_poverty_rates_df], axis=1).dropna(
    subset=['geometry', 'poor_share2010']).drop(index=25025990101)
boston_tracts_gdf.plot(ax=ax, column=boston_tracts_gdf['poor_share2010'], cmap='OrRd', alpha=0.4, legend=True,
                       legend_kwds={'label': "Poverty Rate of Census Tract",
                                    'orientation': "horizontal",
                                    'shrink': 0.25})

figure_utilities.save_figure_and_close(fig, os.path.join(OUTPUT_FIGURES, "evictions_map.png"))

In [21]:
# Count eviction filings per calendar month.
# MonthEnd(0) moves each filing date to the last day of its own month, so all filings
# from one month share a single key.
file_dates = pd.to_datetime(df['file_date'])
df.loc[:, 'last_day_of_file_month'] = file_dates + pd.tseries.offsets.MonthEnd(0)
filings_per_month = df.groupby('last_day_of_file_month')['case_number'].count()

# Draw the monthly counts as a single black line above the grid.
fig, ax = plt.subplots()
filings_per_month.plot.line(ax=ax, color='black', zorder=100)
ax.set_xlabel("Month")
ax.set_ylabel("Number of Evictions")
ax.grid(True)
figure_utilities.save_figure_and_close(fig, os.path.join(OUTPUT_FIGURES, "filings_over_time.png"))


In [None]:
# Build the summary statistics table from the unrestricted sample.
treatment_date_variable = 'latest_docket_date'
outcomes_of_interest = [f'group_0_crimes_{constants.Analysis.MAIN_RESULTS_RADIUS}m']
summary_statistics_unrestricted, variable_display_names_dict = produce_summary_statistics(
    df, treatment_date_variable=treatment_date_variable)

# Name the two index levels, then order the rows by them.
summary_statistics_unrestricted.index = summary_statistics_unrestricted.index.set_names(["Panel", "Variable"])
column_display_names_dict = {'mean': "Mean", 'std': "S.D.", 'count': "N", '50%': 'Median'}
summary_statistics_unrestricted = summary_statistics_unrestricted.sort_values(['Panel', 'Variable'])

# Keep only outcomes of interest: collect the derived rows belonging to every other
# outcome and remove them all in one pass.
outcomes = constants.Variables.outcomes.copy()
rows_to_discard = []
for outcome in outcomes:
    if outcome in outcomes_of_interest:
        continue
    rows_to_discard.append(f"pre_treatment_change_in_{outcome}")
    rows_to_discard.append(f"total_twenty_seventeen_{outcome}")
variable_labels = summary_statistics_unrestricted.index.get_level_values(1)
summary_statistics_unrestricted = summary_statistics_unrestricted.loc[~variable_labels.isin(rows_to_discard)]

# Drop Panel F.
summary_statistics_unrestricted = summary_statistics_unrestricted.drop("Panel F: Post-treatment Outcomes",
                                                                       level=0, axis=0)

# Drop median column.
summary_statistics_unrestricted = summary_statistics_unrestricted.drop(columns='50%')

# Export to LaTeX: apply display names, format the numbers, italicise the panel labels.
filename = os.path.join(OUTPUT_TABLES, "summary_statistics.tex")
display_table = (summary_statistics_unrestricted
                 .rename(index=variable_display_names_dict)
                 .rename(columns=column_display_names_dict))
number_formats = {
    'Mean': "{:,.2f}",
    'Median': "{:,.2f}",
    'S.D.': "{:,.2f}",
    'N': "{:,.0f}",
}
styler = (display_table
          .style
          .format(formatter=number_formats)
          .format_index("\\textit{{{}}}", escape="latex", axis=0, level=0))
latex = styler.to_latex(None,
                        column_format="llcccc",
                        hrules=True,
                        clines="skip-last;data")
latex = latex.replace("{*}", "{4cm}")  # Swap every "{*}" in the generated LaTeX for a fixed 4cm width
with open(filename, 'w') as file:
    file.write(latex)
summary_statistics_unrestricted

In [22]:
# Produce treatment timings table: cases per latest-docket month, split by outcome.
treatment_timings = (df
                     .groupby(['latest_docket_month', 'judgment_for_plaintiff'])['case_number']
                     .count()
                     .reset_index()
                     .fillna(0))
# One row per month, one column per outcome; month/outcome pairs with no cases become 0.
treatment_timings = treatment_timings.pivot(index='latest_docket_month', columns='judgment_for_plaintiff').fillna(0)
treatment_timings.columns = ["Cases Won By Defendant", "Cases Won By Plaintiff"]

# Share of the whole sample whose latest docket entry falls in each month.
outcome_columns = ['Cases Won By Plaintiff', 'Cases Won By Defendant']
portion_of_all_cases = treatment_timings[outcome_columns].sum(axis=1) / len(df)
treatment_timings['Portion of All Cases'] = portion_of_all_cases

# Put a totals row above the monthly rows.
sum_across_filing_date = treatment_timings.sum(axis=0).to_frame().T
sum_across_filing_date.index = ["All Months"]
treatment_timings = pd.concat([sum_across_filing_date, treatment_timings], axis=0)
treatment_timings = treatment_timings.rename_axis("Last Docket Date")

# Export to LaTeX.
filename = os.path.join(OUTPUT_TABLES, "treatment_timings.tex")
timing_formats = {'Cases Won By Plaintiff': '{:,.0f}',
                  'Cases Won By Defendant': '{:,.0f}',
                  'Portion of All Cases': '{:0.2f}'}
(treatment_timings
 .style
 .format(formatter=timing_formats)
 .to_latex(filename, column_format="lccc", hrules=True))
treatment_timings

Unnamed: 0_level_0,Cases Won By Defendant,Cases Won By Plaintiff,Portion of All Cases
Last Docket Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
All Months,734.0,955.0,1.0
2019-04,1.0,5.0,0.003552
2019-05,1.0,7.0,0.004737
2019-06,51.0,24.0,0.044405
2019-07,64.0,47.0,0.065719
2019-08,68.0,110.0,0.105388
2019-09,76.0,101.0,0.104796
2019-10,90.0,98.0,0.111308
2019-11,62.0,76.0,0.081705
2019-12,67.0,75.0,0.084073


In [None]:
# Share (a fraction, not a percentage) of plaintiff-won cases located in tracts whose
# 'poor_share2010' exceeds 0.20.
# Note: df is narrowed to plaintiff-won cases here and stays narrowed afterwards.
df = df[df['judgment_for_plaintiff'].eq(1)]
original_N = len(df)
in_poor_tract = df['poor_share2010'] > 0.20
cases_in_poor_tracts = int(in_poor_tract.sum())
cases_in_poor_tracts / original_N