In [None]:
# pandas loads and reshapes the tabular data; map_generator is the project-local
# helper whose getmap / getmap_with_year / savemap_with_year methods are used below.
import pandas as pd
from helper import map_generator
# One shared instance, reused by every mapping cell in this notebook.
map_gen = map_generator()

# MAP GENERATOR REPORT

# Social Science and Map Visualizations: 

* Data collection in the social sciences always involves a spatial element insofar as humans are geographically bound objects of analysis.
* As such, spatial visualizations such as maps serve as a useful tool for exploring the implications of a given social science data set.
* Maps are effective tools in the social science domain due to their ability to display historical narratives that might go unnoticed otherwise. 
* The ubiquity of maps as a tool for data communication ensures that users of differing backgrounds are readily able to extract useful information from geographic representations of data. 

* Although maps effectively communicate some variables, some features of data (such as uncertainty) are difficult to effectively encode in map form. Even so, maps should be considered an essential tool for any data exploration in the domain of social sciences. 


# PREPARING SOME DATA SETS
1. Make sure there's a 'FIPS_Code' column (and a 'year' column).
2. Make sure the variable you'd like to mapify is numeric (an integer or a float, such as a ratio). 

In [None]:
## Load the Vera Institute's Incarceration Trends data and expose its county
## identifier column ('fips') under the name 'FIPS_Code'.

file = 'data/incarcerationtrends/incarceration_trends.csv'
df = pd.read_csv(file).rename(columns={'fips': 'FIPS_Code'})
len(df)  # number of rows, shown as the cell output

In [None]:
## Load the property tax data, expose its county identifier column
## ('County_Fip') under the name 'FIPS_Code', and convert the value columns
## to integers.

file = 'data/PropertyTaxDF.csv'
df2 = pd.read_csv(file).rename(columns={'County_Fip': 'FIPS_Code'})

# Clean each column exactly once: strip the commas, then cast to int.
# (The earlier version of this cell converted 'Average_House_Value' twice.)
for value_column in ('Average_House_Value', 'Average_RE_Tax_Per_$1000'):
    df2[value_column] = df2[value_column].replace(',', '', regex=True).astype(int)

len(df2)  # number of rows, shown as the cell output

# USING THE MAP GENERATOR 

* Firstly, look at the columns, to identify the variables of interest. 
* After choosing a column of interest, ensure that it's a properly formatted integer and look at the summary statistics (in order to choose appropriate boundaries for the color schemes). 
* If necessary, use existing columns to create new variables, such as ratios. 

In [None]:
## Looking at variables of interest.
#list(df.columns),list(df2.columns)

In [None]:
## Derive the tooltip label and the ratio columns that the maps below plot.

df['county_state'] = df['county_name']+', '+df['state'] ## "County, State" label, so that the state is included in the tooltip.
df['black_prison_ratio'] = df['black_prison_pop']/df['black_pop_15to64'] ## Ratio of Black prison population to total Black population 15 to 64
df['white_prison_ratio'] = df['white_prison_pop']/df['white_pop_15to64'] ## Ratio of White prison population to total White population 15 to 64
df['incar_rate'] = df['total_prison_pop']/df['total_pop'] # Proportion of total population that is prisoners
df['black_ratio'] = df['black_pop_15to64']/df['total_pop_15to64'] ## Proportion of total population (15 to 64) that is Black

In [None]:
#df['white_prison_adm_rate'].describe(), df['incar_rate'].describe(),df['black_prison_ratio'].describe(), df['black_pop_ratio'].describe()

In [None]:
## Proof of concept using df2, the property-tax-by-county data set.
## List its columns in order to pick the variable to map.
df2.columns

## The Bokeh visualization library was used to create a function that takes the following parameters:

* **df** - the name of the data frame 
* **year** - int - the year of interest (there is also a version for data frames without years)
* **county_name_column** - str - the name of the counties as per the data frame
* **variable_column** - str - the variable of interest in the data frame
* **colors** - str - choice of color scheme from Color Brewer palette https://rdrr.io/cran/RColorBrewer/man/ColorBrewer.html 
* **low** - int or float - the min of the color scheme 
* **high** - int or float - the max of the color scheme 
* **title** - str - the title of the map


In [None]:
## Proof-of-concept map: average home value per county (df2 = property taxes by county).
map_gen.getmap(
    df=df2,
    county_name_column='County_Name',       # column with the names of the counties
    variable_column='Average_House_Value',  # column with the variable of interest
    colors='PuOr',                          # color scheme
    low=50000, high=300000,                 # min / max of the palette
    title='Average Home Value',             # title of the plot
)

# Telling the Story of Mass Incarceration with Maps

In [None]:
## TODO (known issue): maps for the years 1996 to 2004 do not render; the cause has not been identified yet.

## **Georgia**

In exploring the Vera Institute's incarceration data, it made sense to start at the beginning. Using the map generator to look at incarceration rates in **1970** (the first year of available data), we see that only Georgia has any data at all (**MAP 1**). What does this mean? One way to interpret it is that Georgia was home to the vast majority of incarcerated Americans in 1970. Alternatively, one might note that perhaps Georgia started collecting incarceration data relatively early. The correct interpretation might be a mix of both. Perhaps Georgia was at the vanguard of data collection regarding prisoners in part due to its extensive incarceration apparatus. After all, social science data collection is never independent from realities of political economy. 

This relatively early incarceration data from Georgia suggests that one must look beyond the data and investigate the history that could have generated such data in the first place. A quick Google search makes it clear that Georgia was indeed at the vanguard of data collection--for frightful reasons. Although the Thirteenth Amendment abolished American slavery, the language included one exception - forced labor was banned "except as a punishment for crime". The Reconstruction-era South was quick to latch onto this wording as a means to re-enslave the recently emancipated Black population. This new penchant for convict leasing meant that the exploitation of Black Americans thus began to take on a new form--mass incarceration. According to the 'Wikipedia' page on convict leasing (https://en.wikipedia.org/wiki/Convict_leasing), Georgia was the first to begin the large-scale re-enslavement of Black Americans: "In Georgia convict leasing began in April 1868, when Union General and newly appointed provisional governor Thomas H. Ruger issued a convict lease for prisoners to William Fort for work on the Georgia and Alabama Railroad. The contract specified "one hundred able bodied and healthy Negro convicts" in return for a fee to the state of $2500." Given the early beginnings of Georgia's carceral state, it's unsurprising that Georgia was already engaging in robust data collection by 1970. Prisoners were assets after all.

Further reading: https://theconversation.com/prison-records-from-1800s-georgia-show-mass-incarcerations-racially-charged-beginnings-96612


In [None]:
## MAP 1 -- proportion of each county's population that is incarcerated, 1970
map_gen.getmap_with_year(
    df=df, year=1970,
    county_name_column='county_state',
    variable_column='incar_rate',
    colors='BuPu', low=0, high=0.003,  # palette runs from 0 to 0.3% of the population
    title='Incarceration Rates (1970)',
)

By **1983** data collection regarding incarceration begins to take off across the country (**MAP 2**). In order to allow for appropriate granularity, the 'max' parameter for the color scheme has to be adjusted to account for increasing incarceration rates (0.003 to 0.005). Indeed, in the 1970s, Nixon began the 'War on Drugs' with the 'tough on crime' campaign that has come to be seen as 'thinly veiled racial rhetoric' by modern analysts. (https://www.brennancenter.org/our-work/analysis-opinion/history-mass-incarceration) The prison population truly began to expand under Reagan, however, as the crack epidemic (a public health crisis) was used as a pretext to terrorize communities of color--Black communities in particular. 

In [None]:
## MAP 2 -- proportion of each county's population that is incarcerated, 1983
## NOTE(review): the text introducing this map says the palette max was raised
## from 0.003 to 0.005, but this cell still uses 0.003 -- confirm which is intended.
map_gen.getmap_with_year(
    df=df, year=1983,
    county_name_column='county_state',
    variable_column='incar_rate',
    colors='BuPu', low=0, high=0.003,
    title='Incarceration Rates (1983)',
)

In looking at the 1983 map, it becomes immediately clear that the American South is leading the way with higher incarceration rates. Given the racialized origins of mass incarceration, it is perhaps unsurprising that these areas are home to more concentrated Black communities. 

In [None]:
## MAP 3 -- proportion of each county's population (aged 15 to 64) that is Black, 2015
map_gen.getmap_with_year(
    df=df, year=2015,
    county_name_column='county_state',
    variable_column='black_ratio',
    colors='Greens', low=0, high=0.8,
    title='Black Population Proportion (2015)',
)

By **1995**, the trend is clear. Incarceration rates are exploding across the country. 

In [None]:
## MAP 4 -- proportion of each county's population that is incarcerated, 1995
map_gen.getmap_with_year(
    df=df, year=1995,
    county_name_column='county_state',
    variable_column='incar_rate',
    colors='BuPu', low=0, high=0.005,  # palette runs from 0 to 0.5% of the population
    title='Incarceration Rates (1995)',
)

By **2005**, it is clear that a new color scheme is needed to account for the increasing incarceration rates. (MAP 5)

In [None]:
## MAP 5 -- proportion of each county's population that is incarcerated, 2005
map_gen.getmap_with_year(
    df=df, year=2005,
    county_name_column='county_state',
    variable_column='incar_rate',
    colors='BuPu', low=0, high=0.01,  # palette max raised to 1% of the population
    title='Incarceration Rates (2005)',
)

By **2015**, it's clear that mass incarceration is a national phenomenon, with the darkest color representing 1 in 100 people being incarcerated. (MAP 6)

In [None]:
## MAP 6 -- proportion of each county's population that is incarcerated, 2015
map_gen.getmap_with_year(
    df=df, year=2015,
    county_name_column='county_state',
    variable_column='incar_rate',
    colors='BuPu', low=0, high=0.01,  # palette max = 1 in 100 people incarcerated
    title='Incarceration Rates (2015)',
)

Given that the data have steered us towards examining the history of the mass incarceration of Black people, it is perhaps useful to explore the extent to which Black Americans are represented in the incarcerated population. (MAP 7)

Using the tooltip to examine the particularly 'blue' counties allowed for Google searches to reveal the origins of the pattern. (NOTE: the darkest 'blue' connotes 1000 or more Black prisoners). 

In [None]:
## MAP 7 -- Black prison population (a raw count, not a rate) per county, 2015
## The palette max of 1000 means the darkest color marks 1000 or more Black prisoners.
map_gen.getmap_with_year(
    df=df, year=2015,
    county_name_column='county_state',
    variable_column='black_prison_pop',
    colors='BuPu', low=0, high=1000,
    # The title now describes the plotted variable; it previously read
    # 'black_prison_adm_rate', which names a different quantity.
    title='Total Black Prison Population (2015)',
)

Despite the usefulness of map visualizations, mapping populations alone can distort deeper patterns. Indeed, what becomes lost in this geospatial data is the extent to which people move. With respect to prisons, it is well-documented that prisoners are frequently moved from high-density counties to low-density counties. This pattern isn't immediately visible in the data. In order to examine this trend via mapping techniques, one must be creative with the data. As such, a new column was created in the data frame ('black_prison_ratio') which calculated the ratio of the incarcerated Black population to the overall Black population (aged 15-64). Though analytically imperfect, this new column reveals a pattern whereby Black residents as a percentage of the population are being incarcerated at extremely high rates (often more than 10%) in some counties. This pattern suggests that Black prisoners are being moved from high-density counties to low-density counties. (MAP 8) 

Moreover, in replicating the above map generation with the White prison population (MAP 9), this pattern of displacement does not appear. 

In [None]:
## MAP 8 -- ratio of Black prison population to Black population (15-64), 2010
map_gen.getmap_with_year(
    df=df, year=2010,
    county_name_column='county_state',
    variable_column='black_prison_ratio',
    colors='BuPu', low=0, high=0.1,  # palette max = 10% of the Black population (15-64)
    # The title now describes the plotted ratio; it previously read
    # 'black_pop_ratio', which suggests a population share instead.
    title='Proportion of Black Population That Is Incarcerated (2010)',
)

In [None]:
## MAP 9 -- ratio of White prison population to White population (15-64), 2010
## NOTE: McPherson County, Nebraska has <500 people
map_gen.getmap_with_year(
    df=df, year=2010,
    county_name_column='county_state',
    variable_column='white_prison_ratio',
    colors='RdPu', low=0, high=0.1,  # palette max = 10% of the White population (15-64)
    # The title now describes the plotted ratio; it previously read
    # 'white_pop_ratio', which suggests a population share instead.
    title='Proportion of White Population That Is Incarcerated (2010)',
)

# Conclusion 

The usage of mapping techniques to visualize the Vera Institute's incarceration trends data proved to be an effective technique for the historical analysis of mass incarceration in America. The very presence of thorough data in Georgia going back to 1970 indicated a large-scale incarceration apparatus was already being operated in that state. A review of the history of the Jim Crow South indicates that Georgia began the trend of 'convict leasing'--a widespread practice used to re-enslave Black Americans after the 13th amendment allowed for a labor-as-punishment loophole.

As we watch incarceration rates skyrocket across America from the 1980s onwards, it becomes clear that the numbers reflect changes in federal policy as the Nixon administration began a tough-on-crime political culture that flourished during the Reagan years and was left unchallenged until relatively recently. Given that the modern prison system emerged from the 'convict leasing' system that began in Georgia, it is unsurprising that the trends toward mass incarceration began in the Deep South before spreading throughout the country. 

Moreover, the use of map data allowed us to visualize the extent to which Black Americans are displaced from high-density counties to low-density counties--a pattern that was not replicated for White Americans. This pattern further indicates the extent to which Black Americans have been targeted by mass incarceration, as well as the extent to which geographically bound data analysis has its shortcomings. Given that the relationship between social inputs (e.g. education spending) and social outcomes (e.g. incarceration status) is not necessarily bound by geography, it may not be justifiable to dwell on statistical relationships that hinge on geographic location (e.g. county). 

Finally, the usage of mapping techniques allowed for a rapid evaluation of the quality of the Vera Institute Data Set. Although substantial, this data set has significant gaps which would undermine any statistical relationships that might be gleaned from a large-n analysis. For instance, data from Illinois are often missing from the data set. Given the state's complex history of race and mass incarceration, its exclusion suggests that any statistical correlations discovered in the data set should be taken with a grain of salt. Nonetheless, the data largely bear out the documented history of an incarceration apparatus that targets Black Americans. This is a mass tragedy that will force American society to contemplate the meaning of 'justice' for generations to come.

## SAVE MAP 

In [None]:
## Save MAP 1 (incarceration rates, 1970) to charts/map_plots/MAP_1.png.
## The trailing ';' suppresses the cell's output.
map_gen.savemap_with_year(
    df=df, year=1970,
    county_name_column='county_state',
    variable_column='incar_rate',
    colors='BuPu', low=0, high=0.003,
    title='MAP 1: Incarceration Rates (1970)',
    filename='charts/map_plots/MAP_1.png',
);

In [None]:
## Save MAP 2 (incarceration rates, 1983) to charts/map_plots/MAP_2.png.
## The trailing ';' suppresses the cell's output.
map_gen.savemap_with_year(
    df=df, year=1983,
    county_name_column='county_state',
    variable_column='incar_rate',
    colors='BuPu', low=0, high=0.003,
    title='MAP 2: Incarceration Rates (1983)',
    filename='charts/map_plots/MAP_2.png',
);

In [None]:
## Save MAP 3 (Black population proportion, 2015) to charts/map_plots/MAP_3.png.
## The trailing ';' suppresses the cell's output.
map_gen.savemap_with_year(
    df=df, year=2015,
    county_name_column='county_state',
    variable_column='black_ratio',
    colors='Greens', low=0, high=0.8,
    title='MAP 3: Black Population Proportion (2015)',
    filename='charts/map_plots/MAP_3.png',
);

In [None]:
## Save MAP 4 (incarceration rates, 1995) to charts/map_plots/MAP_4.png.
## The trailing ';' suppresses the cell's output.
map_gen.savemap_with_year(
    df=df, year=1995,
    county_name_column='county_state',
    variable_column='incar_rate',
    # Palette max set to 0.005 so the saved image matches the MAP 4 shown in the
    # notebook; this cell previously used 0.01, the scale of the 2005/2015 maps.
    colors='BuPu', low=0, high=0.005,
    title='MAP 4: Incarceration Rates (1995)',
    filename='charts/map_plots/MAP_4.png',
);

In [None]:
## Save MAP 5 (incarceration rates, 2005) to charts/map_plots/MAP_5.png.
## The trailing ';' suppresses the cell's output.
map_gen.savemap_with_year(
    df=df, year=2005,
    county_name_column='county_state',
    variable_column='incar_rate',
    colors='BuPu', low=0, high=0.01,
    title='MAP 5: Incarceration Rates (2005)',
    filename='charts/map_plots/MAP_5.png',
);

In [None]:
## Save MAP 6 (incarceration rates, 2015) to charts/map_plots/MAP_6.png.
## The trailing ';' suppresses the cell's output.
map_gen.savemap_with_year(
    df=df, year=2015,
    county_name_column='county_state',
    variable_column='incar_rate',
    colors='BuPu', low=0, high=0.01,
    title='MAP 6: Incarceration Rates (2015)',
    filename='charts/map_plots/MAP_6.png',
);

In [None]:
## Save MAP 7 (Black prison population count, 2015) to charts/map_plots/MAP_7.png.
## The trailing ';' suppresses the cell's output.
map_gen.savemap_with_year(
    df=df, year=2015,
    county_name_column='county_state',
    variable_column='black_prison_pop',
    colors='BuPu', low=0, high=1000,
    title='MAP 7: Total Black Prison Population',
    filename='charts/map_plots/MAP_7.png',
);

In [None]:
## Save MAP 8 (ratio of Black prison population to Black population 15-64, 2010)
## to charts/map_plots/MAP_8.png. The trailing ';' suppresses the cell's output.
map_gen.savemap_with_year(
    df=df, year=2010,
    county_name_column='county_state',
    # Plot the ratio column, as the title and the 0-0.1 palette range require.
    # This cell previously plotted 'black_prison_pop', a raw count.
    variable_column='black_prison_ratio',
    # NOTE(review): the MAP 8 cell displayed in the notebook uses 'BuPu';
    # confirm which palette the saved image should use.
    colors='RdPu', low=0, high=0.1,
    title='MAP 8: Proportion of Black Population That Is Incarcerated',
    filename='charts/map_plots/MAP_8.png',
);

In [None]:
## Save MAP 9 (ratio of White prison population to White population 15-64, 2010)
## to charts/map_plots/MAP_9.png. The trailing ';' suppresses the cell's output.
map_gen.savemap_with_year(
    df=df, year=2010,
    county_name_column='county_state',
    variable_column='white_prison_ratio',
    colors='RdPu', low=0, high=0.1,
    title='MAP 9: Proportion of White Population That Is Incarcerated',
    filename='charts/map_plots/MAP_9.png',
);