In [1]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
from scipy.stats import linregress
import scipy.stats as st
from datetime import datetime
import gmaps
import os

# Import API key
from api_keys import g_key



### Importing data set from Cleanup notebook output to start analysis and visualization

In [2]:
# Import mother_ship for use in analysis notebook
mother_ship = pd.read_csv('../Resources/mother_ship.csv') 
mother_ship

Unnamed: 0,SID_Number,TDCJ_Number,Name,Current_Facility,Gender,Race,Age,Projected_Release,Max_Sentence_Date,Parole_Eligible_Date,...,Sentence_Date,Offense_Date,Sentence_Years,Last_Parole_Decision,Next_Parole_Review,Parole_Review_Status,Offense_Category,Bin_Cat,Age_Bin,Population_2020
0,234378,2174505,"LITTLE,AVA JESSUP",Young,F,W,75,10/01/2020,11/15/2025,09/26/2018,...,12/14/2017,09/13/2017,8.0,Approved on 07/01/2020,,NOT IN REVIEW PROCESS,2200,Drugs,70_to_80,131710
1,1468825,2205051,"EVANS,DAYTON BUD",Pack,M,W,70,04/21/2028,03/11/2043,10/20/2020,...,03/12/2018,03/31/2017,25.0,Denied on 08/13/2020,08/2021,NOT IN REVIEW PROCESS,2200,Drugs,70_to_80,131710
2,1505838,2119905,"PENNINGTON,RICKEY DWAYNE",Pack,M,W,68,12/21/2028,12/21/2028,04/11/2018,...,02/08/2017,12/21/2016,12.0,Denied on 01/02/2020,12/2020,IN PAROLE REVIEW PROCESS,2200,Drugs,60_to_70,131710
3,2141308,2153019,"JACOBS,ANTHONY",Ellis,M,W,64,12/18/2023,06/14/2033,02/28/2019,...,07/27/2017,10/11/2016,16.0,Denied on 02/19/2020,02/2021,NOT IN REVIEW PROCESS,2200,Drugs,60_to_70,131710
4,2360914,2290868,"WALDEN,LONNIE WAYNE",Gurney,M,W,62,04/02/2033,05/28/2049,,...,10/24/2019,05/29/2019,30.0,,Unavailable at this time.,NOT IN REVIEW PROCESS,2200,Drugs,60_to_70,131710
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
120702,5826119,2251746,"MUNOZ,JOSE JR",Polunsky,M,H,40,04/26/2022,07/24/2026,08/19/2019,...,02/04/2019,07/24/2018,8.0,Denied on 07/07/2020,07/2021,NOT IN REVIEW PROCESS,3100,Evading_Arrest,40_to_50,783
120703,4231127,2043573,"BRAVO,CARLOS RUIZ",Wynne,M,H,61,06/30/2033,04/25/2060,12/31/2019,...,12/02/2015,10/19/2012,45.0,Denied on 12/09/2019,12/2020,IN PAROLE REVIEW PROCESS,2200,Drugs,60_to_70,783
120704,7987519,2284322,"MORRIS,JUSTIN LEE",Dominguez,M,W,31,05/30/2021,05/30/2021,,...,07/08/2019,07/21/2017,2.0,,,,1600,Theft,30_to_40,783
120705,7273574,2311921,"GONZALEZ,MIGUEL",Willacy County,M,H,30,07/12/2022,02/09/2027,01/13/2020,...,01/31/2020,06/04/2016,8.0,Denied on 06/24/2020,06/2021,NOT IN REVIEW PROCESS,100,Human_Trafficking,30_to_40,783


### Importing csv file that has county seat coordinates for creating heat maps

In [3]:
# Read in county coordinates
# this info will be used in heat map creation
# for location of county seat within each county

# --------NOTE -------- lAT AND lNG ARE FLIPPED -----NEED TO RENAME BELOW

countycoordfull_df = pd.read_csv('../Resources/Texas_Counties_Centroid_Map.csv')
# countycoordfull_df

### Starting process to create data set for analysis and visualization

In [4]:
# Creating df to start gathering data for plotting - as noted above - need to switch lat lng

countycoord_df = countycoordfull_df.loc[:,["County", "Lng", "Lat"]]
countycoord_df = countycoord_df.rename(columns={"Lat": "Longitude", "Lng" : "Latitude"})
# countycoord_df

In [5]:
#creating df from mother_ship - dropping columns not used

tymothership = mother_ship.loc[:,["SID_Number", "TDCJ_Number", "Current_Facility", 
                                  "Gender", "Race", "Age", "County", "Offense_Code", 
                                  "TDCJ_Offence", "Sentence_Years", "Offense_Category", 
                                  "Bin_Cat", "Age_Bin", "Population_2020"]]
# tymothership

In [6]:
#creating df of info to be plotted

typlot = pd.merge_ordered(tymothership, countycoord_df, fill_method='ffill', left_by='County')
# typlot
# typlot.isna().sum()
# typlot.nunique()

### Side activities to investigate potential issues with county data set.  There are 254 counties in Texas.  253 counties were identified in the data set from the State prison system and 252 unique counties populations

In [7]:
# Running nunique identified 252 unique populations for 253 counties 
# need to investigate

# Import renamed column county pop file for use in analysis notebook
rcproj_county2020_df =  pd.read_csv('../Resources/rcproj_county2020_df.csv')

# rcproj_county2020_df

In [8]:
# looking for reason of 252 unique populations - possible dup
dupPop = rcproj_county2020_df[rcproj_county2020_df.duplicated(["Population_2020"])]
# dupPop

In [9]:
# Found dup populations and identified the 2 county names - both are 
# relatively small so will leave values as presented by owner of data

rcproj_county2020_df.loc[rcproj_county2020_df['Population_2020'] == 13592]

Unnamed: 0,County,Population_2020
57,Dawson,13592
237,Ward,13592


In [10]:
# The are 254 counties in Texas.  nunique identified 253
# so either there is a dup or 1 county does not have person in 
# prisoner data issued by State on Oct 28, 2020.  Data is upated
# regularly, so the data used for project is only a snap shot in time.

# Found a numpy function that will subtract 1 array from another - 
# probably could have used one that compares names - but chose
# one that will subtract one from another - thus yielding the population
# of the missing county.  Once that is obtained, just match the pop with the county

# The creates the array of counties populations in Texas

pop_original = rcproj_county2020_df["Population_2020"].unique()
pop_original
spop_original = np.sort(pop_original)
# spop_original

In [11]:
# This creates the array of county populations identified in the Texas prison file

pop_typlot = typlot["Population_2020"].unique()
spop_typlot = np.sort(pop_typlot)
# spop_typlot

In [12]:
# This compares the 2 arrays and provides the outlier
# population number

result = np.setxor1d(spop_original, spop_typlot)
result

array([309], dtype=int64)

In [13]:
# This is the lookup for the county that has a population equal to 309
#  this gives the name of the county that as of the time of the dataset from Texas
# does not have a person in the system

countynotlisted = rcproj_county2020_df.loc[rcproj_county2020_df['Population_2020'] == 309]
countynotlisted

Unnamed: 0,County,Population_2020
134,King,309


### It was determined that one county, King, did not have a person in the prison system data set which was updated on Oct 28, 2020.  Also, it was determined that 2 counties were assigned the same population by the State for 2020.  King county coordinates were removed from the coordinate listing and the duplicate population was left because it was data presented by the State.

In [14]:
# Creates adjusted listing of counties with population (removing King)
# so it can be merged with grouped _cobincat and then per pop values can be determined
# Dropping the county to ensure NaN's do not show up in future eforts
# This will be used in developing the database for creating heat maps 
# to present data visualization

adrcproj_county2020_df = rcproj_county2020_df.drop([134])
# adrcproj_county2020_df

### Creating a baseline population heatmap for general comparison in presentation

In [15]:
# Create baseline population distribution heat map
# this will allow comparison to offender count heat maps

baselineheat_df = pd.merge_ordered(adrcproj_county2020_df, countycoord_df, fill_method='ffill', left_by='County')
# baselineheat_df

In [16]:
gmaps.configure(api_key=g_key)

In [17]:
# data from baseline heat df

# # Store 'Lat' and 'Lng' into  locations
locationsbase = baselineheat_df[["Latitude", "Longitude"]]

# Weight
baseweight = baselineheat_df["Population_2020"]

figure_layout = {
    'width': '800px',
    'height': '600px',
    'border': '1px solid black',
    'padding': '1px',
    'margin': '0 auto 0 auto'}

# Plot Heatmap
fig = gmaps.figure(layout=figure_layout,zoom_level=6,center=(31.3,-99.5))

# Create heat layer
heat_layerbase = gmaps.heatmap_layer(locationsbase, weights=baseweight, 
                                 dissipating=False, max_intensity=1500000,
                                 point_radius=1)

# Add layer
fig.add_layer(heat_layerbase)

# Display figure
fig

Figure(layout=FigureLayout(border='1px solid black', height='600px', margin='0 auto 0 auto', padding='1px', wi…

### After resolving county count and population questions above, and creating baseline heatmap for population - continue activities to build data sets for conducting analysis.

### The final data sorts are not in the order of the presentation.  Because of the large amount of data, many different "sorts" were created to review and see which best represented the data.  In many cases, the sort was too granular which resulted in just points without a story.  The final sorts used to create the heat maps were kept and the others were deleted to remove clutter.   Therefore, the work below is not in the order of the presentation.  Notes will be presented to match the data with the respective heatmap.

In [18]:
# Groupby function to collect information by County and Bin_Cat aggregating on Offense_Category
# Another step in merging dfs to create plot dataset

# Task was assigned to present heat maps based on County Outliers - the term was
# open ended to allow for creating more specific visualizations as the data 
# manipulation presented itself

# df created ties county to number of individuals in prison for that 
# respective offense (Bin_Cat)

grouped_cobincat = typlot.groupby(["County", "Bin_Cat"]).agg({'Offense_Category': ['count']}).reset_index()
grouped_cobincat.columns = ["County", "Bin_Cat", "Count"]
# grouped_cobincat
# grouped_cobincat.dtypes

# Groupby that was not used - data not presented well - added additional parameter
# grouped_bincat = typlot.groupby(["County"]).agg({'Offense_Category': ['count']})
# grouped_bincat.head(50)


In [19]:
# Merging step and creating additional columns to allow for normalizing data by population
# Kept overall poulation column for reference, used per 1000 to create maps

typlotsub = pd.merge_ordered(grouped_cobincat , adrcproj_county2020_df, fill_method='ffill', left_by='County')
typlotsub["Pop_per_1000"] = (typlotsub["Population_2020"] / 1000).round(3)
typlotsub["Offenders_per_Pop"] = (typlotsub["Count"] / typlotsub["Population_2020"]).round(4)
typlotsub["Offenders_per_1000"] = (typlotsub["Count"] / typlotsub["Pop_per_1000"]).round(3)
# typlotsub

In [20]:
# this is only for informational purposes
# total per top 5 bins
typlotsubcounts = typlotsub.groupby(["Bin_Cat"]).agg({'Count': ['sum']})
typlotsubcounts.columns = typlotsubcounts.columns.droplevel(0)
typcss = typlotsubcounts.sort_values(by=['sum'], ascending=False)
typcss.head()

Unnamed: 0_level_0,sum
Bin_Cat,Unnamed: 1_level_1
Sexual_Assault,22052
Assault,17812
Robbery,17630
Drugs,17151
Murder,14937


In [21]:
# this step merges df that has coordinates of each county seat for the 
# respective county - this will be used to create the heat maps

typlotalldata = pd.merge_ordered(typlotsub, countycoord_df, fill_method='ffill', left_by='County')
# typlotalldata

### These population bins were initially created for purposes of use in the heatmaps.  Once created and data plotted - it was determined the data was still to granular and the bins were noted used.    The information was left because because it was incorporated into the data set and attempting to remove would potentially create a bigger issue.  Also, at some point in the future, the bins could be used to further slice the data for more detailed review. 

In [22]:
# Create pop bins - doing this now to allow for evaluting data - 
# created because larger counties will wash out their rates
# these are very granular bins that may be used for creating data visualization on 
# county size range basis - as noted above  - may or may not use based on final eval

# must have 1 extra bin then actual catorgies - reason for the 0 at the beginning

agebins = [0, 1000, 10000, 50000, 100000, 250000, 500000, 750000, 1000000, 10000000]

# Create labels for these bins
agegroup_labels = ["LT1K", "1K_to_10K", "10K_to_50K", "50K_to_100K",
                   "100K_to_250K", "250K_to_500K", "500K_to_750K",
                   "750K_to_1M", "GT1M"]  

# Create new column - Pop_Bin

typlotalldata["Pop_Bin"] = pd.cut(typlotalldata["Population_2020"], agebins, labels=agegroup_labels)
# typlotalldata

In [23]:
#Sort 1 - overall sorted by Offense per 1000
# there is so much data  - chose to look at top 5 offense categories
# Indiviual dfs will be created to then create heatmaps and how
# dispersed or concentrated the respective top 5 categories are
# The top 5 are Sexual Assault, Assault, Murder, Robbery, and Drugs

# These 5 categories represent ~74% of the prison population
# if Theft and Burglary were combined with Roberry - value would approach 80%

typadsortb1k = typlotalldata.sort_values(by=['Count'], ascending=False)
typadsortb1k.head()

Unnamed: 0,County,Bin_Cat,Count,Population_2020,Pop_per_1000,Offenders_per_Pop,Offenders_per_1000,Latitude,Longitude,Pop_Bin
1649,Harris,Robbery,4849,4978845,4978.845,0.001,0.974,29.859671,-95.397821,GT1M
1645,Harris,Murder,3192,4978845,4978.845,0.0006,0.641,29.859671,-95.397821,GT1M
917,Dallas,Robbery,2595,2734111,2734.111,0.0009,0.949,32.766537,-96.777819,GT1M
1650,Harris,Sexual_Assault,2520,4978845,4978.845,0.0005,0.506,29.859671,-95.397821,GT1M
918,Dallas,Sexual_Assault,2305,2734111,2734.111,0.0008,0.843,32.766537,-96.777819,GT1M


In [24]:
# sort verifies largest pop counties have the hights total counts
# no anomilies identified - df not used but maintined for review as needed
#Sort 2 - overall sorted by highest number count

typadsortcou = typlotalldata.sort_values(by=['Count'], ascending=False)
# typadsortcou.head(20)

In [25]:
# The following data manipulation serves the purpose of 
# creating total population based dfs that can be used to compare rates
# for the top 5 offense categories.  There is a df for each pop and offense
# category combination - understand should have used a loop - time to trouble shot
# a loop would take longer since I am learning than to bang it out manually  - cut n, paste
# future goal is a loop

# Not all datasets will be presented for class project because of time restraints
# more interesting heat maps will be selected and presented

### Data set for heatmaps that shows total number of offenses committed per county without taking into account the population of the county (Heatmap 1) and the same data, but sorted based on the counties with highest offense rates per 1,000 population (Heatmap 2).  These map are compared to the overall population heatmap to show how offense activities line up with state population centers.

In [26]:
# Data for Heat map 1 and 2 - the table below is just sorted 
# by count of offenses.  The data used in heatmap 1 is cws(count)
# and the data for heatmap 2 is cws (offenders per 1000)
# Sort 12  - Identifies each counties worst Bin per 1000

countysworst = typlotalldata.loc[typlotalldata.groupby('County').Count.idxmax().values]
countysworst
cws = countysworst.sort_values(by=['Count'], ascending=False)
cws.head()

Unnamed: 0,County,Bin_Cat,Count,Population_2020,Pop_per_1000,Offenders_per_Pop,Offenders_per_1000,Latitude,Longitude,Pop_Bin
1649,Harris,Robbery,4849,4978845,4978.845,0.001,0.974,29.859671,-95.397821,GT1M
917,Dallas,Robbery,2595,2734111,2734.111,0.0009,0.949,32.766537,-96.777819,GT1M
3494,Tarrant,Sexual_Assault,1715,2143755,2143.755,0.0008,0.8,32.771852,-97.291165,GT1M
263,Bexar,Robbery,1492,2093502,2093.502,0.0007,0.713,29.448845,-98.519663,GT1M
3617,Travis,Sexual_Assault,691,1291502,1291.502,0.0005,0.535,30.334233,-97.781947,GT1M


In [27]:
# Heat Map 1 - using data from Sort 12
# this gives an overall heat map for each county and their "worst" bin
# based on total number of individuals in prision - the heat map matches 
# population distribution


gmaps.configure(api_key=g_key)

# Store 'Lat' and 'Lng' into  locations 
locations = cws[["Latitude", "Longitude"]]

# Weight
bincount = cws["Count"]

figure_layout = {
    'width': '800px',
    'height': '600px',
    'border': '1px solid black',
    'padding': '1px',
    'margin': '0 auto 0 auto'}

# Plot Heatmap
fig = gmaps.figure(layout=figure_layout,zoom_level=6,center=(31.3,-99.5))

# Create heat layer
heat_layer = gmaps.heatmap_layer(locations, weights=bincount, 
                                 dissipating=False, max_intensity=1000,
                                 point_radius=1)


# Add layer
fig.add_layer(heat_layer)

# Display figure
fig


Figure(layout=FigureLayout(border='1px solid black', height='600px', margin='0 auto 0 auto', padding='1px', wi…

In [28]:
# Heat Map 2  - using data from Sort 12
# this gives an overall heat map for each county and their "worst" bin
# per 1000 pop - this removes the larger county dilution - presents
# different look and allows for inclusion and evaluation of the smaller pop
# size counties in the discussion

gmaps.configure(api_key=g_key)

# Store 'Lat' and 'Lng' into  locations 
locations = cws[["Latitude", "Longitude"]]

# Weight
bincount = cws["Offenders_per_1000"]

figure_layout = {
    'width': '800px',
    'height': '600px',
    'border': '1px solid black',
    'padding': '1px',
    'margin': '0 auto 0 auto'}

# Plot Heatmap
fig = gmaps.figure(layout=figure_layout,zoom_level=6,center=(31.3,-99.5))

# Create heat layer
heat_layer = gmaps.heatmap_layer(locations, weights=bincount, 
                                 dissipating=False, max_intensity=30,
                                 point_radius=1)


# Add layer
fig.add_layer(heat_layer)

# Display figure
fig


Figure(layout=FigureLayout(border='1px solid black', height='600px', margin='0 auto 0 auto', padding='1px', wi…

### Data set for Heat map 6 - 3rd in the presentation flow of heatmaps.  This is a more focused view of heatmap 2 with top 10 locations identified

In [29]:
# sort 20 - highest rates - top 10  used for heat map6
countysworst3 = typlotalldata.loc[typlotalldata.groupby('County').Count.idxmax().values]
# countysworst3
cws3 = countysworst3.sort_values(by=['Offenders_per_1000'], ascending=False)


cws3plot = cws3.loc[cws3['Offenders_per_1000'] > 3.845]
# cws3plot.to_csv('../Resources/map6data.csv', index=False)
cws3plot

Unnamed: 0,County,Bin_Cat,Count,Population_2020,Pop_per_1000,Offenders_per_Pop,Offenders_per_1000,Latitude,Longitude,Pop_Bin
2165,Kenedy,Drugs,8,476,0.476,0.0168,16.807,26.924094,-97.681378,LT1K
2436,Loving,Drugs,1,92,0.092,0.0109,10.87,31.84913,-103.579906,LT1K
557,Carson,Drugs,42,5799,5.799,0.0072,7.243,35.403468,-101.354204,1K_to_10K
416,Brown,Drugs,220,38923,38.923,0.0057,5.652,31.774323,-98.999896,10K_to_50K
1062,Eastland,Drugs,87,18205,18.205,0.0048,4.779,32.327393,-98.832571,10K_to_50K
288,Borden,Sexual_Assault,3,685,0.685,0.0044,4.38,32.743692,-101.431753,LT1K
1565,Hall,Assault,14,3305,3.305,0.0042,4.236,34.530741,-100.680988,1K_to_10K
846,Cottle,Assault,6,1510,1.51,0.004,3.974,34.077657,-100.278698,1K_to_10K
2938,Palo Pinto,Drugs,108,27859,27.859,0.0039,3.877,32.753169,-98.312995,10K_to_50K
2178,Kerr,Drugs,201,52267,52.267,0.0038,3.846,30.06153,-99.35017,50K_to_100K


In [30]:
# Used to find approximate max_intensity for heap map 6
# cws3["Offenders_per_1000"].max()

In [31]:
# Create table for heat map support - placed on chart to provide county info
# for each blue dot location

map6_df = cws3plot[["County", "Bin_Cat", "Count", "Population_2020", "Offenders_per_1000"]]
map6_df = map6_df.rename(columns={"Bin_Cat": "Offense"})
# map6_df

map6_df.style.format({'Offenders_per_1000': '{:.2f}'})


Unnamed: 0,County,Offense,Count,Population_2020,Offenders_per_1000
2165,Kenedy,Drugs,8,476,16.81
2436,Loving,Drugs,1,92,10.87
557,Carson,Drugs,42,5799,7.24
416,Brown,Drugs,220,38923,5.65
1062,Eastland,Drugs,87,18205,4.78
288,Borden,Sexual_Assault,3,685,4.38
1565,Hall,Assault,14,3305,4.24
846,Cottle,Assault,6,1510,3.97
2938,Palo Pinto,Drugs,108,27859,3.88
2178,Kerr,Drugs,201,52267,3.85


In [32]:
# # Heat Map 6 - using data from Sort 20
# # this gives an overall heat map for each county and their "worst" bin
# # based on total number of individuals in prision - the heat map matches 
# # population distribution

# -------------------------------

gmaps.configure(api_key=g_key)

# Store 'Lat' and 'Lng' into  locations 
locationsw = countysworst3[["Latitude", "Longitude"]]

# Weight
bincountw = countysworst3["Offenders_per_1000"]

# Create heat layer
heat_layerw = gmaps.heatmap_layer(locationsw, weights=bincountw, 
                                 dissipating=False, max_intensity=12,
                                 point_radius=1)

# --------------------------------

# creating markers (pins with numbers and the dots)
info_box_templatewc = """
<dl>
<dt>County</dt><dd>{County}</dd>
<dt>Offense</dt><dd>{Bin_Cat}</dd>
<dt>Prisoners_per_1000_pop</dt><dd>{Offenders_per_1000}</dd>
</dl>
"""

county_infowc = [info_box_templatewc.format(**row) for index, row in cws3plot.iterrows()]
locationswc = cws3plot[["Latitude", "Longitude"]]
marker_locations = [1,2,3,4,5,6,7,8,9,10]


# Add marker layer on top of heat map
topc_layerwc = gmaps.symbol_layer(
    locationswc, fill_color='rgba(0, 150, 0, 0.4)',
    stroke_color='rgba(0, 0, 150, 0.4)', scale=6)

# these are the pins with numbers and the info boxes
markerswc = gmaps.marker_layer(locations = locationswc, 
                             label = [f" {x}" for x in marker_locations],
                             info_box_content=[f"{county}" for county in county_infowc])

figure_layout = {
    'width': '800px',
    'height': '600px',
    'border': '1px solid black',
    'padding': '1px',
    'margin': '0 auto 0 auto'}

fig = gmaps.figure(layout=figure_layout,zoom_level=6,center=(31.3,-99.5))
      

fig.add_layer(heat_layerw)
fig.add_layer(topc_layerwc)
fig.add_layer(markerswc)

fig



Figure(layout=FigureLayout(border='1px solid black', height='600px', margin='0 auto 0 auto', padding='1px', wi…

### As noted above, the original population bins were not used because they were too granular.  The following information is for the final population divisions.

### This is the data set for counties with populations greater than 10,000 and less than 100,000.  This is the 5th heatmap in the flow of the presentation.

In [34]:
# Data collection for visualization
# learned original pop_bins created were 
# actually to granular - therefore amended direction
# and went less detailed.  Detailed data
# did provide guidance for the final "slicing"
# for note - lower "Sort numbers" were for sorts not used - 

# Sort 21 - Cities pop 100,000 < X > 10,000

# creates single column with True or False meeting criteria
inihm7 = (typlotalldata["Population_2020"] >= 10000) & (typlotalldata["Population_2020"] < 100000)
# inihm7
# converting to dataframe with column name crit
inihm7 = inihm7.to_frame('crit')
# merging criteria dataframe with bigger dataframe
hm7cri = typlotalldata.merge(inihm7, how = "outer", left_index=True, right_index=True)
# converting crit column to value so it can be sorted upon - True becomes 1 and False becomes 0
hm7cri["crit"] = hm7cri["crit"]*1
hm7 = hm7cri.loc[hm7cri['crit'] == 1]
# hm7["County"].unique()
# hm7

# Sexual Assault
sexaslthm7_df = hm7.loc[hm7['Bin_Cat'] == "Sexual_Assault"]

# Assault
aslthm7_df = hm7.loc[hm7['Bin_Cat'] == "Assault"]

# Murder
murhm7_df = hm7.loc[hm7['Bin_Cat'] == "Murder"]

# Robbery
robhm7_df = hm7.loc[hm7['Bin_Cat'] == "Robbery"]

# Drugs
drugshm7_df = hm7.loc[hm7['Bin_Cat'] == "Drugs"]


frameshm7 = [sexaslthm7_df, aslthm7_df, murhm7_df, robhm7_df, drugshm7_df]
top5hm7 = pd.concat(frameshm7)
top5hm7

# coupophm7 = hm7.loc[:,"Population_2020"]
# nduphm7 = coupophm7.drop_duplicates().sum()
# # ndupgt50

sorttop5hm7 = top5hm7.sort_values(by=['Offenders_per_1000'], ascending=False)
sorttop5hm7.head()

Unnamed: 0,County,Bin_Cat,Count,Population_2020,Pop_per_1000,Offenders_per_Pop,Offenders_per_1000,Latitude,Longitude,Pop_Bin,crit
416,Brown,Drugs,220,38923,38.923,0.0057,5.652,31.774323,-98.999896,10K_to_50K,1
1062,Eastland,Drugs,87,18205,18.205,0.0048,4.779,32.327393,-98.832571,10K_to_50K,1
2938,Palo Pinto,Drugs,108,27859,27.859,0.0039,3.877,32.753169,-98.312995,10K_to_50K,1
2178,Kerr,Drugs,201,52267,52.267,0.0038,3.846,30.06153,-99.35017,50K_to_100K,1
411,Brown,Assault,129,38923,38.923,0.0033,3.314,31.774323,-98.999896,10K_to_50K,1


In [35]:
t5sorttop5hm7dups = sorttop5hm7.loc[sorttop5hm7['Offenders_per_1000'] > 2.645]
t5sorttop5hm7dups

Unnamed: 0,County,Bin_Cat,Count,Population_2020,Pop_per_1000,Offenders_per_Pop,Offenders_per_1000,Latitude,Longitude,Pop_Bin,crit
416,Brown,Drugs,220,38923,38.923,0.0057,5.652,31.774323,-98.999896,10K_to_50K,1
1062,Eastland,Drugs,87,18205,18.205,0.0048,4.779,32.327393,-98.832571,10K_to_50K,1
2938,Palo Pinto,Drugs,108,27859,27.859,0.0039,3.877,32.753169,-98.312995,10K_to_50K,1
2178,Kerr,Drugs,201,52267,52.267,0.0038,3.846,30.06153,-99.35017,50K_to_100K,1
411,Brown,Assault,129,38923,38.923,0.0033,3.314,31.774323,-98.999896,10K_to_50K,1
1976,Jackson,Drugs,49,15899,15.899,0.0031,3.082,28.956031,-96.578833,10K_to_50K,1
1073,Eastland,Sexual_Assault,55,18205,18.205,0.003,3.021,32.327393,-98.832571,10K_to_50K,1
3149,Red River,Sexual_Assault,38,12610,12.61,0.003,3.013,33.620745,-95.050189,10K_to_50K,1
2255,Lamar,Drugs,149,50014,50.014,0.003,2.979,33.667462,-95.571108,50K_to_100K,1
1852,Hopkins,Drugs,109,37040,37.04,0.0029,2.943,33.149338,-95.564142,10K_to_50K,1


### The step below is to drop the counties that have high enough offense ratios in more than more offense category that they are listed multiple times in the sort's top 10.  The duplicate county listings were removed to allow for the inclusion of different counties into the top 10.

In [36]:
# need to drop counties listed more than once to get highest value to present in heat map
t5sorttop5hm7 =  t5sorttop5hm7dups.drop_duplicates(subset=['County'])
t5sorttop5hm7 

Unnamed: 0,County,Bin_Cat,Count,Population_2020,Pop_per_1000,Offenders_per_Pop,Offenders_per_1000,Latitude,Longitude,Pop_Bin,crit
416,Brown,Drugs,220,38923,38.923,0.0057,5.652,31.774323,-98.999896,10K_to_50K,1
1062,Eastland,Drugs,87,18205,18.205,0.0048,4.779,32.327393,-98.832571,10K_to_50K,1
2938,Palo Pinto,Drugs,108,27859,27.859,0.0039,3.877,32.753169,-98.312995,10K_to_50K,1
2178,Kerr,Drugs,201,52267,52.267,0.0038,3.846,30.06153,-99.35017,50K_to_100K,1
1976,Jackson,Drugs,49,15899,15.899,0.0031,3.082,28.956031,-96.578833,10K_to_50K,1
3149,Red River,Sexual_Assault,38,12610,12.61,0.003,3.013,33.620745,-95.050189,10K_to_50K,1
2255,Lamar,Drugs,149,50014,50.014,0.003,2.979,33.667462,-95.571108,50K_to_100K,1
1852,Hopkins,Drugs,109,37040,37.04,0.0029,2.943,33.149338,-95.564142,10K_to_50K,1
1417,Gonzales,Assault,60,21347,21.347,0.0028,2.811,29.456415,-97.492799,10K_to_50K,1
949,Deaf Smith,Drugs,48,18143,18.143,0.0026,2.646,34.96602,-102.604816,10K_to_50K,1


In [37]:
# Create table for visualization
map7_df = t5sorttop5hm7[["County", "Bin_Cat", "Count", "Population_2020", "Offenders_per_1000"]]
map7_df = map7_df.rename(columns={"Bin_Cat": "Offense"})
# map6_df
# map7_df.to_csv('../Resources/map7_df.csv', index=False)
# map6_df.style.set_properties(align="center")
map7_df.style.format({'Offenders_per_1000': '{:.2f}'})

Unnamed: 0,County,Offense,Count,Population_2020,Offenders_per_1000
416,Brown,Drugs,220,38923,5.65
1062,Eastland,Drugs,87,18205,4.78
2938,Palo Pinto,Drugs,108,27859,3.88
2178,Kerr,Drugs,201,52267,3.85
1976,Jackson,Drugs,49,15899,3.08
3149,Red River,Sexual_Assault,38,12610,3.01
2255,Lamar,Drugs,149,50014,2.98
1852,Hopkins,Drugs,109,37040,2.94
1417,Gonzales,Assault,60,21347,2.81
949,Deaf Smith,Drugs,48,18143,2.65


In [59]:
# Heat Map 7 - using data from Sort 21
# this gives a heat map for counties with population between 10k and 100k

# --------------------------

gmaps.configure(api_key=g_key)

# Store 'Lat' and 'Lng' into  locations 
locationshm7 = sorttop5hm7[["Latitude", "Longitude"]]

# Weight
bincounthm7 = sorttop5hm7["Offenders_per_1000"]

# Create heat layer
heat_layerhm7 = gmaps.heatmap_layer(locationshm7, weights=bincounthm7, 
                                 dissipating=False, max_intensity=30,
                                 point_radius=1)

# ------------------------------------------

# Using the template add the marks to the heatmap


info_box_templatehm7 = """
<dl>
<dt>County</dt><dd>{County}</dd>
<dt>Offense</dt><dd>{Bin_Cat}</dd>
<dt>Prisoners_per_1000_pop</dt><dd>{Offenders_per_1000}</dd>
</dl>
"""

county_infohm7 = [info_box_templatehm7.format(**row) for index, row in t5sorttop5hm7.iterrows()]
locationshm7 = t5sorttop5hm7 [["Latitude", "Longitude"]]
marker_locations = [1,2,3,4,5,6,7,8,9,10]


# Add marker layer on top of heat map
topc_layerhm7 = gmaps.symbol_layer(
    locationshm7, fill_color='rgba(0, 150, 0, 0.4)',
    stroke_color='rgba(0, 0, 150, 0.4)', scale=6)

#adding pins with numbers and info box
markershm7 = gmaps.marker_layer(locations = locationshm7, 
                             label = [f" {x}" for x in marker_locations],
                             info_box_content=[f"{county}" for county in county_infohm7])

figure_layout = {
    'width': '800px',
    'height': '600px',
    'border': '1px solid black',
    'padding': '1px',
    'margin': '0 auto 0 auto'}

fig = gmaps.figure(layout=figure_layout,zoom_level=6,center=(31.3,-99.5))

fig.add_layer(heat_layerhm7)
fig.add_layer(markershm7)
fig.add_layer(topc_layerhm7)

fig



Figure(layout=FigureLayout(border='1px solid black', height='600px', margin='0 auto 0 auto', padding='1px', wi…

### This is the data set for counties with populations greater than 500,000.  This is the 7th heatmap in the flow of the presentation.

In [40]:
# Data collection for visualization
# learned original pop_bins created were 
# actually to granular - therefore amended direction
# and went less detailed.  Detailed data
# did provide guidance for the final "slicing"
# for note - lower sort numbers were for sorts not used - 

# Sort 22 - Cities pop > 500,000

inihm8 = (typlotalldata["Population_2020"] >= 500000) 
inihm8 = inihm8.to_frame('crit')
hm8cri = typlotalldata.merge(inihm8, how = "outer", left_index=True, right_index=True)
hm8cri["crit"] = hm8cri["crit"]*1
hm8 = hm8cri.loc[hm8cri['crit'] == 1]
# hm8["County"].unique()
# hm8

# Sexual Assault
sexaslthm8_df = hm8.loc[hm8['Bin_Cat'] == "Sexual_Assault"]


# Assault
aslthm8_df = hm8.loc[hm8['Bin_Cat'] == "Assault"]

# Murder
murhm8_df = hm8.loc[hm8['Bin_Cat'] == "Murder"]

# Robbery
robhm8_df = hm8.loc[hm8['Bin_Cat'] == "Robbery"]


# Drugs
drugshm8_df = hm8.loc[hm8['Bin_Cat'] == "Drugs"]


frameshm8 = [sexaslthm8_df, aslthm8_df, murhm8_df, robhm8_df, drugshm8_df]
top5hm8 = pd.concat(frameshm8)
top5hm8

# coupophm7 = hm7.loc[:,"Population_2020"]
# nduphm7 = coupophm7.drop_duplicates().sum()
# # ndupgt50

sorttop5hm8 = top5hm8.sort_values(by=['Offenders_per_1000'], ascending=False)
sorttop5hm8.head()

Unnamed: 0,County,Bin_Cat,Count,Population_2020,Pop_per_1000,Offenders_per_Pop,Offenders_per_1000,Latitude,Longitude,Pop_Bin,crit
1649,Harris,Robbery,4849,4978845,4978.845,0.001,0.974,29.859671,-95.397821,GT1M,1
917,Dallas,Robbery,2595,2734111,2734.111,0.0009,0.949,32.766537,-96.777819,GT1M,1
918,Dallas,Sexual_Assault,2305,2734111,2734.111,0.0008,0.843,32.766537,-96.777819,GT1M,1
3494,Tarrant,Sexual_Assault,1715,2143755,2143.755,0.0008,0.8,32.771852,-97.291165,GT1M,1
912,Dallas,Murder,2109,2734111,2734.111,0.0008,0.771,32.766537,-96.777819,GT1M,1


In [41]:
t5sorttop5hm8dups = sorttop5hm8.loc[sorttop5hm8['Offenders_per_1000'] > 0.35]
t5sorttop5hm8dups.head(20)

Unnamed: 0,County,Bin_Cat,Count,Population_2020,Pop_per_1000,Offenders_per_Pop,Offenders_per_1000,Latitude,Longitude,Pop_Bin,crit
1649,Harris,Robbery,4849,4978845,4978.845,0.001,0.974,29.859671,-95.397821,GT1M,1
917,Dallas,Robbery,2595,2734111,2734.111,0.0009,0.949,32.766537,-96.777819,GT1M,1
918,Dallas,Sexual_Assault,2305,2734111,2734.111,0.0008,0.843,32.766537,-96.777819,GT1M,1
3494,Tarrant,Sexual_Assault,1715,2143755,2143.755,0.0008,0.8,32.771852,-97.291165,GT1M,1
912,Dallas,Murder,2109,2734111,2734.111,0.0008,0.771,32.766537,-96.777819,GT1M,1
2737,Montgomery,Sexual_Assault,459,613951,613.951,0.0007,0.748,30.300224,-95.503014,500K_to_750K,1
263,Bexar,Robbery,1492,2093502,2093.502,0.0007,0.713,29.448845,-98.519663,GT1M,1
3493,Tarrant,Robbery,1394,2143755,2143.755,0.0007,0.65,32.771852,-97.291165,GT1M,1
242,Bexar,Assault,1358,2093502,2093.502,0.0006,0.649,29.448845,-98.519663,GT1M,1
1645,Harris,Murder,3192,4978845,4978.845,0.0006,0.641,29.859671,-95.397821,GT1M,1


In [42]:
# need to drop counties listed more than once to get highest value to present in heat map
#hm8
t5sorttop5hm8 =  t5sorttop5hm8dups.drop_duplicates(subset=['County'])
t5sorttop5hm8 

Unnamed: 0,County,Bin_Cat,Count,Population_2020,Pop_per_1000,Offenders_per_Pop,Offenders_per_1000,Latitude,Longitude,Pop_Bin,crit
1649,Harris,Robbery,4849,4978845,4978.845,0.001,0.974,29.859671,-95.397821,GT1M,1
917,Dallas,Robbery,2595,2734111,2734.111,0.0009,0.949,32.766537,-96.777819,GT1M,1
3494,Tarrant,Sexual_Assault,1715,2143755,2143.755,0.0008,0.8,32.771852,-97.291165,GT1M,1
2737,Montgomery,Sexual_Assault,459,613951,613.951,0.0007,0.748,30.300224,-95.503014,500K_to_750K,1
263,Bexar,Robbery,1492,2093502,2093.502,0.0007,0.713,29.448845,-98.519663,GT1M,1
711,Collin,Sexual_Assault,566,1039369,1039.369,0.0005,0.545,33.187891,-96.572489,GT1M,1
3617,Travis,Sexual_Assault,691,1291502,1291.502,0.0005,0.535,30.334233,-97.781947,GT1M,1
1777,Hidalgo,Sexual_Assault,422,870366,870.366,0.0005,0.485,26.396627,-98.180887,750K_to_1M,1
1115,El Paso,Assault,352,876120,876.12,0.0004,0.402,31.768623,-106.235223,750K_to_1M,1
3970,Williamson,Sexual_Assault,235,589914,589.914,0.0004,0.398,30.648349,-97.601055,500K_to_750K,1


In [43]:
# Create table for visualization
map8_df = t5sorttop5hm8[["County", "Bin_Cat", "Count", "Population_2020", "Offenders_per_1000"]]
map8_df = map8_df.rename(columns={"Bin_Cat": "Offense"})
# map6_df
# map8_df.to_csv('../Resources/map8_df.csv', index=False)
# map6_df.style.set_properties(align="center")
map8_df.style.format({'Offenders_per_1000': '{:.2f}'})

Unnamed: 0,County,Offense,Count,Population_2020,Offenders_per_1000
1649,Harris,Robbery,4849,4978845,0.97
917,Dallas,Robbery,2595,2734111,0.95
3494,Tarrant,Sexual_Assault,1715,2143755,0.8
2737,Montgomery,Sexual_Assault,459,613951,0.75
263,Bexar,Robbery,1492,2093502,0.71
711,Collin,Sexual_Assault,566,1039369,0.55
3617,Travis,Sexual_Assault,691,1291502,0.54
1777,Hidalgo,Sexual_Assault,422,870366,0.48
1115,El Paso,Assault,352,876120,0.4
3970,Williamson,Sexual_Assault,235,589914,0.4


In [62]:
# Heat Map 8 - using data from Sort 22
# this gives a heat map for counties with pops x > 500k

# ------------------------------

gmaps.configure(api_key=g_key)

# Store 'Lat' and 'Lng' into  locations 
locationshm8 = sorttop5hm8[["Latitude", "Longitude"]]

# Weight
bincounthm8 = sorttop5hm8["Offenders_per_1000"]

# Create heat layer
heat_layerhm8 = gmaps.heatmap_layer(locationshm8, weights=bincounthm8, 
                                 dissipating=False, max_intensity=5,
                                 point_radius=1)


# --------------------------------


info_box_templatehm8 = """
<dl>
<dt>County</dt><dd>{County}</dd>
<dt>Offense</dt><dd>{Bin_Cat}</dd>
<dt>Prisoners_per_1000_pop</dt><dd>{Offenders_per_1000}</dd>
</dl>
"""

county_infohm8 = [info_box_templatehm8.format(**row) for index, row in t5sorttop5hm8.iterrows()]
locationshm8 = t5sorttop5hm8 [["Latitude", "Longitude"]]
marker_locations = [1,2,3,4,5,6,7,8,9,10]


# Add marker layer on top of heat map
topc_layerhm8 = gmaps.symbol_layer(
    locationshm8, fill_color='rgba(0, 150, 0, 0.4)',
    stroke_color='rgba(0, 0, 150, 0.4)', scale=6)

# add pins with numbers and info box
markershm8 = gmaps.marker_layer(locations = locationshm8, 
                             label = [f" {x}" for x in marker_locations],
                             info_box_content=[f"{county}" for county in county_infohm8])

figure_layout = {
    'width': '800px',
    'height': '600px',
    'border': '1px solid black',
    'padding': '1px',
    'margin': '0 auto 0 auto'}

fig = gmaps.figure(layout=figure_layout,zoom_level=6,center=(31.3,-99.5))

fig.add_layer(heat_layerhm8)
fig.add_layer(markershm8)
fig.add_layer(topc_layerhm8)

fig


Figure(layout=FigureLayout(border='1px solid black', height='600px', margin='0 auto 0 auto', padding='1px', wi…

### This is the data set for counties with populations greater than 100,000 and less than 500,000.  This is the 6th heatmap in the flow of the presentation.

In [46]:
# Data collection for visualization
# learned original pop_bins created were 
# actually to granular - therefore amended direction
# and went less detailed.  Detailed data
# did provide guidance for the final "slicing"
# for note - lower sort numbers were for sorts not used - 


# Sort 23 - Cities pop < 500,000 and > 100,000

inihm9 = (typlotalldata["Population_2020"] >= 100000) & (typlotalldata["Population_2020"] < 500000)
inihm9 = inihm9.to_frame('crit')
hm9cri = typlotalldata.merge(inihm9, how = "outer", left_index=True, right_index=True)
hm9cri["crit"] = hm9cri["crit"]*1
hm9 = hm9cri.loc[hm9cri['crit'] == 1]
# hm9["County"].unique()
# hm9

# Sexual Assault
sexaslthm9_df = hm9.loc[hm9['Bin_Cat'] == "Sexual_Assault"]

# Assault
aslthm9_df = hm9.loc[hm9['Bin_Cat'] == "Assault"]

# Murder
murhm9_df = hm9.loc[hm9['Bin_Cat'] == "Murder"]

# Robbery
robhm9_df = hm9.loc[hm9['Bin_Cat'] == "Robbery"]

# Drugs
drugshm9_df = hm9.loc[hm9['Bin_Cat'] == "Drugs"]

frameshm9 = [sexaslthm9_df, aslthm9_df, murhm9_df, robhm9_df, drugshm9_df]
top5hm9 = pd.concat(frameshm9)
top5hm9

# coupophm7 = hm7.loc[:,"Population_2020"]
# nduphm7 = coupophm7.drop_duplicates().sum()
# # ndupgt50

sorttop5hm9 = top5hm9.sort_values(by=['Offenders_per_1000'], ascending=False)
sorttop5hm9.head(20)

Unnamed: 0,County,Bin_Cat,Count,Population_2020,Pop_per_1000,Offenders_per_Pop,Offenders_per_1000,Latitude,Longitude,Pop_Bin,crit
3510,Taylor,Drugs,400,139457,139.457,0.0029,2.868,32.301382,-99.890039,100K_to_250K,1
3062,Potter,Drugs,303,122706,122.706,0.0025,2.469,35.401289,-101.894048,100K_to_250K,1
3074,Potter,Sexual_Assault,282,122706,122.706,0.0023,2.298,35.401289,-101.894048,100K_to_250K,1
2597,McLennan,Sexual_Assault,539,253066,253.066,0.0021,2.13,31.552345,-97.201849,250K_to_500K,1
3057,Potter,Assault,259,122706,122.706,0.0021,2.111,35.401289,-101.894048,100K_to_250K,1
3578,Tom Green,Drugs,254,123276,123.276,0.0021,2.06,31.404444,-100.462068,100K_to_250K,1
2583,McLennan,Drugs,516,253066,253.066,0.002,2.039,31.552345,-97.201849,250K_to_500K,1
1458,Grayson,Drugs,262,131710,131.71,0.002,1.989,33.626844,-96.677649,100K_to_250K,1
2070,Johnson,Drugs,301,171701,171.701,0.0018,1.753,32.378999,-97.366605,100K_to_250K,1
3384,Smith,Drugs,401,235143,235.143,0.0017,1.705,32.375146,-95.269095,100K_to_250K,1


In [47]:
t5sorttop5hm9dups = sorttop5hm9.loc[sorttop5hm9['Offenders_per_1000'] > 1.3]
t5sorttop5hm9dups.head()

Unnamed: 0,County,Bin_Cat,Count,Population_2020,Pop_per_1000,Offenders_per_Pop,Offenders_per_1000,Latitude,Longitude,Pop_Bin,crit
3510,Taylor,Drugs,400,139457,139.457,0.0029,2.868,32.301382,-99.890039,100K_to_250K,1
3062,Potter,Drugs,303,122706,122.706,0.0025,2.469,35.401289,-101.894048,100K_to_250K,1
3074,Potter,Sexual_Assault,282,122706,122.706,0.0023,2.298,35.401289,-101.894048,100K_to_250K,1
2597,McLennan,Sexual_Assault,539,253066,253.066,0.0021,2.13,31.552345,-97.201849,250K_to_500K,1
3057,Potter,Assault,259,122706,122.706,0.0021,2.111,35.401289,-101.894048,100K_to_250K,1


In [48]:
# need to drop counties listed more than once to get highest value to present in heat map
t5sorttop5hm9 =  t5sorttop5hm9dups.drop_duplicates(subset=['County'])
t5sorttop5hm9

Unnamed: 0,County,Bin_Cat,Count,Population_2020,Pop_per_1000,Offenders_per_Pop,Offenders_per_1000,Latitude,Longitude,Pop_Bin,crit
3510,Taylor,Drugs,400,139457,139.457,0.0029,2.868,32.301382,-99.890039,100K_to_250K,1
3062,Potter,Drugs,303,122706,122.706,0.0025,2.469,35.401289,-101.894048,100K_to_250K,1
2597,McLennan,Sexual_Assault,539,253066,253.066,0.0021,2.13,31.552345,-97.201849,250K_to_500K,1
3578,Tom Green,Drugs,254,123276,123.276,0.0021,2.06,31.404444,-100.462068,100K_to_250K,1
1458,Grayson,Drugs,262,131710,131.71,0.002,1.989,33.626844,-96.677649,100K_to_250K,1
2070,Johnson,Drugs,301,171701,171.701,0.0018,1.753,32.378999,-97.366605,100K_to_250K,1
3384,Smith,Drugs,401,235143,235.143,0.0017,1.705,32.375146,-95.269095,100K_to_250K,1
1481,Gregg,Assault,191,125730,125.73,0.0015,1.519,32.480365,-94.817237,100K_to_250K,1
2035,Jefferson,Robbery,377,258678,258.678,0.0015,1.457,29.884258,-94.170878,250K_to_500K,1
2980,Parker,Drugs,189,135621,135.621,0.0014,1.394,32.778523,-97.804722,100K_to_250K,1


In [49]:
# Create table for visualization
map9_df = t5sorttop5hm9[["County", "Bin_Cat", "Count", "Population_2020", "Offenders_per_1000"]]
map9_df = map9_df.rename(columns={"Bin_Cat": "Offense"})
# map6_df
# map9_df.to_csv('../Resources/map9_df.csv', index=False)
# map6_df.style.set_properties(align="center")
map9_df.style.format({'Offenders_per_1000': '{:.2f}'})

Unnamed: 0,County,Offense,Count,Population_2020,Offenders_per_1000
3510,Taylor,Drugs,400,139457,2.87
3062,Potter,Drugs,303,122706,2.47
2597,McLennan,Sexual_Assault,539,253066,2.13
3578,Tom Green,Drugs,254,123276,2.06
1458,Grayson,Drugs,262,131710,1.99
2070,Johnson,Drugs,301,171701,1.75
3384,Smith,Drugs,401,235143,1.71
1481,Gregg,Assault,191,125730,1.52
2035,Jefferson,Robbery,377,258678,1.46
2980,Parker,Drugs,189,135621,1.39


In [64]:
# Heat Map 9 - using data from Sort 23
# this gives a heat map for counties with pops  100k < x < 500k

# ---------------------

gmaps.configure(api_key=g_key)

# Store 'Lat' and 'Lng' into  locations 
locationshm9 = sorttop5hm9[["Latitude", "Longitude"]]

# Weight
bincounthm9 = sorttop5hm9["Offenders_per_1000"]

# Create heat layer
heat_layerhm9 = gmaps.heatmap_layer(locationshm9, weights=bincounthm9, 
                                 dissipating=False, max_intensity=10,
                                 point_radius=1)

# ---------------------------

info_box_templatehm9 = """
<dl>
<dt>County</dt><dd>{County}</dd>
<dt>Offense</dt><dd>{Bin_Cat}</dd>
<dt>Prisoners_per_1000_pop</dt><dd>{Offenders_per_1000}</dd>
</dl>
"""
county_infohm9 = [info_box_templatehm9.format(**row) for index, row in t5sorttop5hm9.iterrows()]
locationshm9 = t5sorttop5hm9 [["Latitude", "Longitude"]]
marker_locations = [1,2,3,4,5,6,7,8,9,10]


# Add marker layer ontop of heat map
topc_layerhm9 = gmaps.symbol_layer(
    locationshm9, fill_color='rgba(0, 150, 0, 0.4)',
    stroke_color='rgba(0, 0, 150, 0.4)', scale=6)

# add pins with numbers and info box
markershm9 = gmaps.marker_layer(locations = locationshm9, 
                             label = [f" {x}" for x in marker_locations],
                             info_box_content=[f"{county}" for county in county_infohm9])

figure_layout = {
    'width': '800px',
    'height': '600px',
    'border': '1px solid black',
    'padding': '1px',
    'margin': '0 auto 0 auto'}

fig = gmaps.figure(layout=figure_layout,zoom_level=6,center=(31.3,-99.5))

fig.add_layer(heat_layerhm9)
fig.add_layer(markershm9)
fig.add_layer(topc_layerhm9)

fig



Figure(layout=FigureLayout(border='1px solid black', height='600px', margin='0 auto 0 auto', padding='1px', wi…

### This is the data set for counties with populations less than 10,000.  This is the 4th heatmap in the flow of the presentation.

In [52]:
# Data collection for visualization
# learned original pop_bins created were 
# actually to granular - therefore amended direction
# and went less detailed.  Detailed data
# did provide guidance for the final "slicing"
# for note - lower sort numbers were for sorts not used - 


# Sort 24 - Cities pop X < 10,000

inihm10 = (typlotalldata["Population_2020"] < 10000) 
inihm10 = inihm10.to_frame('crit')
hm10cri = typlotalldata.merge(inihm10, how = "outer", left_index=True, right_index=True)
hm10cri["crit"] = hm10cri["crit"]*1
hm10 = hm10cri.loc[hm10cri['crit'] == 1]
# hm9["County"].unique()
# hm9

# Sexual Assault
sexaslthm10_df = hm10.loc[hm10['Bin_Cat'] == "Sexual_Assault"]

# Assault
aslthm10_df = hm10.loc[hm10['Bin_Cat'] == "Assault"]

# Murder
murhm10_df = hm10.loc[hm10['Bin_Cat'] == "Murder"]

# Robbery
robhm10_df = hm10.loc[hm10['Bin_Cat'] == "Robbery"]

# Drugs
drugshm10_df = hm10.loc[hm10['Bin_Cat'] == "Drugs"]


frameshm10 = [sexaslthm10_df, aslthm10_df, murhm10_df, robhm10_df, drugshm10_df]
top5hm10 = pd.concat(frameshm10)
top5hm10

# coupophm7 = hm7.loc[:,"Population_2020"]
# nduphm7 = coupophm7.drop_duplicates().sum()
# # ndupgt50

sorttop5hm10 = top5hm10.sort_values(by=['Offenders_per_1000'], ascending=False)
sorttop5hm10.head(20)

Unnamed: 0,County,Bin_Cat,Count,Population_2020,Pop_per_1000,Offenders_per_Pop,Offenders_per_1000,Latitude,Longitude,Pop_Bin,crit
2165,Kenedy,Drugs,8,476,0.476,0.0168,16.807,26.924094,-97.681378,LT1K,1
2436,Loving,Drugs,1,92,0.092,0.0109,10.87,31.84913,-103.579906,LT1K,1
557,Carson,Drugs,42,5799,5.799,0.0072,7.243,35.403468,-101.354204,1K_to_10K,1
288,Borden,Sexual_Assault,3,685,0.685,0.0044,4.38,32.743692,-101.431753,LT1K,1
1565,Hall,Assault,14,3305,3.305,0.0042,4.236,34.530741,-100.680988,1K_to_10K,1
846,Cottle,Assault,6,1510,1.51,0.004,3.974,34.077657,-100.278698,1K_to_10K,1
2680,Mills,Drugs,18,4870,4.87,0.0037,3.696,31.495243,-98.595527,1K_to_10K,1
679,Coleman,Drugs,31,8478,8.478,0.0037,3.657,31.773294,-99.453626,1K_to_10K,1
2206,Kimble,Sexual_Assault,15,4344,4.344,0.0035,3.453,30.486763,-99.748927,1K_to_10K,1
721,Collingsworth,Assault,11,3210,3.21,0.0034,3.427,34.964985,-100.270073,1K_to_10K,1


In [53]:
t5sorttop5hm10dups = sorttop5hm10.loc[sorttop5hm10['Offenders_per_1000'] > 3.3]
t5sorttop5hm10dups.head(20)

Unnamed: 0,County,Bin_Cat,Count,Population_2020,Pop_per_1000,Offenders_per_Pop,Offenders_per_1000,Latitude,Longitude,Pop_Bin,crit
2165,Kenedy,Drugs,8,476,0.476,0.0168,16.807,26.924094,-97.681378,LT1K,1
2436,Loving,Drugs,1,92,0.092,0.0109,10.87,31.84913,-103.579906,LT1K,1
557,Carson,Drugs,42,5799,5.799,0.0072,7.243,35.403468,-101.354204,1K_to_10K,1
288,Borden,Sexual_Assault,3,685,0.685,0.0044,4.38,32.743692,-101.431753,LT1K,1
1565,Hall,Assault,14,3305,3.305,0.0042,4.236,34.530741,-100.680988,1K_to_10K,1
846,Cottle,Assault,6,1510,1.51,0.004,3.974,34.077657,-100.278698,1K_to_10K,1
2680,Mills,Drugs,18,4870,4.87,0.0037,3.696,31.495243,-98.595527,1K_to_10K,1
679,Coleman,Drugs,31,8478,8.478,0.0037,3.657,31.773294,-99.453626,1K_to_10K,1
2206,Kimble,Sexual_Assault,15,4344,4.344,0.0035,3.453,30.486763,-99.748927,1K_to_10K,1
721,Collingsworth,Assault,11,3210,3.21,0.0034,3.427,34.964985,-100.270073,1K_to_10K,1


In [54]:
# need to drop counties listed more than once to get highest value to present in heat map
t5sorttop5hm10 =  t5sorttop5hm10dups.drop_duplicates(subset=['County'])
t5sorttop5hm10

Unnamed: 0,County,Bin_Cat,Count,Population_2020,Pop_per_1000,Offenders_per_Pop,Offenders_per_1000,Latitude,Longitude,Pop_Bin,crit
2165,Kenedy,Drugs,8,476,0.476,0.0168,16.807,26.924094,-97.681378,LT1K,1
2436,Loving,Drugs,1,92,0.092,0.0109,10.87,31.84913,-103.579906,LT1K,1
557,Carson,Drugs,42,5799,5.799,0.0072,7.243,35.403468,-101.354204,1K_to_10K,1
288,Borden,Sexual_Assault,3,685,0.685,0.0044,4.38,32.743692,-101.431753,LT1K,1
1565,Hall,Assault,14,3305,3.305,0.0042,4.236,34.530741,-100.680988,1K_to_10K,1
846,Cottle,Assault,6,1510,1.51,0.004,3.974,34.077657,-100.278698,1K_to_10K,1
2680,Mills,Drugs,18,4870,4.87,0.0037,3.696,31.495243,-98.595527,1K_to_10K,1
679,Coleman,Drugs,31,8478,8.478,0.0037,3.657,31.773294,-99.453626,1K_to_10K,1
2206,Kimble,Sexual_Assault,15,4344,4.344,0.0035,3.453,30.486763,-99.748927,1K_to_10K,1
721,Collingsworth,Assault,11,3210,3.21,0.0034,3.427,34.964985,-100.270073,1K_to_10K,1


In [55]:
# Create table for visualization
map10_df = t5sorttop5hm10[["County", "Bin_Cat", "Count", "Population_2020", "Offenders_per_1000"]]
map10_df = map10_df.rename(columns={"Bin_Cat": "Offense"})
# map6_df
# map10_df.to_csv('../Resources/map10_df.csv', index=False)
# map6_df.style.set_properties(align="center")
map10_df.style.format({'Offenders_per_1000': '{:.2f}'})

Unnamed: 0,County,Offense,Count,Population_2020,Offenders_per_1000
2165,Kenedy,Drugs,8,476,16.81
2436,Loving,Drugs,1,92,10.87
557,Carson,Drugs,42,5799,7.24
288,Borden,Sexual_Assault,3,685,4.38
1565,Hall,Assault,14,3305,4.24
846,Cottle,Assault,6,1510,3.97
2680,Mills,Drugs,18,4870,3.7
679,Coleman,Drugs,31,8478,3.66
2206,Kimble,Sexual_Assault,15,4344,3.45
721,Collingsworth,Assault,11,3210,3.43


In [66]:
# Heat Map 10 - using data from Sort 24
# this gives a heat map for counties with pops  x < 1000

# ----------------

gmaps.configure(api_key=g_key)

# Store 'Lat' and 'Lng' into  locations 
locationshm10 = sorttop5hm10[["Latitude", "Longitude"]]

# Weight
bincounthm10 = sorttop5hm10["Offenders_per_1000"]

# Create heat layer
heat_layerhm10 = gmaps.heatmap_layer(locationshm10, weights=bincounthm10, 
                                 dissipating=False, max_intensity=20,
                                 point_radius=1)

# ---------------------------

info_box_templatehm10 = """
<dl>
<dt>County</dt><dd>{County}</dd>
<dt>Offense</dt><dd>{Bin_Cat}</dd>
<dt>Prisoners_per_1000_pop</dt><dd>{Offenders_per_1000}</dd>
</dl>
"""
county_infohm10 = [info_box_templatehm10.format(**row) for index, row in t5sorttop5hm10.iterrows()]
locationshm10 = t5sorttop5hm10 [["Latitude", "Longitude"]]
marker_locations = [1,2,3,4,5,6,7,8,9,10]


# Add marker layer ontop of heat map
topc_layerhm10 = gmaps.symbol_layer(
    locationshm10, fill_color='rgba(0, 150, 0, 0.4)',
    stroke_color='rgba(0, 0, 150, 0.4)', scale=6)

# add pins with numbers and info box
markershm10 = gmaps.marker_layer(locations = locationshm10, 
                             label = [f" {x}" for x in marker_locations],
                             info_box_content=[f"{county}" for county in county_infohm10])

figure_layout = {
    'width': '800px',
    'height': '600px',
    'border': '1px solid black',
    'padding': '1px',
    'margin': '0 auto 0 auto'}

fig = gmaps.figure(layout=figure_layout,zoom_level=6,center=(31.3,-99.5))

fig.add_layer(heat_layerhm10)
fig.add_layer(markershm10)
fig.add_layer(topc_layerhm10)

fig


Figure(layout=FigureLayout(border='1px solid black', height='600px', margin='0 auto 0 auto', padding='1px', wi…

In [65]:
# Heat Map 10  - using data from Sort 24

# Using the template add the marks to the heatmap

# info_box_templatehm10 = """
# <dl>
# <dt>County</dt><dd>{County}</dd>
# <dt>Offense</dt><dd>{Bin_Cat}</dd>
# <dt>Prisoners_per_1000_pop</dt><dd>{Offenders_per_1000}</dd>
# </dl>
# """
# county_infohm10 = [info_box_templatehm10.format(**row) for index, row in t5sorttop5hm10.iterrows()]
# locationshm10 = t5sorttop5hm10 [["Latitude", "Longitude"]]
# marker_locations = [1,2,3,4,5,6,7,8,9,10]


# # Add marker layer ontop of heat map
# topc_layerhm10 = gmaps.symbol_layer(
#     locationshm10, fill_color='rgba(0, 150, 0, 0.4)',
#     stroke_color='rgba(0, 0, 150, 0.4)', scale=6)


# markershm10 = gmaps.marker_layer(locations = locationshm10, 
#                              label = [f" {x}" for x in marker_locations],
#                              info_box_content=[f"{county}" for county in county_infohm10])

# figure_layout = {
#     'width': '800px',
#     'height': '600px',
#     'border': '1px solid black',
#     'padding': '1px',
#     'margin': '0 auto 0 auto'}

# fig = gmaps.figure(layout=figure_layout,zoom_level=6,center=(31.3,-99.5))

# fig.add_layer(heat_layerhm10)
# fig.add_layer(markershm10)
# fig.add_layer(topc_layerhm10)

# fig