In [1]:
import pandas as pd
import numpy as np
import chart_studio.plotly as py
import json
import folium
from folium import Choropleth, CircleMarker, Marker
from folium.plugins import HeatMap, MarkerCluster
import cufflinks as cf
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
%matplotlib inline

from plotly.offline import download_plotlyjs, init_notebook_mode,plot, iplot
init_notebook_mode(connected=True)
cf.go_offline()

In [2]:
df = pd.read_excel(r"C:\Users\gurja\OneDrive\Project-3-Data-Visualization\Crime Data\2022_Texas_County_Crime.xlsx", 
                  engine = 'openpyxl')

#Make the names appear as they do in the json file
df['County'] = df['County'].apply(lambda x: ' '.join(word.capitalize() for word in x.split() if word.lower() != 'county'))
df.head()

#drop extra column
df = df.drop(columns=['Unnamed: 10'])

#fill in null values
df = df.fillna(1)

df

Unnamed: 0,County,Population,Murder,Rape,Robbery,Assault,Burglary,Larceny,Auto Theft,Total
0,Anderson,20230.0,1.0,32.0,7.0,99.0,124.0,356.0,32.0,651.0
1,Andrews,18380.0,1.0,5.0,0.0,60.0,32.0,192.0,22.0,312.0
2,Angelina,82128.0,6.0,78.0,26.0,216.0,410.0,1296.0,187.0,2219.0
3,Aransas,31050.0,3.0,19.0,9.0,134.0,151.0,523.0,54.0,893.0
4,Archer,1593.0,0.0,1.0,0.0,0.0,4.0,2.0,2.0,9.0
...,...,...,...,...,...,...,...,...,...,...
249,Wood,47838.0,1.0,12.0,11.0,64.0,108.0,246.0,36.0,478.0
250,Yoakum,7591.0,0.0,0.0,0.0,17.0,11.0,34.0,8.0,70.0
251,Young,18225.0,0.0,5.0,2.0,24.0,23.0,98.0,16.0,168.0
252,Zapata,14016.0,0.0,0.0,0.0,5.0,12.0,20.0,0.0,37.0


In [3]:
df.isin([np.inf, -np.inf]).any()

County        False
Population    False
Murder        False
Rape          False
Robbery       False
Assault       False
Burglary      False
Larceny       False
Auto Theft    False
Total         False
dtype: bool

In [4]:
crime_columns = ['Murder', 'Rape', 'Robbery', 'Assault', 'Burglary', 'Larceny', 'Auto Theft', 'Total']

for crime in crime_columns:
    rate_column = f'{crime} Rate'
    df[rate_column] = (df[crime] / df['Population']) * 100000
    
df

Unnamed: 0,County,Population,Murder,Rape,Robbery,Assault,Burglary,Larceny,Auto Theft,Total,Murder Rate,Rape Rate,Robbery Rate,Assault Rate,Burglary Rate,Larceny Rate,Auto Theft Rate,Total Rate
0,Anderson,20230.0,1.0,32.0,7.0,99.0,124.0,356.0,32.0,651.0,4.943154,158.180919,34.602076,489.372219,612.951063,1759.762729,158.180919,3217.993080
1,Andrews,18380.0,1.0,5.0,0.0,60.0,32.0,192.0,22.0,312.0,5.440696,27.203482,0.000000,326.441785,174.102285,1044.613711,119.695321,1697.497280
2,Angelina,82128.0,6.0,78.0,26.0,216.0,410.0,1296.0,187.0,2219.0,7.305669,94.973700,31.657900,263.004091,499.220729,1578.024547,227.693357,2701.879992
3,Aransas,31050.0,3.0,19.0,9.0,134.0,151.0,523.0,54.0,893.0,9.661836,61.191626,28.985507,431.561997,486.312399,1684.380032,173.913043,2876.006441
4,Archer,1593.0,0.0,1.0,0.0,0.0,4.0,2.0,2.0,9.0,0.000000,62.774639,0.000000,0.000000,251.098556,125.549278,125.549278,564.971751
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
249,Wood,47838.0,1.0,12.0,11.0,64.0,108.0,246.0,36.0,478.0,2.090388,25.084661,22.994272,133.784857,225.761947,514.235545,75.253982,999.205652
250,Yoakum,7591.0,0.0,0.0,0.0,17.0,11.0,34.0,8.0,70.0,0.000000,0.000000,0.000000,223.949414,144.908444,447.898828,105.387959,922.144645
251,Young,18225.0,0.0,5.0,2.0,24.0,23.0,98.0,16.0,168.0,0.000000,27.434842,10.973937,131.687243,126.200274,537.722908,87.791495,921.810700
252,Zapata,14016.0,0.0,0.0,0.0,5.0,12.0,20.0,0.0,37.0,0.000000,0.000000,0.000000,35.673516,85.616438,142.694064,0.000000,263.984018


In [5]:
df.isin([np.inf, -np.inf]).any()

County             False
Population         False
Murder             False
Rape               False
Robbery            False
Assault            False
Burglary           False
Larceny            False
Auto Theft         False
Total              False
Murder Rate        False
Rape Rate           True
Robbery Rate       False
Assault Rate       False
Burglary Rate      False
Larceny Rate        True
Auto Theft Rate    False
Total Rate          True
dtype: bool

In [6]:
non_rate_columns = ['Population', 'Murder', 'Rape', 'Robbery', 'Assault', 'Burglary', 'Larceny', 'Auto Theft', 'Total']
df[non_rate_columns] = df[non_rate_columns].replace(0, 1)
df.isin([np.inf, -np.inf]).any()

County             False
Population         False
Murder             False
Rape               False
Robbery            False
Assault            False
Burglary           False
Larceny            False
Auto Theft         False
Total              False
Murder Rate        False
Rape Rate           True
Robbery Rate       False
Assault Rate       False
Burglary Rate      False
Larceny Rate        True
Auto Theft Rate    False
Total Rate          True
dtype: bool

In [7]:
import plotly.graph_objects as go
import pandas as pd

# Assuming you have a DataFrame named df

# Define colors for each population bin
colors = ['blue', 'green', 'orange', 'red']

# Exclude counties with populations from 0 to 10,000, be
filtered_df = df[df['Population'] > 10000]

# Define the updated population bins
population_bins = [10000, 25000, 50000, 100000, float('inf')]
population_labels = ['10k-25k', '25k-50k', '50k-100k', '100k+']

# Create a new column in df for the updated population size category
filtered_df['Population Category'] = pd.cut(filtered_df['Population'], bins=population_bins, labels=population_labels, right=False)

# Bar Chart
fig = go.Figure()

# crimes loop
for crime_rate in ['Murder Rate', 'Rape Rate', 'Robbery Rate', 'Assault Rate', 'Burglary Rate', 'Larceny Rate', 'Auto Theft Rate']:
    # Loop through population bins
    for i, pop_bin in enumerate(population_labels):
        avg_rate = filtered_df.loc[filtered_df['Population Category'] == pop_bin, crime_rate].mean()
        # Set showlegend to False to hide the legend
        fig.add_trace(go.Bar(x=[crime_rate], y=[avg_rate], marker_color=colors[i], showlegend=False))

# total rate, didnt work
fig.update_layout(
    yaxis2=dict(
        title='Total Rate',
        overlaying='y',
        side='right',
        showgrid=False,
        zeroline=False,
        range=[0, filtered_df['Total Rate'].max() + 500]
    )
)

# Update the layout
fig.update_layout(
    barmode='group',
    title='Average Crime Rates by County Population 2022',
    xaxis_title='Crime Rate',
    yaxis_title='Rate per 100,000 people',
    height=600
)

# Show 
fig.show()


In [8]:
import plotly.express as px

# scatter plot
fig = px.scatter(
    df,
    x='Burglary',
    y='Population',
    hover_data=['County'],
    title='Burglary in Texas Counties 2022',
    labels={'Burglary': 'Burglary Count', 'Population': 'Population'},
    template='plotly_dark',  # Choose a dark template for aesthetics
    color_discrete_sequence=['yellow'],  # Choose a color for the points
    opacity=0.7,  # Set point opacity
)

# Titles
fig.update_layout(
    xaxis_title='Burglary Count',
    yaxis_title=' County Population',
    height=600,
    showlegend=False  # Hide legend for a cleaner look
)

# Show
fig.show()
