In [None]:
import pandas as pd
import numpy as py
import seaborn as sns 
import matplotlib.pyplot as plt
import math
import scipy.stats as st
import scipy.stats as stats
import cartopy.crs as ccrs
import cartopy.io.shapereader as shpreader


In [None]:
# Teammate Chris code begins here
# changes here
state_name_list = [ "Alabama", "Alaska", "Arizona", "Arkansas", "California",
                    "Colorado", "Connecticut", "DC", "Delaware", "Florida",
                    "Georgia", "Hawaii", "Idaho", "Illinois", "Indiana",
                    "Iowa", "Kansas", "Kentucky", "Louisiana", "Maine",
                    "Maryland", "Massachusetts", "Michigan", "Minnesota",
                    "Mississippi", "Missouri", "Montana", "Nebraska", "Nevada",
                    "New_Hampshire", "New_Jersey", "New_Mexico", "New_York", "North_Carolina",
                    "North_Dakota", "Ohio", "Oklahoma", "Oregon", "Pennsylvania",
                    "Rhode_Island", "South_Carolina", "South_Dakota", "Tennessee", "Texas",
                    "Utah", "Vermont", "Virginia", "Washington", "West_Virginia",
                    "Wisconsin", "Wyoming"
                  ]

# Per-state crime dataframes, keyed by the names in state_name_list
crime_by_state = {}

# Set to True to print each file as it is imported and cleaned
debug = False


print("Importing *.csv files...")

# Base file path we will use for reusable code
base_file_path = "Raw_Data/CrimeStateByState_"

# Import the U.S. crime data totals (header=5: the column names are on the sixth line)
file_path = base_file_path + "US" + ".csv"
if debug:
    print(f"Importing file: {file_path}")
us_totals_df = pd.read_csv(file_path, header=5)

# Iterate over and import the state-by-state crime data
for state in state_name_list:
    file_path = base_file_path + state + ".csv"
    if debug:
        print(f"Importing file: {file_path}")
    crime_by_state[state] = pd.read_csv(file_path, header=5)

# Import the gun ownership data
file_path = "Raw_Data/gun_ownership_by_state.csv"
if debug:
    print(f"Importing file: {file_path}")
gun_ownership_df = pd.read_csv(file_path)

# Import the poverty rate data
file_path = "Raw_Data/State_by_State_Poverty_Rate.csv"
if debug:
    print(f"Importing file: {file_path}")
poverty_rate_df = pd.read_csv(file_path, header=2)

print("File import complete.")

# Clean up the footer information that was at the bottom of each crime data .csv file:
# only rows whose "Year" is purely numeric are kept
if debug:
    print("Cleaning up the .csv files...")

for state in state_name_list:
    if debug:
        print(f"The current state is {state}...")
    state_df = crime_by_state[state]
    # .copy() so later cells can add columns without writing into a filtered slice
    crime_by_state[state] = state_df[state_df["Year"].astype(str).str.isnumeric()].copy()

if debug:
    print("Cleaning up the U.S. totals...")
us_totals_df = us_totals_df[us_totals_df["Year"].astype(str).str.isnumeric()]

# Clean up the poverty data: strip the trailing "%" and convert to numbers
# (anything that still is not a number becomes NaN)
for column in ["2018 Poverty Rate", "2014 Poverty Rate", "Supplemental Poverty Measure"]:
    poverty_rate_df[column] = pd.to_numeric(poverty_rate_df[column].str.rstrip('%'), errors="coerce")

if debug:
    print("Cleanup of .csv files is complete.")


In [None]:
# Index the gun ownership and poverty tables by state so they can be joined
# with the crime data. Every step checks that its column is still present,
# so re-running this cell is a no-op instead of an error that has to be swallowed.

# Keep "State" as a column too (drop=False); later cells plot against it
if "State" in gun_ownership_df.columns:
    gun_ownership_df = gun_ownership_df.set_index("State", drop=False)

if "State" in poverty_rate_df.columns:
    poverty_rate_df = poverty_rate_df.set_index("State")

# The poverty ranking is not used in the analysis
if "Rank" in poverty_rate_df.columns:
    poverty_rate_df = poverty_rate_df.drop(columns=["Rank"])

In [None]:
# Build a dataframe of the 2014 crime data of all the states and index it to the state name
crime_2014_rows = []

for state in state_name_list:

    if debug:
        print(f"The current state is {state}...")
    # Give each row the state name so that it can be used as an index later.
    # assign() returns a new frame, so nothing is written into the filtered
    # slice left behind by the footer cleanup.
    crime_by_state[state] = crime_by_state[state].assign(State=state)
    # Use "this_state" as short-hand
    this_state = crime_by_state[state]
    # Grab the specific row we want; compare as text because "Year" may be
    # stored as strings (the files carried text footers)
    crime_2014_rows.append(this_state[this_state["Year"].astype(str) == "2014"])

# One concat instead of growing the frame row by row
# (DataFrame.append was removed in pandas 2.0)
all_state_crime_2014 = pd.concat(crime_2014_rows).set_index("State")
# Remove bad columns
all_state_crime_2014 = all_state_crime_2014.loc[:, ~all_state_crime_2014.columns.str.contains('^Unnamed')]

# Attach the crime and poverty columns to the gun ownership table.
# Only columns that are not already present are joined, so re-running the
# cell does not raise on overlapping column names.
for extra_df in (all_state_crime_2014, poverty_rate_df):
    new_columns = [col for col in extra_df.columns if col not in gun_ownership_df.columns]
    gun_ownership_df = gun_ownership_df.join(extra_df[new_columns], how="outer")

# Drop data for US, Puerto Rico, and American Samoa
# (rows with fewer than 5 non-missing values)
gun_ownership_df = gun_ownership_df.dropna(thresh=5)

In [None]:
# Disabled experiment, kept for reference: it copied the most recent value of
# each listed column from crime_by_state into gun_ownership_df.
# dataPoints = ['Violent crime total']
# for col in dataPoints:
#     dat = []
#     for stateName in gun_ownership_df.index:
#         if stateName in crime_by_state:
#             dat.append(crime_by_state[stateName].tail(1)[col].values[0])
#         else:
#             dat.append(None)
#     gun_ownership_df[col] = dat

# Preview the merged table (at most the first 60 rows)
gun_ownership_df.head(60)


In [None]:
# Teammate Chris code ends here

In [None]:
# Teammate Carrie code begins here- Making Heat Maps 

In [None]:
# Import the raw gun ownership data into its own frame, indexed by state.
# It is bound to a separate name on purpose: `gun_ownership_df` already holds
# the table merged with the crime and poverty data, and later cells read its
# "State" and crime-rate columns, which a fresh read of this CSV does not have.
gun_ownership_raw_df = pd.read_csv("Raw_Data/gun_ownership_by_state.csv").set_index("State")
#gun_ownership_raw_df



In [None]:
# Reduce the gun ownership table to a plain {state: guns per capita} lookup
df2 = gun_ownership_df.loc[:, ["# of guns per capita"]]
gun_ownership_dict = df2.to_dict("dict")

gun_ownership_dict2 = {}
for state_name, guns_per_capita in gun_ownership_dict["# of guns per capita"].items():
    # The "North" entry is left out of the lookup
    if state_name == "North":
        continue
    gun_ownership_dict2[state_name] = float(guns_per_capita)


In [None]:
#gun_ownership_dict["# of guns per capita"]

In [None]:

# Create HeatMap: grayscale map of guns per capita by state
# (lighter = more guns; states with no data are drawn black)

# A state at this value is drawn pure white; presumably the largest
# guns-per-capita figure in the data -- TODO confirm
max_guns_per_capita = 229.24

fig = plt.figure()
ax = fig.add_axes([0, 0, 1, 1], projection=ccrs.LambertConformal())

ax.set_extent([-125, -66.5, 20, 50], ccrs.Geodetic())

shapename = 'admin_1_states_provinces_lakes_shp'
states_shp = shpreader.natural_earth(resolution='110m',
                                     category='cultural', name=shapename)

# Hide the map background and frame. Cartopy 0.18 replaced
# background_patch / outline_patch with ax.patch / ax.spines['geo'];
# fall back to the old attributes only when the new spine is absent.
if "geo" in ax.spines:
    ax.patch.set_visible(False)
    ax.spines["geo"].set_visible(False)
else:
    ax.background_patch.set_visible(False)
    ax.outline_patch.set_visible(False)

ax.set_title('Gun Ownership Per Capita Density ')

edgecolor = 'black'

for astate in shpreader.Reader(states_shp).records():

    # Look the state up by name; states that are missing or have no value count as 0
    state_dens = gun_ownership_dict2.get(astate.attributes.get('name'), 0)
    if math.isnan(state_dens):
        state_dens = 0

    # Square-root scaling spreads out the low end; clamp so an unexpectedly
    # large value cannot produce an invalid colour component above 1
    shade = min(1.0, math.sqrt(state_dens / max_guns_per_capita))
    facecolor = (shade, shade, shade)

    # `astate.geometry` is the polygon to plot
    ax.add_geometries([astate.geometry], ccrs.PlateCarree(),
                      facecolor=facecolor, edgecolor=edgecolor)

#plt.show()


In [None]:
# Load the U.S. totals crime file the same way `us_totals_df` is loaded:
# same file-name casing as `base_file_path`, column names on the sixth line (header=5)
crime_by_state_df = pd.read_csv("Raw_Data/CrimeStateByState_US.csv", header=5)
#crime_by_state_df.head()

In [None]:
# Grayscale map of guns per capita by state, drawn from gun_ownership_dict2
# (lighter = more guns; states with no data are drawn black)

# A state at this value is drawn pure white; presumably the largest
# guns-per-capita figure in the data -- TODO confirm
max_guns_per_capita = 229.24

fig = plt.figure()
ax = fig.add_axes([0, 0, 1, 1], projection=ccrs.LambertConformal())

ax.set_extent([-125, -66.5, 20, 50], ccrs.Geodetic())

shapename = 'admin_1_states_provinces_lakes_shp'
states_shp = shpreader.natural_earth(resolution='110m',
                                     category='cultural', name=shapename)

# Hide the map background and frame. Cartopy 0.18 replaced
# background_patch / outline_patch with ax.patch / ax.spines['geo'];
# fall back to the old attributes only when the new spine is absent.
if "geo" in ax.spines:
    ax.patch.set_visible(False)
    ax.spines["geo"].set_visible(False)
else:
    ax.background_patch.set_visible(False)
    ax.outline_patch.set_visible(False)

ax.set_title('Gun Ownership Per Capita Density ')

edgecolor = 'black'

for astate in shpreader.Reader(states_shp).records():

    # Look the state up by name; states that are missing or have no value count as 0
    state_dens = gun_ownership_dict2.get(astate.attributes.get('name'), 0)
    if math.isnan(state_dens):
        state_dens = 0

    # Square-root scaling spreads out the low end; clamp so an unexpectedly
    # large value cannot produce an invalid colour component above 1
    shade = min(1.0, math.sqrt(state_dens / max_guns_per_capita))
    facecolor = (shade, shade, shade)

    # `astate.geometry` is the polygon to plot
    ax.add_geometries([astate.geometry], ccrs.PlateCarree(),
                      facecolor=facecolor, edgecolor=edgecolor)

#plt.show()

In [None]:
# Heat map, murder by state

In [None]:
#all_state_crime_2014.head()

In [None]:
# Grayscale heat map of murder and nonnegligent manslaughter by state
# (lighter = higher value; states with no data are drawn black)
fig = plt.figure()
ax = fig.add_axes([0, 0, 1, 1], projection=ccrs.LambertConformal())

ax.set_extent([-125, -66.5, 20, 50], ccrs.Geodetic())

shapename = 'admin_1_states_provinces_lakes_shp'
states_shp = shpreader.natural_earth(resolution='110m',
                                     category='cultural', name=shapename)

# Shade by the murder figures named in the title. They are read straight from
# the 2014 crime table so this cell does not depend on `murder_state_df`,
# which is only created in a later cell.
murder_values = pd.to_numeric(
    all_state_crime_2014["Murder and nonnegligent Manslaughter"], errors="coerce"
).dropna()

# The crime table spells states like "New_Hampshire" and "DC"; the shapefile
# names are assumed to be "New Hampshire" / "District of Columbia" -- TODO confirm
murder_by_state = {
    ("District of Columbia" if state == "DC" else str(state).replace("_", " ")): float(value)
    for state, value in murder_values.items()
}
# The largest value maps to white
max_murder = max(murder_by_state.values(), default=0.0)

# Hide the map background and frame. Cartopy 0.18 replaced
# background_patch / outline_patch with ax.patch / ax.spines['geo'];
# fall back to the old attributes only when the new spine is absent.
if "geo" in ax.spines:
    ax.patch.set_visible(False)
    ax.spines["geo"].set_visible(False)
else:
    ax.background_patch.set_visible(False)
    ax.outline_patch.set_visible(False)

ax.set_title('Murder by Per Capita Density')

edgecolor = 'black'

for astate in shpreader.Reader(states_shp).records():

    # States missing from the table count as 0
    state_dens = murder_by_state.get(astate.attributes.get('name'), 0)

    # Square-root scaling relative to the largest state, kept within [0, 1]
    shade = math.sqrt(state_dens / max_murder) if max_murder > 0 else 0.0
    facecolor = (shade, shade, shade)

    # `astate.geometry` is the polygon to plot
    ax.add_geometries([astate.geometry], ccrs.PlateCarree(),
                      facecolor=facecolor, edgecolor=edgecolor)

#plt.show()

In [None]:
# Heat map, motor vehicle theft by state
#all_state_crime_2014

In [None]:
# Murder values per state: the "Murder and nonnegligent Manslaughter" column of
# the 2014 crime table (a Series indexed by state, despite the `_df` name).
# NOTE(review): a heat-map cell above references murder_state_df before this cell defines it.
murder_state_df = all_state_crime_2014.loc[:, "Murder and nonnegligent Manslaughter"]

In [None]:
# Aggravated assault values per state for the heat map: the "Aggravated assault"
# column of the 2014 crime table (a Series indexed by state, despite the `_df` name)
assault_state_df = all_state_crime_2014.loc[:, "Aggravated assault"]

In [None]:
# Create the map: grayscale heat map of aggravated assault by state
# (lighter = higher value; states with no data are drawn black)
fig = plt.figure()
ax = fig.add_axes([0, 0, 1, 1], projection=ccrs.LambertConformal())

ax.set_extent([-125, -66.5, 20, 50], ccrs.Geodetic())

shapename = 'admin_1_states_provinces_lakes_shp'
states_shp = shpreader.natural_earth(resolution='110m',
                                     category='cultural', name=shapename)

# Shade by the assault figures named in the title
assault_values = pd.to_numeric(assault_state_df, errors="coerce").dropna()

# The crime table spells states like "New_Hampshire" and "DC"; the shapefile
# names are assumed to be "New Hampshire" / "District of Columbia" -- TODO confirm
assault_by_state = {
    ("District of Columbia" if state == "DC" else str(state).replace("_", " ")): float(value)
    for state, value in assault_values.items()
}
# The largest value maps to white
max_assault = max(assault_by_state.values(), default=0.0)

# Hide the map background and frame. Cartopy 0.18 replaced
# background_patch / outline_patch with ax.patch / ax.spines['geo'];
# fall back to the old attributes only when the new spine is absent.
if "geo" in ax.spines:
    ax.patch.set_visible(False)
    ax.spines["geo"].set_visible(False)
else:
    ax.background_patch.set_visible(False)
    ax.outline_patch.set_visible(False)

ax.set_title('Aggravated assault by Per Capita Density')

edgecolor = 'black'

for astate in shpreader.Reader(states_shp).records():

    # States missing from the table count as 0
    state_dens = assault_by_state.get(astate.attributes.get('name'), 0)

    # Square-root scaling relative to the largest state, kept within [0, 1]
    shade = math.sqrt(state_dens / max_assault) if max_assault > 0 else 0.0
    facecolor = (shade, shade, shade)

    # `astate.geometry` is the polygon to plot
    ax.add_geometries([astate.geometry], ccrs.PlateCarree(),
                      facecolor=facecolor, edgecolor=edgecolor)

#plt.show()

In [None]:
# Robbery values per state for the heat map: the "Robbery" column of the
# 2014 crime table (a Series indexed by state, despite the `_df` name)
robbery_state_df = all_state_crime_2014.loc[:, "Robbery"]


In [None]:
# Create the map: grayscale heat map of robbery by state
# (lighter = higher value; states with no data are drawn black)
fig = plt.figure()
ax = fig.add_axes([0, 0, 1, 1], projection=ccrs.LambertConformal())

ax.set_extent([-125, -66.5, 20, 50], ccrs.Geodetic())

shapename = 'admin_1_states_provinces_lakes_shp'
states_shp = shpreader.natural_earth(resolution='110m',
                                     category='cultural', name=shapename)

# Shade by the robbery figures named in the title
robbery_values = pd.to_numeric(robbery_state_df, errors="coerce").dropna()

# The crime table spells states like "New_Hampshire" and "DC"; the shapefile
# names are assumed to be "New Hampshire" / "District of Columbia" -- TODO confirm
robbery_by_state = {
    ("District of Columbia" if state == "DC" else str(state).replace("_", " ")): float(value)
    for state, value in robbery_values.items()
}
# The largest value maps to white
max_robbery = max(robbery_by_state.values(), default=0.0)

# Hide the map background and frame. Cartopy 0.18 replaced
# background_patch / outline_patch with ax.patch / ax.spines['geo'];
# fall back to the old attributes only when the new spine is absent.
if "geo" in ax.spines:
    ax.patch.set_visible(False)
    ax.spines["geo"].set_visible(False)
else:
    ax.background_patch.set_visible(False)
    ax.outline_patch.set_visible(False)

ax.set_title('Robbery by Per Capita Density')

edgecolor = 'black'

for astate in shpreader.Reader(states_shp).records():

    # States missing from the table count as 0
    state_dens = robbery_by_state.get(astate.attributes.get('name'), 0)

    # Square-root scaling relative to the largest state, kept within [0, 1]
    shade = math.sqrt(state_dens / max_robbery) if max_robbery > 0 else 0.0
    facecolor = (shade, shade, shade)

    # `astate.geometry` is the polygon to plot
    ax.add_geometries([astate.geometry], ccrs.PlateCarree(),
                      facecolor=facecolor, edgecolor=edgecolor)

#plt.show()

In [None]:
# Motor vehicle theft rate per state for the heat map: the "Motor vehicle theft rate"
# column of the 2014 crime table (a Series indexed by state, despite the `_df` name)
motor_theft_state_df = all_state_crime_2014.loc[:, "Motor vehicle theft rate"]


In [None]:
# Create the map: grayscale heat map of motor vehicle theft rate by state
# (lighter = higher value; states with no data are drawn black)
fig = plt.figure()
ax = fig.add_axes([0, 0, 1, 1], projection=ccrs.LambertConformal())

ax.set_extent([-125, -66.5, 20, 50], ccrs.Geodetic())

shapename = 'admin_1_states_provinces_lakes_shp'
states_shp = shpreader.natural_earth(resolution='110m',
                                     category='cultural', name=shapename)

# Shade by the motor vehicle theft figures named in the title
motor_theft_values = pd.to_numeric(motor_theft_state_df, errors="coerce").dropna()

# The crime table spells states like "New_Hampshire" and "DC"; the shapefile
# names are assumed to be "New Hampshire" / "District of Columbia" -- TODO confirm
motor_theft_by_state = {
    ("District of Columbia" if state == "DC" else str(state).replace("_", " ")): float(value)
    for state, value in motor_theft_values.items()
}
# The largest value maps to white
max_motor_theft = max(motor_theft_by_state.values(), default=0.0)

# Hide the map background and frame. Cartopy 0.18 replaced
# background_patch / outline_patch with ax.patch / ax.spines['geo'];
# fall back to the old attributes only when the new spine is absent.
if "geo" in ax.spines:
    ax.patch.set_visible(False)
    ax.spines["geo"].set_visible(False)
else:
    ax.background_patch.set_visible(False)
    ax.outline_patch.set_visible(False)

ax.set_title('Motor Vehicle Theft by Per Capita Density')

edgecolor = 'black'

for astate in shpreader.Reader(states_shp).records():

    # States missing from the table count as 0
    state_dens = motor_theft_by_state.get(astate.attributes.get('name'), 0)

    # Square-root scaling relative to the largest state, kept within [0, 1]
    shade = math.sqrt(state_dens / max_motor_theft) if max_motor_theft > 0 else 0.0
    facecolor = (shade, shade, shade)

    # `astate.geometry` is the polygon to plot
    ax.add_geometries([astate.geometry], ccrs.PlateCarree(),
                      facecolor=facecolor, edgecolor=edgecolor)

#plt.show()

In [None]:
# Burglary rate per state for the heat map: the "Burglary rate" column of the
# 2014 crime table (a Series indexed by state, despite the `_df` name)
burg_state_df = all_state_crime_2014.loc[:, "Burglary rate"]

In [None]:
# Create the map: grayscale heat map of burglary rate by state
# (lighter = higher value; states with no data are drawn black)
fig = plt.figure()
ax = fig.add_axes([0, 0, 1, 1], projection=ccrs.LambertConformal())

ax.set_extent([-125, -66.5, 20, 50], ccrs.Geodetic())

shapename = 'admin_1_states_provinces_lakes_shp'
states_shp = shpreader.natural_earth(resolution='110m',
                                     category='cultural', name=shapename)

# Shade by the burglary figures named in the title
burglary_values = pd.to_numeric(burg_state_df, errors="coerce").dropna()

# The crime table spells states like "New_Hampshire" and "DC"; the shapefile
# names are assumed to be "New Hampshire" / "District of Columbia" -- TODO confirm
burglary_by_state = {
    ("District of Columbia" if state == "DC" else str(state).replace("_", " ")): float(value)
    for state, value in burglary_values.items()
}
# The largest value maps to white
max_burglary = max(burglary_by_state.values(), default=0.0)

# Hide the map background and frame. Cartopy 0.18 replaced
# background_patch / outline_patch with ax.patch / ax.spines['geo'];
# fall back to the old attributes only when the new spine is absent.
if "geo" in ax.spines:
    ax.patch.set_visible(False)
    ax.spines["geo"].set_visible(False)
else:
    ax.background_patch.set_visible(False)
    ax.outline_patch.set_visible(False)

ax.set_title('Burglary rate by Per Capita Density')

edgecolor = 'black'

for astate in shpreader.Reader(states_shp).records():

    # States missing from the table count as 0
    state_dens = burglary_by_state.get(astate.attributes.get('name'), 0)

    # Square-root scaling relative to the largest state, kept within [0, 1]
    shade = math.sqrt(state_dens / max_burglary) if max_burglary > 0 else 0.0
    facecolor = (shade, shade, shade)

    # `astate.geometry` is the polygon to plot
    ax.add_geometries([astate.geometry], ccrs.PlateCarree(),
                      facecolor=facecolor, edgecolor=edgecolor)

#plt.show()

In [None]:
# Name of the gun ownership text file. A dotted name cannot be used here:
# `gun_ownership_by_state.txt = ...` is an attribute assignment on an
# undefined variable and raises NameError.
gun_ownership_txt_path = "gun_ownership_by_state.txt"


In [None]:
# Read the gun ownership text file into a list of its lines (line endings kept)
with open("gun_ownership_by_state.txt", "r") as myfile:
    gun_ownership = list(myfile)

In [None]:
# Teammate Carrie code ends here

In [None]:
# Teammate Matt code begins here

# Question 1: Is there a relationship between gun ownership and the murder/nonnegligent manslaughter rate?

In [None]:
# Murder/manslaughter rate (red, left axis) and guns per capita (blue, right axis)
# for every state, drawn on twin y-axes that share the state x-axis
fig, ax = plt.subplots(figsize=(20, 10))
plot_states = gun_ownership_df["State"]

# left axis: crime rate
ax.plot(plot_states, gun_ownership_df["Murder and nonnegligent manslaughter rate"], color="red", marker="o")
ax.set_xlabel("State", color="purple", fontsize=22)
ax.set_ylabel("Murder and manslaughter rate", color="red", fontsize=22)
plt.xticks(plot_states, rotation=90, fontsize=16)

# right axis: twin object sharing the same x-axis
ax2 = ax.twinx()
ax2.plot(plot_states, gun_ownership_df["# of guns per capita"], color="blue", marker="o")
ax2.set_ylabel("# of guns per capita", color="blue", fontsize=22)

# Save before showing, under a chart-specific name so the plots in this
# notebook do not overwrite one another
fig.savefig('guns_vs_murder_rate_by_state.jpg',
            format='jpeg',
            dpi=100,
            bbox_inches='tight')

plt.show()

# Question 2: Is there a relationship between gun ownership and aggravated assault? 


In [None]:
    
# Aggravated assault rate (red, left axis) and guns per capita (blue, right axis)
# for every state, drawn on twin y-axes that share the state x-axis
fig, ax = plt.subplots(figsize=(20, 10))
plot_states = gun_ownership_df["State"]

# left axis: crime rate
ax.plot(plot_states, gun_ownership_df["Aggravated assault rate"], color="red", marker="o")
ax.set_xlabel("State", color="purple", fontsize=22)
ax.set_ylabel("Aggravated assault rate", color="red", fontsize=22)
plt.xticks(plot_states, rotation=90, fontsize=16)

# right axis: twin object sharing the same x-axis
ax2 = ax.twinx()
ax2.plot(plot_states, gun_ownership_df["# of guns per capita"], color="blue", marker="o")
ax2.set_ylabel("# of guns per capita", color="blue", fontsize=22)

# Save before showing, under a chart-specific name so the plots in this
# notebook do not overwrite one another
fig.savefig('guns_vs_aggravated_assault_rate_by_state.jpg',
            format='jpeg',
            dpi=100,
            bbox_inches='tight')

plt.show()

# Question 2: Is there a relationship between robbery and gun ownership? 

In [None]:

# Robbery rate (red, left axis) and guns per capita (blue, right axis)
# for every state, drawn on twin y-axes that share the state x-axis
fig, ax = plt.subplots(figsize=(20, 10))
plot_states = gun_ownership_df["State"]

# left axis: crime rate
ax.plot(plot_states, gun_ownership_df["Robbery rate"], color="red", marker="o")
ax.set_xlabel("State", color="purple", fontsize=22)
ax.set_ylabel("Robbery rate", color="red", fontsize=22)
plt.xticks(plot_states, rotation=90, fontsize=16)

# right axis: twin object sharing the same x-axis
ax2 = ax.twinx()
ax2.plot(plot_states, gun_ownership_df["# of guns per capita"], color="blue", marker="o")
ax2.set_ylabel("# of guns per capita", color="blue", fontsize=22)

# Save before showing, under a chart-specific name so the plots in this
# notebook do not overwrite one another
fig.savefig('guns_vs_robbery_rate_by_state.jpg',
            format='jpeg',
            dpi=100,
            bbox_inches='tight')

plt.show()

# Question 3: Is there a relationship between motor vehicle theft and gun ownership?


In [None]:
# Motor vehicle theft rate (red, left axis) and guns per capita (blue, right axis)
# for every state, drawn on twin y-axes that share the state x-axis
fig, ax = plt.subplots(figsize=(20, 10))
plot_states = gun_ownership_df["State"]

# left axis: crime rate
ax.plot(plot_states, gun_ownership_df["Motor vehicle theft rate"], color="red", marker="o")
ax.set_xlabel("State", color="purple", fontsize=22)
ax.set_ylabel("Motor vehicle theft rate", color="red", fontsize=22)
plt.xticks(plot_states, rotation=90, fontsize=16)

# right axis: twin object sharing the same x-axis
ax2 = ax.twinx()
ax2.plot(plot_states, gun_ownership_df["# of guns per capita"], color="blue", marker="o")
ax2.set_ylabel("# of guns per capita", color="blue", fontsize=22)

# Save before showing, under a chart-specific name so the plots in this
# notebook do not overwrite one another
fig.savefig('guns_vs_motor_vehicle_theft_rate_by_state.jpg',
            format='jpeg',
            dpi=100,
            bbox_inches='tight')

plt.show()

# Question 4: Is there a relationship between burglary and gun ownership? 

In [None]:

# Burglary rate (red, left axis) and guns per capita (blue, right axis)
# for every state, drawn on twin y-axes that share the state x-axis
fig, ax = plt.subplots(figsize=(20, 10))
plot_states = gun_ownership_df["State"]

# left axis: crime rate
ax.plot(plot_states, gun_ownership_df["Burglary rate"], color="red", marker="o")
ax.set_xlabel("State", color="purple", fontsize=22)
ax.set_ylabel("Burglary rate", color="red", fontsize=22)
plt.xticks(plot_states, rotation=90, fontsize=16)

# right axis: twin object sharing the same x-axis
ax2 = ax.twinx()
ax2.plot(plot_states, gun_ownership_df["# of guns per capita"], color="blue", marker="o")
ax2.set_ylabel("# of guns per capita", color="blue", fontsize=22)

# Save before showing, under a chart-specific name so the plots in this
# notebook do not overwrite one another
fig.savefig('guns_vs_burglary_rate_by_state.jpg',
            format='jpeg',
            dpi=100,
            bbox_inches='tight')

plt.show()

In [None]:
# Teammate Matt code ends here

In [None]:
# Teammate Glenda code begins here

In [None]:
# Extract the state name and guns-per-capita columns from gun_ownership_df.
# When "State" is already a column the index is simply discarded; when it only
# exists as the index it is moved back into a column, so the selection below
# works in either case.
state_is_column = "State" in gun_ownership_df.columns
gun_ownership_per_capita_df = gun_ownership_df.reset_index(drop=state_is_column)
gun_ownership_per_capita_df = gun_ownership_per_capita_df[["State", "# of guns per capita"]]
gun_ownership_per_capita_df.head()

In [None]:
# Turn the "State" index of the 2014 crime table back into an ordinary column
all_state_crime_2014_df = all_state_crime_2014.reset_index()
all_state_crime_2014_df.head()

In [None]:
# State plus murder and non-negligent manslaughter rate, taken from the 2014 crime table
murder_manslaughter_2014_df = all_state_crime_2014_df.loc[
    :, ["State", "Murder and nonnegligent manslaughter rate"]
]
murder_manslaughter_2014_df.head()

In [None]:
# State plus aggravated assault rate, taken from the 2014 crime table
aggravated_assault_2014_df = all_state_crime_2014_df.loc[
    :, ["State", "Aggravated assault rate"]
]
aggravated_assault_2014_df.head()

In [None]:
# State plus motor vehicle theft rate, taken from the 2014 crime table
vehicle_theft_2014_df = all_state_crime_2014_df.loc[
    :, ["State", "Motor vehicle theft rate"]
]
vehicle_theft_2014_df.head()

In [None]:
# State plus robbery rate, taken from the 2014 crime table
robbery_2014_df = all_state_crime_2014_df.loc[:, ["State", "Robbery rate"]]
robbery_2014_df.head()

In [None]:
# State plus burglary rate, taken from the 2014 crime table
burglary_2014_df = all_state_crime_2014_df.loc[:, ["State", "Burglary rate"]]
burglary_2014_df.head()

In [None]:
# import plotly.express to create bubble charts
import plotly.express as px

In [None]:
# Bubble chart of guns per capita against burglary rate, one bubble per state
# (bubble size follows the burglary rate); shown inline and exported as HTML
combined_data_df = burglary_2014_df.merge(gun_ownership_per_capita_df, on="State")

fig = px.scatter(
    combined_data_df,
    x="Burglary rate",
    y="# of guns per capita",
    size="Burglary rate",
    color="State",
    hover_name="State",
    title="# of Guns Per Capita vs Burglary Rate in the US",
)

# Centre the title near the top of the figure
title_layout = {'y': 0.9, 'x': 0.5, 'xanchor': 'center', 'yanchor': 'top'}
fig.update_layout(title_font_color="MediumSlateBlue", title=title_layout)

fig.show()
fig.write_html("Images/gunsVSburglary.html")

In [None]:
# Bubble chart of guns per capita against robbery rate, one bubble per state
# (bubble size follows the robbery rate); shown inline and exported as HTML
combined_data_df = robbery_2014_df.merge(gun_ownership_per_capita_df, on="State")

fig = px.scatter(
    combined_data_df,
    x="Robbery rate",
    y="# of guns per capita",
    size="Robbery rate",
    color="State",
    hover_name="State",
    title="# of Guns Per Capita vs Robbery Rate in the US",
)

# Centre the title near the top of the figure
title_layout = {'y': 0.9, 'x': 0.5, 'xanchor': 'center', 'yanchor': 'top'}
fig.update_layout(title_font_color="LightSeaGreen", title=title_layout)

fig.show()
fig.write_html("Images/gunsVSrobbery.html")

In [None]:
# Bubble chart of guns per capita against motor vehicle theft rate, one bubble per state
# (bubble size follows the theft rate); shown inline and exported as HTML
combined_data_df = vehicle_theft_2014_df.merge(gun_ownership_per_capita_df, on="State")

fig = px.scatter(
    combined_data_df,
    x="Motor vehicle theft rate",
    y="# of guns per capita",
    size="Motor vehicle theft rate",
    color="State",
    hover_name="State",
    title="# of Guns Per Capita vs Motor Vehicle Theft Rate in the US",
)

# Centre the title near the top of the figure
title_layout = {'y': 0.9, 'x': 0.5, 'xanchor': 'center', 'yanchor': 'top'}
fig.update_layout(title_font_color="LightSalmon", title=title_layout)

fig.show()
fig.write_html("Images/gunsVSvehicletheft.html")

In [None]:
# Bubble chart of guns per capita against aggravated assault rate, one bubble per state
# (bubble size follows the assault rate); shown inline and exported as HTML
combined_data_df = aggravated_assault_2014_df.merge(gun_ownership_per_capita_df, on="State")

fig = px.scatter(
    combined_data_df,
    x="Aggravated assault rate",
    y="# of guns per capita",
    size="Aggravated assault rate",
    color="State",
    hover_name="State",
    title="# of Guns Per Capita vs Aggravated Assault Rate in the US",
)

# Centre the title near the top of the figure
title_layout = {'y': 0.9, 'x': 0.5, 'xanchor': 'center', 'yanchor': 'top'}
fig.update_layout(title_font_color="green", title=title_layout)

fig.show()
fig.write_html("Images/gunsVSassault.html")

In [None]:
# Bubble chart of guns per capita against murder and non-negligent manslaughter rate,
# one bubble per state (bubble size follows the murder rate); shown inline and exported as HTML
combined_data_df = murder_manslaughter_2014_df.merge(gun_ownership_per_capita_df, on="State")

fig = px.scatter(
    combined_data_df,
    x="Murder and nonnegligent manslaughter rate",
    y="# of guns per capita",
    size="Murder and nonnegligent manslaughter rate",
    color="State",
    hover_name="State",
    title="# of Guns Per Capita vs Murder and Non-Negligent Manslaughter Rate in the US",
)

# Centre the title near the top of the figure
title_layout = {'y': 0.9, 'x': 0.5, 'xanchor': 'center', 'yanchor': 'top'}
fig.update_layout(title_font_color="purple", title=title_layout)

fig.show()
fig.write_html("Images/gunsVSmurder.html")

In [None]:
# Hypothesis Testing

In [None]:
# Rank the rows of gun_ownership_df from most to fewest guns per capita, ready for grouping
sorted_gun_ownership = gun_ownership_df.sort_values(by="# of guns per capita", ascending=False)
sorted_gun_ownership.head()

In [None]:
# Split the ranked rows into five consecutive groups for ANOVA testing:
# four groups of 10 rows followed by one group of 11 (row positions 40-50)
first10_sorted_gun_ownership = sorted_gun_ownership.iloc[:10]
second10_sorted_gun_ownership = sorted_gun_ownership.iloc[10:20]
third10_sorted_gun_ownership = sorted_gun_ownership.iloc[20:30]
fourth10_sorted_gun_ownership = sorted_gun_ownership.iloc[30:40]
fifth11_sorted_gun_ownership = sorted_gun_ownership.iloc[40:51]

# Only the last group is displayed as the cell output
fifth11_sorted_gun_ownership

In [None]:
# ANOVA test

In [None]:
# One-way ANOVA of the murder and non-negligent manslaughter rate across the
# five gun-ownership groups; the result carries the F statistic and the p-value
murder1, murder2, murder3, murder4, murder5 = (
    group["Murder and nonnegligent manslaughter rate"]
    for group in (
        first10_sorted_gun_ownership,
        second10_sorted_gun_ownership,
        third10_sorted_gun_ownership,
        fourth10_sorted_gun_ownership,
        fifth11_sorted_gun_ownership,
    )
)
stats.f_oneway(murder1, murder2, murder3, murder4, murder5)

In [None]:
# One-way ANOVA of the robbery rate across the five gun-ownership groups;
# the result carries the F statistic and the p-value
robbery1, robbery2, robbery3, robbery4, robbery5 = (
    group["Robbery rate"]
    for group in (
        first10_sorted_gun_ownership,
        second10_sorted_gun_ownership,
        third10_sorted_gun_ownership,
        fourth10_sorted_gun_ownership,
        fifth11_sorted_gun_ownership,
    )
)
stats.f_oneway(robbery1, robbery2, robbery3, robbery4, robbery5)

In [None]:
# One-way ANOVA of the aggravated assault rate across the five gun-ownership groups;
# the result carries the F statistic and the p-value
assault1, assault2, assault3, assault4, assault5 = (
    group["Aggravated assault rate"]
    for group in (
        first10_sorted_gun_ownership,
        second10_sorted_gun_ownership,
        third10_sorted_gun_ownership,
        fourth10_sorted_gun_ownership,
        fifth11_sorted_gun_ownership,
    )
)
stats.f_oneway(assault1, assault2, assault3, assault4, assault5)

In [None]:
# One-way ANOVA of the burglary rate across the five gun-ownership groups;
# the result carries the F statistic and the p-value
burglary1, burglary2, burglary3, burglary4, burglary5 = (
    group["Burglary rate"]
    for group in (
        first10_sorted_gun_ownership,
        second10_sorted_gun_ownership,
        third10_sorted_gun_ownership,
        fourth10_sorted_gun_ownership,
        fifth11_sorted_gun_ownership,
    )
)
stats.f_oneway(burglary1, burglary2, burglary3, burglary4, burglary5)

In [None]:
# One-way ANOVA of the motor vehicle theft rate across the five gun-ownership groups;
# the result carries the F statistic and the p-value
vehicletheft1, vehicletheft2, vehicletheft3, vehicletheft4, vehicletheft5 = (
    group["Motor vehicle theft rate"]
    for group in (
        first10_sorted_gun_ownership,
        second10_sorted_gun_ownership,
        third10_sorted_gun_ownership,
        fourth10_sorted_gun_ownership,
        fifth11_sorted_gun_ownership,
    )
)
stats.f_oneway(vehicletheft1, vehicletheft2, vehicletheft3, vehicletheft4, vehicletheft5)

In [None]:
# Teammate Glenda code ends here

In [None]:
# Teammate Chris code resumes here
# NOTE(review): this import belongs in the notebook's top import cell; it is
# kept here because the regression cells below call `linregress` by bare name.
from scipy.stats import linregress

# Crime-rate columns used as the dependent variables
murder_rate = gun_ownership_df["Murder and nonnegligent manslaughter rate"]
assault_rate = gun_ownership_df["Aggravated assault rate"]
GTA_rate = gun_ownership_df["Motor vehicle theft rate"]
robbery_rate = gun_ownership_df["Robbery rate"]
burglary_rate = gun_ownership_df["Burglary rate"]

# Explanatory variables
gun_ownership_rate = gun_ownership_df["# of guns per capita"]
poverty_rate_2014 = gun_ownership_df["2014 Poverty Rate"]

# (label used in the printed sentence, Series) pairs; the order here is the
# order in which the results are reported.
crime_rates = [
    ("murder/manslaughter rate", murder_rate),
    ("aggravated assault rate", assault_rate),
    ("motor vehicle theft rate", GTA_rate),
    ("robbery rate", robbery_rate),
    ("burglary rate", burglary_rate),
]
predictors = [
    ("gun ownership rate", gun_ownership_rate),
    ("poverty rate", poverty_rate_2014),
]

# Report the Pearson correlation coefficient of every predictor against every
# crime rate (only correlations are computed here; the regressions are fitted
# in the cells that follow).
for group_number, (predictor_label, predictor) in enumerate(predictors):
    if group_number:
        print()  # blank line between predictor groups
    for crime_label, crime_rate in crime_rates:
        # pearsonr returns (coefficient, p-value); only the coefficient is shown
        correlation = st.pearsonr(predictor, crime_rate)
        print(f"The correlation between State {predictor_label} and {crime_label} is {round(correlation[0],2)}")

In [None]:
# Regress the murder rate on gun ownership twice -- once over every state and
# once with Wyoming (treated as an outlier) excluded -- and plot both fits
# side by side.
# NOTE(review): drop(index=["Wyoming"]) assumes both Series are indexed by
# state name -- confirm where gun_ownership_df is built.
gun_rate_no_wy = gun_ownership_rate.drop(index=["Wyoming"])
murder_rate_no_wy = murder_rate.drop(index=["Wyoming"])

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 4))  # 1 row, 2 columns

# Left panel: all states
(slope, intercept, rvalue, pvalue, stderr) = linregress(gun_ownership_rate, murder_rate)
regress_values = gun_ownership_rate * slope + intercept
ax1.scatter(gun_ownership_rate, murder_rate)
ax1.plot(gun_ownership_rate, regress_values, "r-")
ax1.set_title("All States")

# Right panel: same fit with the outlier removed
(slope, intercept, rvalue, new_pvalue, stderr) = linregress(gun_rate_no_wy, murder_rate_no_wy)
regress_values = gun_rate_no_wy * slope + intercept
# Equation of the outlier-excluded fit; kept as a variable, not drawn on the figure
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
ax2.scatter(gun_rate_no_wy, murder_rate_no_wy)
ax2.plot(gun_rate_no_wy, regress_values, "r-")
ax2.set_title("Wyoming (Outlier) Removed")

# plt.title / plt.xlabel / plt.ylabel only touch the current axes (the right
# panel), which left the first panel unlabelled. Label each panel explicitly
# and put the shared title on the figure instead.
for panel in (ax1, ax2):
    panel.set_xlabel("Gun Ownership Rate")
    panel.set_ylabel("Murder and Non-Negligent Manslaughter Rate")
fig.suptitle("Gun Ownership Rate vs Murder and Non-Negligent Manslaughter Rate")
fig.tight_layout()

print(f"The p-value is {pvalue}")
print(f"The p-value with Wyoming (outlier) removed is {new_pvalue}")

In [None]:
# Regress the aggravated assault rate on gun ownership twice -- once over
# every state and once with Wyoming (treated as an outlier) excluded -- and
# plot both fits side by side.
# NOTE(review): drop(index=["Wyoming"]) assumes both Series are indexed by
# state name -- confirm where gun_ownership_df is built.
gun_rate_no_wy = gun_ownership_rate.drop(index=["Wyoming"])
assault_rate_no_wy = assault_rate.drop(index=["Wyoming"])

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 4))  # 1 row, 2 columns

# Left panel: all states
(slope, intercept, rvalue, pvalue, stderr) = linregress(gun_ownership_rate, assault_rate)
regress_values = gun_ownership_rate * slope + intercept
ax1.scatter(gun_ownership_rate, assault_rate)
ax1.plot(gun_ownership_rate, regress_values, "r-")
ax1.set_title("All States")

# Right panel: same fit with the outlier removed
(slope, intercept, rvalue, new_pvalue, stderr) = linregress(gun_rate_no_wy, assault_rate_no_wy)
regress_values = gun_rate_no_wy * slope + intercept
# Equation of the outlier-excluded fit; kept as a variable, not drawn on the figure
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
ax2.scatter(gun_rate_no_wy, assault_rate_no_wy)
ax2.plot(gun_rate_no_wy, regress_values, "r-")
ax2.set_title("Wyoming (Outlier) Removed")

# plt.title / plt.xlabel / plt.ylabel only touch the current axes (the right
# panel), which left the first panel unlabelled. Label each panel explicitly
# and put the shared title on the figure instead.
for panel in (ax1, ax2):
    panel.set_xlabel("Gun Ownership Rate")
    panel.set_ylabel("Aggravated Assault Rate")
fig.suptitle("Gun Ownership Rate vs Aggravated Assault Rate")
fig.tight_layout()

print(f"The p-value is {pvalue}")
print(f"The p-value with Wyoming (outlier) removed is {new_pvalue}")

In [None]:
# Regress the motor vehicle theft rate on gun ownership twice -- once over
# every state and once with Wyoming (treated as an outlier) excluded -- and
# plot both fits side by side.
# NOTE(review): drop(index=["Wyoming"]) assumes both Series are indexed by
# state name -- confirm where gun_ownership_df is built.
gun_rate_no_wy = gun_ownership_rate.drop(index=["Wyoming"])
GTA_rate_no_wy = GTA_rate.drop(index=["Wyoming"])

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 4))  # 1 row, 2 columns

# Left panel: all states
(slope, intercept, rvalue, pvalue, stderr) = linregress(gun_ownership_rate, GTA_rate)
regress_values = gun_ownership_rate * slope + intercept
ax1.scatter(gun_ownership_rate, GTA_rate)
ax1.plot(gun_ownership_rate, regress_values, "r-")
ax1.set_title("All States")

# Right panel: same fit with the outlier removed
(slope, intercept, rvalue, new_pvalue, stderr) = linregress(gun_rate_no_wy, GTA_rate_no_wy)
regress_values = gun_rate_no_wy * slope + intercept
# Equation of the outlier-excluded fit; kept as a variable, not drawn on the figure
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
ax2.scatter(gun_rate_no_wy, GTA_rate_no_wy)
ax2.plot(gun_rate_no_wy, regress_values, "r-")
ax2.set_title("Wyoming (Outlier) Removed")

# plt.title / plt.xlabel / plt.ylabel only touch the current axes (the right
# panel), which left the first panel unlabelled. Label each panel explicitly
# and put the shared title on the figure instead.
for panel in (ax1, ax2):
    panel.set_xlabel("Gun Ownership Rate")
    panel.set_ylabel("Motor Vehicle Theft Rate")
fig.suptitle("Gun Ownership Rate vs Motor Vehicle Theft Rate")
fig.tight_layout()

print(f"The p-value is {pvalue}")
print(f"The p-value with Wyoming (outlier) removed is {new_pvalue}")

In [None]:
# Regress the robbery rate on gun ownership twice -- once over every state and
# once with Wyoming (treated as an outlier) excluded -- and plot both fits
# side by side.
# NOTE(review): drop(index=["Wyoming"]) assumes both Series are indexed by
# state name -- confirm where gun_ownership_df is built.
gun_rate_no_wy = gun_ownership_rate.drop(index=["Wyoming"])
robbery_rate_no_wy = robbery_rate.drop(index=["Wyoming"])

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 4))  # 1 row, 2 columns

# Left panel: all states
(slope, intercept, rvalue, pvalue, stderr) = linregress(gun_ownership_rate, robbery_rate)
regress_values = gun_ownership_rate * slope + intercept
ax1.scatter(gun_ownership_rate, robbery_rate)
ax1.plot(gun_ownership_rate, regress_values, "r-")
ax1.set_title("All States")

# Right panel: same fit with the outlier removed
(slope, intercept, rvalue, new_pvalue, stderr) = linregress(gun_rate_no_wy, robbery_rate_no_wy)
regress_values = gun_rate_no_wy * slope + intercept
# Equation of the outlier-excluded fit; kept as a variable, not drawn on the figure
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
ax2.scatter(gun_rate_no_wy, robbery_rate_no_wy)
ax2.plot(gun_rate_no_wy, regress_values, "r-")
ax2.set_title("Wyoming (Outlier) Removed")

# plt.title / plt.xlabel / plt.ylabel only touch the current axes (the right
# panel), which left the first panel unlabelled. Label each panel explicitly
# and put the shared title on the figure instead.
for panel in (ax1, ax2):
    panel.set_xlabel("Gun Ownership Rate")
    panel.set_ylabel("Robbery Rate")
fig.suptitle("Gun Ownership Rate vs Robbery Rate")
fig.tight_layout()

print(f"The p-value is {pvalue}")
print(f"The p-value with Wyoming (outlier) removed is {new_pvalue}")

In [None]:
# Regress the burglary rate on gun ownership twice -- once over every state
# and once with Wyoming (treated as an outlier) excluded -- and plot both fits
# side by side.
# NOTE(review): drop(index=["Wyoming"]) assumes both Series are indexed by
# state name -- confirm where gun_ownership_df is built.
gun_rate_no_wy = gun_ownership_rate.drop(index=["Wyoming"])
burglary_rate_no_wy = burglary_rate.drop(index=["Wyoming"])

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 4))  # 1 row, 2 columns

# Left panel: all states
(slope, intercept, rvalue, pvalue, stderr) = linregress(gun_ownership_rate, burglary_rate)
regress_values = gun_ownership_rate * slope + intercept
ax1.scatter(gun_ownership_rate, burglary_rate)
ax1.plot(gun_ownership_rate, regress_values, "r-")
ax1.set_title("All States")

# Right panel: same fit with the outlier removed
(slope, intercept, rvalue, new_pvalue, stderr) = linregress(gun_rate_no_wy, burglary_rate_no_wy)
regress_values = gun_rate_no_wy * slope + intercept
# Equation of the outlier-excluded fit; kept as a variable, not drawn on the figure
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
ax2.scatter(gun_rate_no_wy, burglary_rate_no_wy)
ax2.plot(gun_rate_no_wy, regress_values, "r-")
ax2.set_title("Wyoming (Outlier) Removed")

# plt.title / plt.xlabel / plt.ylabel only touch the current axes (the right
# panel), which left the first panel unlabelled. Label each panel explicitly
# and put the shared title on the figure instead.
for panel in (ax1, ax2):
    panel.set_xlabel("Gun Ownership Rate")
    panel.set_ylabel("Burglary Rate")
fig.suptitle("Gun Ownership Rate vs Burglary Rate")
fig.tight_layout()

print(f"The p-value is {pvalue}")
print(f"The p-value with Wyoming (outlier) removed is {new_pvalue}")

In [None]:
# Least-squares fit of the murder rate against the 2014 poverty rate, drawn
# as a scatter plot with the fitted line in red.
fit = linregress(poverty_rate_2014, murder_rate)
slope, intercept, rvalue, pvalue, stderr = fit
regress_values = poverty_rate_2014.mul(slope).add(intercept)
# Fitted equation as text; computed but not drawn on the figure
line_eq = "y = %sx + %s" % (round(slope, 2), round(intercept, 2))

ax = plt.gca()  # the axes the pyplot calls would draw on
ax.scatter(poverty_rate_2014, murder_rate)
ax.plot(poverty_rate_2014, regress_values, "r-")
ax.set_title("Poverty Rate vs Murder Rate")
ax.set_ylabel("Murder and Non-Negligent Manslaughter Rate")
ax.set_xlabel("2014 Poverty Rate")

print(f"The p-value is {pvalue}")

In [None]:
# Least-squares fit of the aggravated assault rate against the 2014 poverty
# rate, drawn as a scatter plot with the fitted line in red.
fit = linregress(poverty_rate_2014, assault_rate)
slope, intercept, rvalue, pvalue, stderr = fit
regress_values = poverty_rate_2014.mul(slope).add(intercept)
# Fitted equation as text; computed but not drawn on the figure
line_eq = "y = %sx + %s" % (round(slope, 2), round(intercept, 2))

ax = plt.gca()  # the axes the pyplot calls would draw on
ax.scatter(poverty_rate_2014, assault_rate)
ax.plot(poverty_rate_2014, regress_values, "r-")
ax.set_title("Poverty Rate vs Aggravated Assault Rate")
ax.set_ylabel("Aggravated Assault Rate")
ax.set_xlabel("2014 Poverty Rate")

print(f"The p-value is {pvalue}")

In [None]:
# Least-squares fit of the motor vehicle theft rate against the 2014 poverty
# rate, drawn as a scatter plot with the fitted line in red.
fit = linregress(poverty_rate_2014, GTA_rate)
slope, intercept, rvalue, pvalue, stderr = fit
regress_values = poverty_rate_2014.mul(slope).add(intercept)
# Fitted equation as text; computed but not drawn on the figure
line_eq = "y = %sx + %s" % (round(slope, 2), round(intercept, 2))

ax = plt.gca()  # the axes the pyplot calls would draw on
ax.scatter(poverty_rate_2014, GTA_rate)
ax.plot(poverty_rate_2014, regress_values, "r-")
ax.set_title("Poverty Rate vs Motor Vehicle Theft Rate")
ax.set_ylabel("Motor Vehicle Theft Rate")
ax.set_xlabel("2014 Poverty Rate")

print(f"The p-value is {pvalue}")

In [None]:
# Least-squares fit of the robbery rate against the 2014 poverty rate, drawn
# as a scatter plot with the fitted line in red.
fit = linregress(poverty_rate_2014, robbery_rate)
slope, intercept, rvalue, pvalue, stderr = fit
regress_values = poverty_rate_2014.mul(slope).add(intercept)
# Fitted equation as text; computed but not drawn on the figure
line_eq = "y = %sx + %s" % (round(slope, 2), round(intercept, 2))

ax = plt.gca()  # the axes the pyplot calls would draw on
ax.scatter(poverty_rate_2014, robbery_rate)
ax.plot(poverty_rate_2014, regress_values, "r-")
ax.set_title("Poverty Rate vs Robbery Rate")
ax.set_ylabel("Robbery Rate")
ax.set_xlabel("2014 Poverty Rate")

print(f"The p-value is {pvalue}")

In [None]:
# Least-squares fit of the burglary rate against the 2014 poverty rate, drawn
# as a scatter plot with the fitted line in red.
fit = linregress(poverty_rate_2014, burglary_rate)
slope, intercept, rvalue, pvalue, stderr = fit
regress_values = poverty_rate_2014.mul(slope).add(intercept)
# Fitted equation as text; computed but not drawn on the figure
line_eq = "y = %sx + %s" % (round(slope, 2), round(intercept, 2))

ax = plt.gca()  # the axes the pyplot calls would draw on
ax.scatter(poverty_rate_2014, burglary_rate)
ax.plot(poverty_rate_2014, regress_values, "r-")
ax.set_title("Poverty Rate vs Burglary Rate")
ax.set_ylabel("Burglary Rate")
ax.set_xlabel("2014 Poverty Rate")

print(f"The p-value is {pvalue}")