In [347]:
import numpy as np
import pandas as pd
import json

In [48]:
# read all Gun Violence Archive datasets from 2013-present
gva2013 = pd.read_csv('gva2013.csv')
gva2014 = pd.read_csv('gva2014.csv')
gva2015 = pd.read_csv('gva2015.csv')
gva2016 = pd.read_csv('gva2016.csv')
gva2017 = pd.read_csv('gva2017.csv')
gva2018 = pd.read_csv('gva2018.csv')
gva2019 = pd.read_csv('gva2019.csv')
gva2020 = pd.read_csv('gva2020.csv')
gva2021 = pd.read_csv('gva2021.csv')
gva2022 = pd.read_csv('gva2022.csv')
gva2023 = pd.read_csv('gva20230604.csv')

# joins all the data
dfs = [gva2013, gva2014, gva2015, gva2016, gva2017, gva2018, gva2019, gva2020, gva2021, gva2022, gva2023]
gva_data = pd.concat(dfs)
gva_data.columns


Index(['Incident ID', 'Incident Date', 'State', 'City Or County', 'Address',
       '# Victims Injured', '# Victims Killed', '# Subjects-Suspects Injured',
       '# Subjects-Suspects Killed', '# Subjects-Suspects Arrested',
       'Operations'],
      dtype='object')

In [49]:
gva_data.to_csv('./data/gvaData.csv', index=False)

OSError: Cannot save file into a non-existent directory: 'data'

In [50]:
import requests
from bs4 import BeautifulSoup

In [8]:
page = requests.get('https://wisevoter.com/state-rankings/states-with-strictest-gun-laws/#california') # Getting page HTML through request
soup = BeautifulSoup(page.content, 'html.parser') # Parsing content using beautifulsoup

In [77]:
table_rows = soup.select('tr')
key_list = [x.text for x in soup.find_all('th')]
data = {x: [] for x in key_list}
for tr in table_rows[1:]:
    values = tr.select('td')
    for i in range(len(values)):
        data[key_list[i]].append(values[i].text)
        
laws = pd.DataFrame(data, columns=data.keys())
laws.head()


Unnamed: 0,State,Gun Law Grade,Gun Death Rate,Red or Blue State
0,Colorado,B,15.4 per 100k,Democrat
1,Delaware,B,14.4 per 100k,Democrat
2,Oregon,B,13 per 100k,Democrat
3,Pennsylvania,B,13.6 per 100k,Swing State
4,Rhode Island,B,5.1 per 100k,Democrat


In [184]:
populations = pd.read_csv('state-population-table.csv')
count = gva_data.groupby("State").count().reset_index()
laws = pd.read_csv('gunLaws.csv')
merged = count.merge(populations, left_on = "State", right_on = "state", how="left")
rates = []
for state in merged["State"]:
    row = merged[merged["State"] == state].iloc[0]
    num_shootings = int(row["Incident ID"])
    pop = row["pop2023"]
    rates.append((num_shootings / pop) * 1_000_000)
merged = merged.assign(Rates=rates)
merged = merged.merge(laws, left_on = "State", right_on = "State", how="outer")

In [186]:
merged.to_csv('mapData.csv', index=False)

In [690]:
shooters = pd.read_csv("Full Database-Table 1.csv")
shooters = shooters[['Shooter Last Name', 
                     'Shooter First Name', 
                     'Day of Week',
                     'Day',
                     'Month',
                     'Year',
                     'City', 
                     'State',
                     'Location',
                     'Number Killed', 
                     'Number Injured', 
                     'Age', 
                     'Gender', 
                     'Race', 
                     'Immigrant',
                     'Sexual Orientation', 
                     'Religion', 
                     'Education',
                     'Part I Crimes',
                     'Part II Crimes',
                     'Mental Illness',
                     'Substance Use']]

shooters = shooters.replace({'Location':{"10":"Post Office", "0":"K-12 School", "1":"College/University", "2":"Government Building/Place of Civic Importance","3": "House of Worship","4":"Retail","5":"Restaurant/Bar/Nightclub","6":"Office","7":"Place of Residence","8": "Outdoors","9":"Warehouse/Factory"},
                             'Gender': {0: "Male", 1: "Female", 3: "Non-Binary", 4: "Transgender"},
                             "Race": {"0": "White", "1": "Black", "2":"Latinx", "3":"Asian", "4":"Middle Eastern", "5":"Native American"},
                             "Immigrant": {0:"No", 1:"Yes"},
                             "Sexual Orientation": {0: "Heterosexual", 1: "Not Heterosexual"},
                             "Religion": {0: "None", 1: "Christian", 2: "Muslim", 3: "Buddhist", 4: "Cultural spirituality/Other", 5: "Jewish"},
                             "Education": {0: "Less than highschool", 1:"High school/GED", 2:"Some college/trade school", 3: "Bachelor's degree", 4: "Graduate school/advanced degree"},
                             "Part I Crimes": {"0":"No evidence", "1":"Homicide","2":"Forcible Rape","3":"Robbery","4":"Aggravated Assault","5":"Burglary","6":"Larceny-Theft","7":"Motor Vehicle Theft","8":"Arson"},
                             "Part II Crimes": {"0": "No evidence","1": "Simple Assault","2":"Fraud, Forgery, Embezzlement","3":"Stolen Property","4":"Vandalism","5":"Weapons Offenses","6":"Prostitution or Other Non-rape Sex Offenses","7":"Drugs","8":"DUI","9":"Other"},
                             "Mental Illness": {"0": "No evidence", "1": "Mood disorder", "2": "Thought disorder", "3": "Other psychiatric disorder", "4": "Indication of psychiatric disorder but no diagnosis"},
                             "Substance Use": {"0": "No evidence", "1": "Problem with alcohol", "2": "Marijuana", "3": "Other drugs"},
                             }, regex=True)

In [691]:
def crimes(x):
    lambda x: x.remove("No evidence") if (x.count("No evidence") == 1 and len(x) >= 2) else x
    if (x.count("No evidence") == 1 and len(x) >= 2):
        x.remove("No evidence")
        return x
    elif (x.count("No evidence") == 2):
        return ["No evidence"]
    elif (x.count("Unknown") >= 1):
        return ["Unknown"]
    else:
        return x

shooters = shooters.fillna("Unknown")
shooters["Location"] = shooters["Location"].apply(lambda x: x.split(',')).apply(lambda x: [i.strip() for i in x])
shooters["Gender"] = shooters["Gender"].apply(lambda x: [x])
shooters["Race"] = shooters["Race"].apply(lambda x: [x])
shooters["Sexual Orientation"] = shooters["Sexual Orientation"].apply(lambda x: [x])
shooters["Religion"] = shooters["Religion"].apply(lambda x: [x])
shooters["Education"] = shooters["Education"].apply(lambda x: [x])
shooters["Part I Crimes"] = shooters["Part I Crimes"].str.strip("`").apply(lambda x: x.split(',')).apply(lambda x: [i.strip() for i in x])
shooters["Part II Crimes"] = shooters["Part II Crimes"].str.strip("`").apply(lambda x: x.split(',')).apply(lambda x: [i.strip() for i in x])
shooters["Criminal Record"] = (shooters["Part I Crimes"] + shooters["Part II Crimes"]).apply(crimes)
shooters["Mental Illness"] = shooters["Mental Illness"].apply(lambda x: x.split(',')).apply(lambda x: [i.strip() for i in x])
shooters["Substance Use"] = shooters["Substance Use"].apply(lambda x: x.split(',')).apply(lambda x: [i.strip() for i in x])


In [692]:
shooters = shooters.assign(id=np.arange(len(shooters)) + 1)
shooters = shooters[shooters['Year'] >= 2000]
networkData = {}
#Create the nodes
networkData["nodes"] = shooters.to_dict(orient='records')

attributes = ['Location', 'Gender', 'Race', 'Sexual Orientation', 'Religion', 'Criminal Record', 'Mental Illness', 'Substance Use']
for attribute in attributes:
  links = []
  for i in range(len(shooters)):
    for j in range(i + 1, len(shooters)):
        row1 = shooters.iloc[i]
        row2 = shooters.iloc[j]
        id1 = row1["id"]
        id2 = row2["id"]
        value = 0
        val2 = [x.strip() for x in row2[attribute]]
        val1 = [x.strip() for x in row1[attribute]]
        if (any(item in val1 for item in val2)):
            value += 1
            link = {"source": int(id1), "target": int(id2), "value": int(value)}
            links.append(link)
  networkData["links"] = links
  with open("networkData_" + attribute + ".json", "w") as outfile:
    json.dump(networkData, outfile)

In [702]:
populations.iloc[0]

fips                        6
state              California
densityMi           249.81347
pop2023              38915693
pop2022              39029342
pop2020              39501653
pop2019            39276883.3
pop2010              37253956
growthRate           -0.00291
growth                -113649
growthSince2010       0.04461
area                   155779
rank                        1
percent               0.11656
Name: 0, dtype: object