# Exploration of Weapons (Firearms) Data
Source: The Violence Project

## Load data

In [82]:
import pandas as pd
from pathlib import Path

In [83]:
# Raw firearms export from The Violence Project, relative to the notebook folder
csv = Path('raw_data') / 'firearms.csv'
firearms_df = pd.read_csv(filepath_or_buffer=csv)

## Clean data

### Drop empty columns and rows

In [84]:
# The trailing 'Unnamed: 18' ... 'Unnamed: 28' columns are empty: remove them
empty_columns = [f'Unnamed: {position}' for position in range(18, 29)]
without_empty_columns = firearms_df.drop(columns=empty_columns)

# Keep only the rows for which a Case # is defined
has_case_number = without_empty_columns['Case #'].notna()
cleaned_firearms_df = without_empty_columns.loc[has_case_number, :]
cleaned_firearms_df

Unnamed: 0,Case #,Shooter Last Name,Shooter First Name,Full Date,Make and Model,Classification,Caliber,Used in Shooting?,Modified,Large Capacity Magazine,Extended Magazine,When Obtained,Legal Purchase,Illegal Purchase,Assembled with Legal Parts,Gifted,Theft,Unknown
0,1,Whitman,Charles,1/08/1966,Remington 700 6mm bolt-action rifle,2.0,0.0,1.0,0.0,0.0,0.0,1.0,,,,,,1.0
1,1,Whitman,Charles,1/08/1966,Remington .35-caliber Model 141 pump-action rifle,2.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
2,1,Whitman,Charles,1/08/1966,.30 M1 carbine,2.0,1.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
3,1,Whitman,Charles,1/08/1966,6.35mm Galesi-brescia pistol,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
4,1,Whitman,Charles,1/08/1966,9mm Luger pistol,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
408,193,Hale,Audrey,3/27/2023,9 mm Kel-Tec SUB2000 pistol caliber carbine,3.0,1.0,1.0,1.0,,,1.0,1.0,0.0,1.0,0.0,0.0,0.0
409,193,Hale,Audrey,3/27/2023,9 mm Smith and Wesson M&P Shield EZ 2.0 handgun,0.0,1.0,1.0,0.0,,,1.0,1.0,0.0,1.0,0.0,0.0,0.0
410,194,Sturgeon,Connor,4/10/2023,AR-15-style rifle,3.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0
411,195,Garcia,Mauricio,5/6/2023,AR-15-style rifle,3.0,0.0,,,,,,,,,,,1.0


### Rename column with a trailing space

In [85]:
# Quote every column name so that stray leading/trailing whitespace is visible
for column_name in cleaned_firearms_df.columns:
    print("'{}'".format(column_name))

'Case #'
'Shooter Last Name'
'Shooter First Name'
'Full Date'
'Make and Model'
'Classification'
'Caliber'
'Used in Shooting?'
'Modified'
'Large Capacity Magazine'
'Extended Magazine'
'When Obtained'
'Legal Purchase'
'Illegal Purchase'
'Assembled with Legal Parts '
'Gifted'
'Theft'
'Unknown'


In [86]:
# Old name (note the trailing space) -> corrected name
renamed_columns = {'Assembled with Legal Parts ': 'Assembled with Legal Parts'}
cleaned_firearms_df = cleaned_firearms_df.rename(columns=renamed_columns)

### Replace NaN
In the `Unknown` column (the flag marking an unknown firearm origin), NaN values are replaced by 1. In all other columns, NaN values are replaced with 'Unknown'.

In [87]:
# A missing value in the 'Unknown' flag column is filled with 1
cleaned_firearms_df = cleaned_firearms_df.fillna({'Unknown': 1})

In [88]:
# Every NaN still present, whatever its column, becomes the 'Unknown' label
unknown_fill = {column_name: 'Unknown' for column_name in cleaned_firearms_df.columns}
cleaned_firearms_df = cleaned_firearms_df.fillna(unknown_fill)

In [89]:
# Display the frame to check the result of the NaN replacement
cleaned_firearms_df

Unnamed: 0,Case #,Shooter Last Name,Shooter First Name,Full Date,Make and Model,Classification,Caliber,Used in Shooting?,Modified,Large Capacity Magazine,Extended Magazine,When Obtained,Legal Purchase,Illegal Purchase,Assembled with Legal Parts,Gifted,Theft,Unknown
0,1,Whitman,Charles,1/08/1966,Remington 700 6mm bolt-action rifle,2.0,0.0,1.0,0.0,0.0,0.0,1.0,Unknown,Unknown,Unknown,Unknown,Unknown,1.0
1,1,Whitman,Charles,1/08/1966,Remington .35-caliber Model 141 pump-action rifle,2.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
2,1,Whitman,Charles,1/08/1966,.30 M1 carbine,2.0,1.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
3,1,Whitman,Charles,1/08/1966,6.35mm Galesi-brescia pistol,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
4,1,Whitman,Charles,1/08/1966,9mm Luger pistol,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
408,193,Hale,Audrey,3/27/2023,9 mm Kel-Tec SUB2000 pistol caliber carbine,3.0,1.0,1.0,1.0,Unknown,Unknown,1.0,1.0,0.0,1.0,0.0,0.0,0.0
409,193,Hale,Audrey,3/27/2023,9 mm Smith and Wesson M&P Shield EZ 2.0 handgun,0.0,1.0,1.0,0.0,Unknown,Unknown,1.0,1.0,0.0,1.0,0.0,0.0,0.0
410,194,Sturgeon,Connor,4/10/2023,AR-15-style rifle,3.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0
411,195,Garcia,Mauricio,5/6/2023,AR-15-style rifle,3.0,0.0,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,1.0


## Convert float64 to int64
Where applicable

In [91]:
# Target dtype per column; only the 'Unknown' flag is converted here
integer_columns = {"Unknown": "int"}
cleaned_firearms_df = cleaned_firearms_df.astype(integer_columns)

cleaned_firearms_df

Unnamed: 0,Case #,Shooter Last Name,Shooter First Name,Full Date,Make and Model,Classification,Caliber,Used in Shooting?,Modified,Large Capacity Magazine,Extended Magazine,When Obtained,Legal Purchase,Illegal Purchase,Assembled with Legal Parts,Gifted,Theft,Unknown
0,1,Whitman,Charles,1/08/1966,Remington 700 6mm bolt-action rifle,2.0,0.0,1.0,0.0,0.0,0.0,1.0,Unknown,Unknown,Unknown,Unknown,Unknown,1
1,1,Whitman,Charles,1/08/1966,Remington .35-caliber Model 141 pump-action rifle,2.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0
2,1,Whitman,Charles,1/08/1966,.30 M1 carbine,2.0,1.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0
3,1,Whitman,Charles,1/08/1966,6.35mm Galesi-brescia pistol,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0
4,1,Whitman,Charles,1/08/1966,9mm Luger pistol,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
408,193,Hale,Audrey,3/27/2023,9 mm Kel-Tec SUB2000 pistol caliber carbine,3.0,1.0,1.0,1.0,Unknown,Unknown,1.0,1.0,0.0,1.0,0.0,0.0,0
409,193,Hale,Audrey,3/27/2023,9 mm Smith and Wesson M&P Shield EZ 2.0 handgun,0.0,1.0,1.0,0.0,Unknown,Unknown,1.0,1.0,0.0,1.0,0.0,0.0,0
410,194,Sturgeon,Connor,4/10/2023,AR-15-style rifle,3.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0
411,195,Garcia,Mauricio,5/6/2023,AR-15-style rifle,3.0,0.0,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,1


## Replace numerical values by explicit values
Based on codebook

In [92]:
def replace_code_by_value(df, field, codes_dict):
    """Replace the coded values of one column of ``df`` by explicit values.

    The column is replaced on ``df`` itself; nothing is returned.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the column to decode.
    field : str
        Name of the column whose values are decoded.
    codes_dict : dict
        Mapping of code -> explicit value. Cells whose value has no entry in
        the mapping (e.g. 'Unknown' or NaN) are left unchanged.
    """
    def _decode(code):
        # Unhashable cell values cannot be codebook keys: keep them as they are
        try:
            return codes_dict.get(code, code)
        except TypeError:
            return code

    # Every cell is translated exactly once, so a replacement value that is
    # also a key of codes_dict is never translated a second time. Assigning the
    # whole column (instead of writing labels cell by cell through .loc) also
    # lets a numeric column become a label column without an incompatible-dtype
    # assignment.
    df[field] = df[field].map(_decode)

In [93]:
# Codebook lookups: one dict per coded column, mapping numeric code -> explicit value

# Classification codes
codes_firearm_classification = {
    0: 'Handgun',
    1: 'Shotgun',
    2: 'Rifle',
    3: 'Assault weapon'
}

# Caliber codes
codes_firearm_caliber = {
    0: 'Small',
    1: 'Medium',
    2: 'Large'
}

# "Used in shooting" codes
codes_firearm_used = {
    0: 'No evidence',
    1: 'Yes',
    2: 'Suicide only'
}

# Modified codes
codes_firearm_modified = {
    0: 'No evidence',
    1: 'Yes'
}

# "Large Capacity Magazine" codes
codes_firearm_large_mag = {
    0: 'No evidence',
    1: 'Yes'
}

# "Extended magazine" codes
codes_firearm_extended_mag = {
    0: 'No evidence',
    1: 'Yes'
}

# "When obtained" codes
codes_firearm_when = {
    0: '< 1 month prior',
    1: '> 1 month prior'
}

# "Legal purchase" codes
codes_firearm_legal = {
    0: 'Illegal',
    1: 'Federal Firearms Licensed dealer',
    2: 'Unregulated private sale',
    3: 'Legal but specific source unknown'
}

# "Illegal purchase" codes
codes_firearm_illegal = {
    0: 'Legal',
    1: 'System failure',
    2: 'Straw purchase',
    3: 'Lying and buying',
    4: 'Illegal street sale',
    5: 'Illegal but specific source unknown',
    6: 'Legal sale but illegal possession'
}

# "Assembled with legal parts" codes
codes_firearm_assembly = {
    0: 'No',
    1: 'Yes'
}

# "Gifted" codes
codes_firearm_gifted = {
    0: 'No',
    1: 'Yes'
}

# Theft codes
codes_firearm_theft = {
    0: 'No',
    1: 'Theft/borrowed from family or friend',
    2: 'Theft other',
    3: 'Taken at the scene'
}

# "Unknown" codes
codes_firearm_unknown = {
    0: 'No',
    1: 'Yes'
}

# Column name -> codebook used to decode that column
codes_by_column = {
    'Classification': codes_firearm_classification,
    'Caliber': codes_firearm_caliber,
    'Used in Shooting?': codes_firearm_used,
    'Modified': codes_firearm_modified,
    'Large Capacity Magazine': codes_firearm_large_mag,
    'Extended Magazine': codes_firearm_extended_mag,
    'When Obtained': codes_firearm_when,
    'Legal Purchase': codes_firearm_legal,
    'Illegal Purchase': codes_firearm_illegal,
    'Assembled with Legal Parts': codes_firearm_assembly,
    'Gifted': codes_firearm_gifted,
    'Theft': codes_firearm_theft,
    'Unknown': codes_firearm_unknown,
}

# Replace code by explicit values based on the codebook
for column_name, column_codes in codes_by_column.items():
    replace_code_by_value(cleaned_firearms_df, column_name, column_codes)

cleaned_firearms_df


Unnamed: 0,Case #,Shooter Last Name,Shooter First Name,Full Date,Make and Model,Classification,Caliber,Used in Shooting?,Modified,Large Capacity Magazine,Extended Magazine,When Obtained,Legal Purchase,Illegal Purchase,Assembled with Legal Parts,Gifted,Theft,Unknown
0,1,Whitman,Charles,1/08/1966,Remington 700 6mm bolt-action rifle,Rifle,Small,Yes,No evidence,No evidence,No evidence,> 1 month prior,Unknown,Unknown,Unknown,Unknown,Unknown,Yes
1,1,Whitman,Charles,1/08/1966,Remington .35-caliber Model 141 pump-action rifle,Rifle,Medium,No evidence,No evidence,No evidence,No evidence,< 1 month prior,Federal Firearms Licensed dealer,Legal,No,No,No,No
2,1,Whitman,Charles,1/08/1966,.30 M1 carbine,Rifle,Medium,Yes,No evidence,Yes,No evidence,< 1 month prior,Federal Firearms Licensed dealer,Legal,No,No,No,No
3,1,Whitman,Charles,1/08/1966,6.35mm Galesi-brescia pistol,Handgun,Small,No evidence,No evidence,No evidence,No evidence,< 1 month prior,Federal Firearms Licensed dealer,Legal,No,No,No,No
4,1,Whitman,Charles,1/08/1966,9mm Luger pistol,Handgun,Medium,No evidence,No evidence,No evidence,No evidence,< 1 month prior,Federal Firearms Licensed dealer,Legal,No,No,No,No
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
408,193,Hale,Audrey,3/27/2023,9 mm Kel-Tec SUB2000 pistol caliber carbine,Assault weapon,Medium,Yes,Yes,Unknown,Unknown,> 1 month prior,Federal Firearms Licensed dealer,Legal,Yes,No,No,No
409,193,Hale,Audrey,3/27/2023,9 mm Smith and Wesson M&P Shield EZ 2.0 handgun,Handgun,Medium,Yes,No evidence,Unknown,Unknown,> 1 month prior,Federal Firearms Licensed dealer,Legal,Yes,No,No,No
410,194,Sturgeon,Connor,4/10/2023,AR-15-style rifle,Assault weapon,Small,Yes,No evidence,No evidence,No evidence,< 1 month prior,Federal Firearms Licensed dealer,Legal,Yes,No,No,No
411,195,Garcia,Mauricio,5/6/2023,AR-15-style rifle,Assault weapon,Small,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Yes


In [94]:
# Number of rows per decoded 'Legal Purchase' value
cleaned_firearms_df['Legal Purchase'].value_counts()

Unknown                              147
Federal Firearms Licensed dealer     138
Legal but specific source unknown     62
Illegal                               54
Unregulated private sale              12
Name: Legal Purchase, dtype: int64

In [95]:
# Number of rows per decoded 'Illegal Purchase' value
cleaned_firearms_df['Illegal Purchase'].value_counts()

Legal                                  209
Unknown                                150
Illegal but specific source unknown     14
System failure                          12
Lying and buying                        10
Straw purchas                            7
Legal sale but illegal possession        7
Illegal street sale                      4
Name: Illegal Purchase, dtype: int64

In [96]:
# Number of rows per decoded 'Unknown' value
cleaned_firearms_df['Unknown'].value_counts()

No     317
Yes     96
Name: Unknown, dtype: int64

## Save DataFrame as CSV file

In [None]:
# Destination of the cleaned data set
csv_out = Path('clean_data/clean_firearms.csv')
# Create the output folder when it is missing, so that the save does not fail
# on a checkout where 'clean_data' has not been created yet
csv_out.parent.mkdir(parents=True, exist_ok=True)
# index=False: the row index is not written to the file
cleaned_firearms_df.to_csv(csv_out, index=False)