In [2]:
import pandas as pd 

In [3]:
#import shootings csv file to DataFrame
data = pd.read_csv('fatal-police-shootings-data.csv') 
data 

Unnamed: 0,id,date,threat_type,flee_status,armed_with,city,county,state,latitude,longitude,location_precision,name,age,gender,race,race_source,was_mental_illness_related,body_camera,agency_ids
0,3,2015-01-02,point,not,gun,Shelton,Mason,WA,47.246826,-123.121592,not_available,Tim Elliot,53.0,male,A,not_available,True,False,73
1,4,2015-01-02,point,not,gun,Aloha,Washington,OR,45.487421,-122.891696,not_available,Lewis Lee Lembke,47.0,male,W,not_available,False,False,70
2,5,2015-01-03,move,not,unarmed,Wichita,Sedgwick,KS,37.694766,-97.280554,not_available,John Paul Quintero,23.0,male,H,not_available,False,False,238
3,8,2015-01-04,point,not,replica,San Francisco,San Francisco,CA,37.762910,-122.422001,not_available,Matthew Hoffman,32.0,male,W,not_available,True,False,196
4,9,2015-01-04,point,not,other,Evans,Weld,CO,40.383937,-104.692261,not_available,Michael Rodriguez,39.0,male,H,not_available,False,False,473
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8405,9147,2023-04-22,point,not,gun,Mesquite,Dallas,TX,,,,,,male,,,False,False,
8406,9149,2023-04-22,shoot,,gun,West Jordan,Salt Lake,UT,40.593788,-111.969553,address,,,male,,,False,False,751
8407,9144,2023-04-23,,,undetermined,Pineville,Rapides,LA,31.207175,-92.149489,block,,,,,,False,False,21677
8408,9153,2023-04-23,attack,not,unknown,Shreveport,Caddo,LA,32.435405,-93.783105,intersection,Joseph Dewayne Taylor,33.0,male,B,public_record,False,False,772


In [4]:
data.dtypes

id                              int64
date                           object
threat_type                    object
flee_status                    object
armed_with                     object
city                           object
county                         object
state                          object
latitude                      float64
longitude                     float64
location_precision             object
name                           object
age                           float64
gender                         object
race                           object
race_source                    object
was_mental_illness_related       bool
body_camera                      bool
agency_ids                     object
dtype: object

In [5]:
#dropping unnecessary columns
data_clean = data.drop(columns=['latitude', 'longitude', 'location_precision', 'threat_type', 'flee_status', 
                                'race_source', 'was_mental_illness_related', 'body_camera'])
data_clean

Unnamed: 0,id,date,armed_with,city,county,state,name,age,gender,race,agency_ids
0,3,2015-01-02,gun,Shelton,Mason,WA,Tim Elliot,53.0,male,A,73
1,4,2015-01-02,gun,Aloha,Washington,OR,Lewis Lee Lembke,47.0,male,W,70
2,5,2015-01-03,unarmed,Wichita,Sedgwick,KS,John Paul Quintero,23.0,male,H,238
3,8,2015-01-04,replica,San Francisco,San Francisco,CA,Matthew Hoffman,32.0,male,W,196
4,9,2015-01-04,other,Evans,Weld,CO,Michael Rodriguez,39.0,male,H,473
...,...,...,...,...,...,...,...,...,...,...,...
8405,9147,2023-04-22,gun,Mesquite,Dallas,TX,,,male,,
8406,9149,2023-04-22,gun,West Jordan,Salt Lake,UT,,,male,,751
8407,9144,2023-04-23,undetermined,Pineville,Rapides,LA,,,,,21677
8408,9153,2023-04-23,unknown,Shreveport,Caddo,LA,Joseph Dewayne Taylor,33.0,male,B,772


In [6]:
pd.unique(data_clean['gender'])

array(['male', 'female', nan, 'non-binary'], dtype=object)

In [7]:
pd.unique(data_clean['race'])

array(['A', 'W', 'H', 'B', 'O', nan, 'N', 'B;H'], dtype=object)

In [8]:
#dropped na values from DataFrame
data_clean.dropna()

Unnamed: 0,id,date,armed_with,city,county,state,name,age,gender,race,agency_ids
0,3,2015-01-02,gun,Shelton,Mason,WA,Tim Elliot,53.0,male,A,73
1,4,2015-01-02,gun,Aloha,Washington,OR,Lewis Lee Lembke,47.0,male,W,70
2,5,2015-01-03,unarmed,Wichita,Sedgwick,KS,John Paul Quintero,23.0,male,H,238
3,8,2015-01-04,replica,San Francisco,San Francisco,CA,Matthew Hoffman,32.0,male,W,196
4,9,2015-01-04,other,Evans,Weld,CO,Michael Rodriguez,39.0,male,H,473
...,...,...,...,...,...,...,...,...,...,...,...
8397,9142,2023-04-19,gun,Bristol,Sullivan,TN,Casey Eric Crowley,48.0,male,W,2408
8398,9151,2023-04-19,gun,Kansas City,Jackson,MO,Reginald E. Byers Jr.,43.0,male,B,201
8399,9152,2023-04-20,gun,Indianapolis,Marion,IN,Daniel Yefter Ghebrehiwet,46.0,male,B,13886
8408,9153,2023-04-23,unknown,Shreveport,Caddo,LA,Joseph Dewayne Taylor,33.0,male,B,772


In [9]:
#set datatypes for multiple columns, most notably the date column
data_clean.astype({'age': 'Int64',
                   'id' : 'object',
                   'gender': 'category',
                   'race': 'category',
                  'date': 'datetime64[ns]'}).dtypes

id                    object
date          datetime64[ns]
armed_with            object
city                  object
county                object
state                 object
name                  object
age                    Int64
gender              category
race                category
agency_ids            object
dtype: object

In [10]:
#set name as the index for the DataFrame
shootings_df = data_clean.set_index('id')
shootings_df

Unnamed: 0_level_0,date,armed_with,city,county,state,name,age,gender,race,agency_ids
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
3,2015-01-02,gun,Shelton,Mason,WA,Tim Elliot,53.0,male,A,73
4,2015-01-02,gun,Aloha,Washington,OR,Lewis Lee Lembke,47.0,male,W,70
5,2015-01-03,unarmed,Wichita,Sedgwick,KS,John Paul Quintero,23.0,male,H,238
8,2015-01-04,replica,San Francisco,San Francisco,CA,Matthew Hoffman,32.0,male,W,196
9,2015-01-04,other,Evans,Weld,CO,Michael Rodriguez,39.0,male,H,473
...,...,...,...,...,...,...,...,...,...,...
9147,2023-04-22,gun,Mesquite,Dallas,TX,,,male,,
9149,2023-04-22,gun,West Jordan,Salt Lake,UT,,,male,,751
9144,2023-04-23,undetermined,Pineville,Rapides,LA,,,,,21677
9153,2023-04-23,unknown,Shreveport,Caddo,LA,Joseph Dewayne Taylor,33.0,male,B,772


In [11]:
#Task 3, Q1
shootings_df.loc[1694]

date                2016-07-06
armed_with                 gun
city            Falcon Heights
county                  Ramsey
state                       MN
name          Philando Castile
age                       32.0
gender                    male
race                         B
agency_ids                1359
Name: 1694, dtype: object

In [12]:
#Task 3, Q2
mn_shootings = shootings_df.loc[shootings_df['state'] == 'MN'] 
mn_shootings

Unnamed: 0_level_0,date,armed_with,city,county,state,name,age,gender,race,agency_ids
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
49,2015-01-14,,St. Paul,Ramsey,MN,Marcus Golden,24.0,male,B,63
60,2015-01-15,gun,Bloomington,Hennepin,MN,Quincy Reed Reindl,24.0,male,W,139
324,2015-01-26,gun,New Hope,Hennepin,MN,Raymond Kmetz,68.0,male,W,1953;109
212,2015-03-16,gun,St. Paul,Ramsey,MN,Justin Tolkinen,28.0,male,W,63
454,2015-05-08,,Fridley,Anoka,MN,Sam Holmes,31.0,male,B,476
...,...,...,...,...,...,...,...,...,...,...
8744,2022-12-15,knife,New Auburn,Sibley,MN,Brent A. Alsleben,34.0,male,,896
8947,2023-02-11,knife,St. Paul,Ramsey,MN,,,male,,63
8975,2023-02-24,knife,Duluth,St. Louis,MN,Zachary Shogren,34.0,male,,2376
9009,2023-03-04,gun,Stillwater,Washington,MN,Okwan Rahmier Sims,21.0,male,,2338


In [17]:

race_df = shootings_df['race'].value_counts() 
race_df

race
W      3679
B      1939
H      1296
A       143
N       111
O        20
B;H       1
Name: count, dtype: int64

ValueError: Grouper for 'race' not 1-dimensional