In [1]:

%matplotlib inline

# Import dependencies
import pandas as pd
import numpy as np
import os 
import matplotlib.pyplot as plt


file = os.path.join('MidwestMurderData.csv')
midwest_homicide_df = pd.read_csv(file)


In [2]:
midwest_homicide_df['Weapon'].unique()

array(['Knife or cutting instrument', 'Handgun - pistol, revolver, etc',
       'Shotgun', 'Strangulation - hanging',
       'Personal weapons, includes beating',
       'Blunt object - hammer, club, etc', 'Rifle',
       'Other or type unknown', 'Firearm, type not stated', 'Fire',
       'Asphyxiation - includes death by gas', 'Drowning',
       'Pushed or thrown out window',
       'Narcotics or drugs, sleeping pills', 'Explosives', 'Other gun',
       'Poison - does not include gas'], dtype=object)

In [3]:
#Create a dataframe with only the columns I'm interested in
relationship_df = midwest_homicide_df[['Relationship', 'VicSex','Weapon', 'OffSex']]


#Split the weapons into "close contact" and "minimal contact" categories
relationship_df['Weapon'] = relationship_df['Weapon'].replace(
                                                {"Knife or cutting instrument": "Close Contact",
                                                "Handgun - pistol, revolver, etc": "No/Little Contact",
                                                "Shotgun": "No/Little Contact",
                                                "Strangulation - hanging": "No/Little Contact",
                                                "Personal weapons, includes beating": "Close Contact",
                                                "Blunt object - hammer, club, etc" : "Close Contact",
                                                "Rifle": "No/Little Contact",
                                                "Firearm, type not stated": "No/Little Contact",
                                                "Asphyxiation - includes death by gas": "Close Contact",
                                                "Other gun": "No/Little Contact",
                                                "Fire": "No/Little Contact",
                                                "Drowning": "Close Contact",
                                                "Pushed or thrown out window": "No/Little Contact",
                                                "Narcotics or drugs, sleeping pills": "No/Little Contact",
                                                "Explosives": "No/Little Contact",
                                                "Poison - does not include gas": "No/Little Contact"})

#Combine similar relationships together for the male and female victim charts
relationship_df['Relationship'] = relationship_df["Relationship"].replace(
                                                  {'Brother': 'Sibling',
                                                   'Sister': 'Sibling',
                                                   'Father': 'Parent',
                                                   'Mother': 'Parent',
                                                   'Daughter': 'Child',
                                                   'Son': 'Child',
                                                   'Stepson': 'Child',
                                                   "Stepdaughter": "Child",
                                                   "Homosexual relationship": 'Partner',
                                                   "Boyfriend" : "Partner",
                                                   "Girlfriend" : "Partner",
                                                   "Common-law husband" : "Partner",
                                                   "Common-law wife" : "Partner",
                                                   "Ex-husband" : "Spouse",
                                                   "Ex-wife" : "Spouse",
                                                   "Stepmother" : "Parent",
                                                   "Stepfather" : "Parent",
                                                   "Husband" : "Spouse",
                                                   "Wife" : "Spouse",
                                                  "Friend": 'Other - known to victim',
                                                  "Neighbor": 'Other - known to victim',
                                                  "Employee": 'Other - known to victim',
                                                  "Employer": 'Other - known to victim',
                                                  "In-law" : 'Other - known to victim'})


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


In [6]:
#Female Datafarme - pull out only female victims
victims_df = relationship_df[['Relationship', 'VicSex']]
female_victims = victims_df.loc[victims_df["VicSex"]=='Female', :]

#Group Female Victims by Relationships
female_victims = female_victims.groupby('Relationship').count()

#Rename columns
female_victims_df = female_victims.rename(columns={"VicSex": "Female Victim"})

female_victims_df

Unnamed: 0_level_0,Female Victim
Relationship,Unnamed: 1_level_1
Acquaintance,5206
Child,2103
Other - known to victim,2900
Other family,648
Parent,1039
Partner,4006
Relationship not determined,9592
Sibling,348
Spouse,5035
Stranger,2525


In [17]:
#Sort values so graphs are in ascending order
female_df = female_victims_df.reset_index().sort_values("Female Victim")

#Drop the highest outlier value: Relationship Undefined
female_df = female_df.drop(6)
female_df

Unnamed: 0,Relationship,Female Victim
7,Sibling,348
3,Other family,648
4,Parent,1039
1,Child,2103
9,Stranger,2525
2,Other - known to victim,2900
5,Partner,4006
8,Spouse,5035
0,Acquaintance,5206


In [15]:
#Sort values so graphs are in ascending order
female_df = female_victims_df.reset_index().sort_values("Female Victim")

#Drop the highest outlier value: Relationship Undefined
female_df = female_df.drop(6)

#Find the sum of all Female Victims
female_df.sum()

#Calculate the percentage of female victims by relationship (23810 is the sum of all female victims)
female_percent = female_df['Female Victim']/23810*100
female_percent = female_percent.round()
female_percent

7     1.0
3     3.0
4     4.0
1     9.0
9    11.0
2    12.0
5    17.0
8    21.0
0    22.0
Name: Female Victim, dtype: float64

In [8]:
#Plot Female Victim Relationship Information
y_axis = np.arange(len(female_percent))
tick_locations = [value for value in y_axis]

#plt.figure(figsize=(20,3))
plt.barh(y_axis, female_percent, color= '#6EDBA1', alpha= 1.0, align="center")
plt.yticks(tick_locations, female_df["Relationship"], fontname='Chaparral Pro', 
           fontsize=12, rotation="horizontal")


# Set a Title and labels
plt.title("Female Victim Relationship to Offender", pad=20, fontname='Chaparral Pro', fontsize=20)
plt.xlabel("Percentage of Murders", labelpad=20, fontname='Chaparral Pro', fontsize=12)
plt.show()

NameError: name 'female_percent' is not defined

In [10]:
#Male Dataframe - Pull out only Male Victims
male = victims_df.loc[victims_df["VicSex"] == "Male", :]

#Group Male Victims by Relationships
male_victims = male.groupby('Relationship').count()

#Rename Columns
male_victims_df = male_victims.rename(columns={"VicSex": "Male Victim"})

male_victims_df.sum()

Male Victim    111465
dtype: int64

In [13]:
#Sort values so graphs are in ascending order
male_df = male_victims_df.reset_index().sort_values("Male Victim")

#Drop the highest outlier value: Relationship Undefined
male_df = male_df.drop(6)
male_df

Unnamed: 0,Relationship,Male Victim
7,Sibling,1190
4,Parent,1336
3,Other family,1410
8,Spouse,2324
1,Child,2597
5,Partner,2692
2,Other - known to victim,10357
9,Stranger,16626
0,Acquaintance,25393


In [11]:
#Sort values so graphs are in ascending order
male_df = male_victims_df.reset_index().sort_values("Male Victim")

#Drop the highest outlier value: Relationship Undefined
male_df = male_df.drop(6)

#Find the sum of all Male Victims
male_df.sum()

#Calculate the percentage of male victims by relationship (23810 is the sum of all female victims)
male_percent = male_df['Male Victim']/63925*100
male_percent = male_percent.round()
male_percent

7     2.0
4     2.0
3     2.0
8     4.0
1     4.0
5     4.0
2    16.0
9    26.0
0    40.0
Name: Male Victim, dtype: float64

In [None]:
#Plot Male Victim Relationship Information
y_axis = np.arange(len(male_percent))
tick_locations = [value for value in y_axis]

#plt.figure(figsize=(20,3))
plt.barh(y_axis, male_percent, color='#721121', alpha= 1.0, align="center")
plt.yticks(tick_locations, male_df["Relationship"], fontname='Chaparral Pro', 
           fontsize=12, rotation="horizontal")

# Set a Title and labels
plt.title("Male Victim Relationship to Offender", fontname='Chaparral Pro', pad=20, fontsize=20)
plt.xlabel("Percentage of Murders", labelpad=20, fontname='Chaparral Pro', fontsize=12)
plt.show()

In [None]:
##GROUP WEAPON TYPE AND VICTIM RELATIONSHIP##
#--------
#Adjust relationship groups to reflect aquaintance, stranger, family, and romantic partner
close_contact = relationship_df[['Relationship', 'Weapon']]
close_contact['Relationship'] = close_contact['Relationship'].replace(
                                            {'Spouse': 'Romantic Partner',
                                            'Sibling': 'Family Member',
                                            'Parent': 'Family Member',
                                            'Child': 'Family Member',
                                            'Partner': 'Romantic Partner',
                                            'Other family': 'Family Member'})

#Find only the close contact weapon types grouped by relationship
close_contact = close_contact[close_contact.Weapon == 'Close Contact'].groupby('Relationship').count()
close_contact = close_contact.rename(columns={'Weapon': 'Close Contact'})

#--------
#Group together the minimal contact weapon types by relationship
minimal_contact= relationship_df[['Relationship', 'Weapon']]

#Adjust relationship groups to reflect aquaintance, stranger, family, and romantic partner
minimal_contact['Relationship'] = minimal_contact['Relationship'].replace(
                                            {'Spouse': 'Romantic Partner',
                                            'Sibling': 'Family Member',
                                            'Parent': 'Family Member',
                                            'Child': 'Family Member',
                                            'Partner': 'Romantic Partner',
                                            'Other family': 'Family Member'})

#Group minimal_contact weapons by relationshps
minimal_contact = minimal_contact[minimal_contact.Weapon == 'No/Little Contact'].groupby('Relationship').count()
minimal_contact = minimal_contact.rename(columns={'Weapon': 'No/Little Contact'})


#Merge the two types of weapons together based on relationship, reset index, drop "relatioship not determined"
weapon_type = pd.merge(close_contact, minimal_contact, how='outer', on=['Relationship']).reset_index()
weapon_type = weapon_type.drop(2)
weapon_type = weapon_type.drop(3)


In [None]:
#Show dataframe for pie chart 
weapon_type

In [None]:
#Create close contact murder pie chart
relationship = ['Aquaintance', 'Family Member', 'Romantic Partner', 'Stranger']
count = [8835, 4844, 4909, 3628]
colors = ["#C1CDC6", "#171B2A", "#D74125", "darkgrey"]

# Tell matplotlib to create a pie chart based upon the above data
plt.pie(count, labels=relationship, colors=colors,
        autopct="%1.1f%%", shadow=True, startangle=90)

plt.title("Victim Relationships: Close Contact Murder Methods", fontname='Chaparral Pro', fontsize=20)
plt.show()

In [None]:
relationship = ['Aquaintance', 'Family Member', 'Romantic Partner', 'Stranger']
count = [20943, 4881, 8679, 14774]
colors = ["#C1CDC6", "#171B2A", "#D74125", "darkgrey"]

# Tell matplotlib to create a pie chart based upon the above data
plt.pie(count, labels=relationship, colors=colors,
        autopct="%1.1f%%", shadow=True, startangle=90)

plt.title("Victim Relationships: No/Little Contact Murder Methods", fontname='Chaparral Pro', fontsize=20)
plt.show()