In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)



In [None]:
# Location of the mass-shooting CSV inside the Kaggle input directory.
file_kag = '/kaggle/input/mass-shooting-usa/shooting_1982-2023_cleaned.csv'
# Read the file once, treating the '-' placeholder as a missing value (NaN).
# (The earlier version read the file twice and threw the first result away.)
dat_mass = pd.read_csv(file_kag, na_values='-')
dat_mass.head()


In [None]:
# Collapse the spelling variants "One semiautomatic gun" and
# "semiautomatic guns" into the single label "semiautomatic gun".
dat_mass['weapon_type'] = dat_mass['weapon_type'].replace({
    "One semiautomatic gun": "semiautomatic gun",
    "semiautomatic guns": "semiautomatic gun",
})

# Columns that are not needed for the analysis below.
col_rem = ['summary','description','latitude','longitude','where_obtained','weapons_obtained_legally','quarter','half','month_name','day_of_week']
# Reassign instead of dropping in place, and ignore columns that are already
# gone, so that re-running this cell does not raise a KeyError.
# NOTE(review): errors='ignore' also hides a misspelled column name in col_rem.
dat_mass = dat_mass.drop(columns=col_rem, errors='ignore')
# Check the result.
dat_mass.head()

In [None]:
# Question: which ten years have the highest fatalities, and how do the
# injuries of those years compare?
sum_yrwis = dat_mass.groupby('year')[['fatalities', 'injured']].sum()
# Order the yearly totals from most to fewest fatalities.
sort_sum_yrwis = sum_yrwis.sort_values(by='fatalities', ascending=False)
# Keep the ten largest years and turn 'year' back into a regular column.
top_10_years = sort_sum_yrwis.nlargest(10, 'fatalities').reset_index()
print(top_10_years)

In [None]:

import matplotlib.pyplot as plt

# Stacked bar chart: one bar per year, fatalities and injuries stacked.
ax = top_10_years.plot(x='year', y=['fatalities', 'injured'], kind='bar', stacked=True, figsize=(10, 8))

# Overlay the per-year total (fatalities + injured) as a line.
# pandas draws the i-th bar at x-position i and uses the year only as the tick
# label, so the line has to be plotted at positions 0..n-1 as well. Plotting it
# against the year values would push it far to the right of the bars.
# The real totals are plotted (no rescaling), so each marker sits on top of
# its stacked bar.
total_count = top_10_years['fatalities'] + top_10_years['injured']
bar_positions = list(range(len(top_10_years)))
ax.plot(bar_positions, total_count, marker='o', color='black', label='Total')

# Axis label and title.
ax.set_ylabel('Count (Fatalities & Injuries)')
ax.set_title('Total Fatalities & Injuries - Top 10 Years in US Mass Shootouts')

# Legend in the upper-right corner of the axes.
ax.legend(loc='upper right')

# Figure that owns the axes created above.
fig = ax.get_figure()

# Save the figure as a .png file in the Kaggle output directory.
output_path = "/kaggle/working/fat&inj.png"
fig.savefig(output_path, bbox_inches='tight', dpi=150)  # tight bounding box so nothing is clipped
# Show the plot.
plt.show()




In [None]:
# Mean age of shooters per gender (the variable name says "yrwise", but the
# grouping key is 'gender').
age_yrwise = dat_mass.groupby('gender')[['age_of_shooter']].mean()
# Round the mean age to whole years and store it as an integer.
age_yrwise['age_of_shooter'] = age_yrwise['age_of_shooter'].round(0).astype(int)
# Rename the column 'age_of_shooter' to 'mean_age'.
age_yrwise = age_yrwise.rename(columns={'age_of_shooter': 'mean_age'})
mean_df1 = age_yrwise

# Number of incidents per gender.
# size() counts rows per group. The earlier count() on the 'injured' column
# skipped rows whose 'injured' value is missing, which undercounts.
gender_count = dat_mass.groupby('gender').size().to_frame('sex_total')
sex_df2 = gender_count

# Combine mean age and count into one frame for the pie chart. Both frames are
# indexed by 'gender', which is the merge key.
merged_pie = pd.merge(mean_df1, sex_df2, on='gender')
print(merged_pie)

In [None]:

# Pie chart: wedge sizes come from 'sex_total', wedge labels from 'mean_age',
# and the legend entries from the index of merged_pie.
fig, ax = plt.subplots(figsize=(6, 5))
ax.pie(
    merged_pie['sex_total'],
    labels=merged_pie['mean_age'],
    colors=['pink', 'lightblue', 'lightgreen', 'purple'],
)
ax.set_title('Age/Sex Distribution of Shooters')
ax.legend(
    title='Gender',
    loc='lower right',
    labels=merged_pie.index,
    bbox_to_anchor=(1.2, 1.2),
)

# Equal axis scaling keeps the pie circular.
ax.axis('equal')

# Write the figure to the output directory as a .png file.
output_path = "/kaggle/working/gender_plot.png"
fig.savefig(output_path, bbox_inches='tight', dpi=150)

# Display the chart.
plt.show()

In [None]:
# Question: how do fatalities and injuries develop over the years?
# Total both columns per year.
tren_lin = dat_mass.groupby('year')[['fatalities', 'injured']].sum()
# 'year' is the index of the grouped frame; put it in ascending order before plotting.
sort_tren_lin = tren_lin.sort_index(ascending=True)

print(sort_tren_lin)

In [None]:
# Frame built from the yearly totals (not used by the plotting code below).
trend_line = pd.DataFrame(sort_tren_lin)

# One line per measure, plotted against the frame's index.
fig, ax = plt.subplots()
for column in ('fatalities', 'injured'):
    ax.plot(sort_tren_lin[column], label=column)

# Axis labels, title and legend.
ax.set_xlabel('Year')
ax.set_ylabel('People affected')
ax.set_title('Trend Lines Plot')
ax.legend()

# Write the figure to the output directory as a .png file.
output_path = "/kaggle/working/trend_line_plot.png"
fig.savefig(output_path, bbox_inches='tight', dpi=150)

# Display the chart.
plt.show()


In [None]:
# Count how often each weapon type occurs in dat_mass.
value_counts = dat_mass['weapon_type'].value_counts()
# Fix the names explicitly: counts go in a column called 'weapon_type' and the
# index (the weapon labels) is left unnamed. pandas >= 2.0 names the
# value_counts() result 'count' and the index 'weapon_type'; without this
# normalisation the steps below would fail there.
weap_df = value_counts.rename('weapon_type').rename_axis(None).to_frame()
# Sort by count, largest first.
sort_val_count = weap_df.sort_values(by='weapon_type', ascending=False)
# Keep only the ten most frequent weapon types.
top_10_weapons = sort_val_count.nlargest(10, 'weapon_type')
# Move the weapon labels out of the index into a column named 'index'.
top_10_weapons = top_10_weapons.reset_index()
print(top_10_weapons)
# The visualization follows in the next cell.
# Need to work on the visualization after this.


In [None]:
# Horizontal bar chart of the ten most frequent weapon types.
from IPython.display import Image  # imported here, but not used in this cell

# Sort by count in ascending order before drawing the bars.
top_10_weapons = top_10_weapons.sort_values(by='weapon_type', ascending=True)

fig, ax = plt.subplots()
ax.barh(top_10_weapons['index'], top_10_weapons['weapon_type'], height=0.5)

# Axis labels and title.
ax.set_xlabel('Count')
ax.set_ylabel('Weapon Type')
ax.set_title('Top 10 Weapon Types')

# Save the figure to the output directory, then display it.
output_path = "/kaggle/working/top_10_weapon_types.png"
fig.savefig(output_path, bbox_inches='tight', dpi=150)
plt.show()

In [None]:
# Count how often each value of 'prior_signs_mental_health_issues' occurs.
ment_counts = dat_mass['prior_signs_mental_health_issues'].value_counts()
# Fix the names explicitly: counts go in a column named after the source column
# and the index (the category labels) is left unnamed. pandas >= 2.0 names the
# value_counts() result 'count', which would break the steps below.
ment_df = ment_counts.rename('prior_signs_mental_health_issues').rename_axis(None).to_frame()
# Sort by count, largest first.
sort_ment_count = ment_df.sort_values(by='prior_signs_mental_health_issues', ascending=False)
# Keep the five most frequent categories and move the labels into a column.
top_5_mental_cond = sort_ment_count.nlargest(5, 'prior_signs_mental_health_issues').reset_index()
# Shorter column names: label -> 'Mental_condition', count -> 'Cont_type'.
top_fiv = top_5_mental_cond.rename(columns={'index': 'Mental_condition', 'prior_signs_mental_health_issues': 'Cont_type'})

# Add the 'Unclear' count to the 'Yes' count, then remove the 'Unclear' row.
# NOTE(review): treating 'Unclear' as 'Yes' is an analyst assumption, not
# something the data shows. It inflates the 'Yes' share in the chart built
# from this frame. Confirm this is intended.
is_unclear = top_fiv['Mental_condition'] == 'Unclear'
top_fiv.loc[top_fiv['Mental_condition'] == 'Yes', 'Cont_type'] += top_fiv.loc[is_unclear, 'Cont_type'].sum()
top_fiv = top_fiv[~is_unclear]

# Renumber the rows after removing one.
top_fiv = top_fiv.reset_index(drop=True)

# The visualization follows in the next cell.
print(top_fiv)

In [None]:

# Pie chart: wedge sizes come from 'Cont_type', wedge and legend labels from
# 'Mental_condition'; each wedge is annotated with its percentage.
fig, ax = plt.subplots(figsize=(6, 5))
ax.pie(
    top_fiv['Cont_type'],
    labels=top_fiv['Mental_condition'],
    autopct='%1.1f%%',
    colors=['orange', 'lightblue', 'yellow', 'violet'],
)
ax.set_title('Shooters - Mental Issues Distribution')
ax.legend(
    title='Ment_Iss_Cond',
    loc='upper right',
    labels=top_fiv['Mental_condition'],
    bbox_to_anchor=(1.6, 1.5),
)

# Equal axis scaling keeps the pie circular.
ax.axis('equal')

# Write the figure to the output directory as a .png file.
output_path = "/kaggle/working/mental_plot.png"
fig.savefig(output_path, bbox_inches='tight', dpi=150)

# Display the chart.
plt.show()