In [None]:
#Import necessary libraries
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler


In [None]:
# Load the dataset: read the CSV export served by data.ct.gov straight into a
# DataFrame. This performs a network download every time the cell runs.
url = (
    "https://data.ct.gov/api/views/rybz-nyjw/rows.csv"
    "?accessType=DOWNLOAD"
)
df = pd.read_csv(filepath_or_buffer=url)

In [None]:
# Preview the data: print the first five rows (the default window of head()).
print(df.head(n=5))


In [None]:
# Rename the columns to be more readable.
# Only columns whose label actually changes are listed; every other column
# keeps its existing name. DataFrame.rename skips keys that are absent from
# the frame (its default is errors="ignore"), so a missing source column does
# not raise here.
readable_column_names = {
    "DateType": "Date Type",
    "Sex": "Gender",
    "ResidenceCity": "Residence City",
    "ResidenceCounty": "Residence County",
    "ResidenceState": "Residence State",
    "DeathCity": "Death City",
    "DeathCounty": "Death County",
    "LocationifOther": "Location if Other",
    "DescriptionofInjury": "Description of Injury",
    "InjuryPlace": "Injury Place",
    "InjuryCity": "Injury City",
    "InjuryCounty": "Injury County",
    "InjuryState": "Injury State",
    "CauseofDeath": "Cause of Death",
    # Listed once: a repeated key in a dict literal is silently overwritten.
    "MannerofDeath": "Manner of Death",
    "AmendedMannerofDeath": "Amended Manner of Death",
    "DeathLoc": "Death Location",
    "PronouncerTitle": "Pronouncer Title",
    # No trailing space, so the column can be selected by its visible name.
    "OtherSignifican": "Other Significant Conditions",
    "FentanylAnalogue": "Fentanyl Analogue",
    "Amphet": "Meth/Amphetamine",
    "Tramad": "Tramadol",
    "Morphine_NotHeroin": "Morphine (Not Heroin)",
    "AnyOpioid": "Any Opioid",
}
# Rebind instead of mutating in place so re-running the cell stays predictable.
df = df.rename(columns=readable_column_names)


In [None]:
# Drop irrelevant columns.
# The rename cell above has already turned "DateType" / "LocationifOther" into
# "Date Type" / "Location if Other", so dropping only the raw names raises
# KeyError. Both spellings are listed, and errors="ignore" skips whichever
# names are not present in the frame.
irrelevant_columns = [
    "ID",
    "Time",
    "DateType",
    "Date Type",
    "LocationifOther",
    "Location if Other",
]
df = df.drop(columns=irrelevant_columns, errors="ignore")


In [None]:
# Replace missing values with "Unknown" in non-numeric columns only.
# Writing the string "Unknown" into a numeric column (e.g. "Age") would turn
# the whole column into object dtype and break the z-score and correlation
# steps that follow, so numeric columns keep NaN as their missing marker.
non_numeric_cols = df.select_dtypes(exclude="number").columns
df = df.fillna(dict.fromkeys(non_numeric_cols, "Unknown"))


In [None]:
# Find and remove outliers: drop every row whose value in any of the listed
# numeric columns lies more than 3 standard deviations from the column mean.
numeric_cols = ["Age"]

# Coerce to numbers so non-numeric entries (such as a placeholder string used
# for missing values) become NaN instead of aborting the computation.
numeric_df = df[numeric_cols].apply(pd.to_numeric, errors="coerce")

# Absolute z-scores using the population standard deviation (ddof=0), which is
# the scaling StandardScaler applies. A constant column yields NaN here.
z_scores = ((numeric_df - numeric_df.mean()) / numeric_df.std(ddof=0)).abs()

# NaN z-scores compare False against the threshold, so rows with a missing
# value are kept rather than treated as outliers.
outlier_rows = (z_scores > 3).any(axis=1)

# Row positions only. np.where on the 2-D score array returns a
# (rows, cols) tuple, which cannot be used to index the 1-D df.index.
outliers = np.flatnonzero(outlier_rows.to_numpy())
df = df.drop(df.index[outliers])


In [None]:
# Explore the relationship between variables using a heatmap.
# Correlation is only defined for numeric columns, and DataFrame.corr() raises
# on text columns in pandas 2.0+, so the numeric columns are selected first.
corr_matrix = df.select_dtypes(include="number").corr()
if corr_matrix.empty:
    # Nothing to draw; seaborn cannot render an empty matrix.
    print("No numeric columns available for a correlation heatmap.")
else:
    ax = sns.heatmap(corr_matrix, annot=True)
    ax.set_title("Correlation between numeric columns")
    plt.show()

# Explore the age distribution by gender.
# None of the columns handled in this notebook is an income field, so a hue of
# "Income" cannot be resolved by seaborn. Age is plotted instead, stacked by
# Gender. NOTE(review): confirm this is the comparison that was intended.
# Age is coerced to numeric so placeholder strings do not turn the histogram
# into a categorical count plot.
age_by_gender = df.assign(Age=pd.to_numeric(df["Age"], errors="coerce"))
ax = sns.histplot(data=age_by_gender, x="Age", hue="Gender", multiple="stack")
ax.set_title("Age distribution by gender")
plt.show()
