In [6]:
import pandas as pd

In [None]:
# Raw-content GitHub URL of the suicide-rate CSV; the space in the file name
# is percent-encoded as %20.
url = (
    "https://raw.githubusercontent.com/"
    "itsjenjenbarrameda/ICT-FINAL-20/refs/heads/main/"
    "suicide%20rate.csv"
)

In [7]:
# Read the CSV located at `url` into the working DataFrame.
df = pd.read_csv(filepath_or_buffer=url)

FileNotFoundError: [Errno 2] No such file or directory: 'suicide rate.csv'

In [None]:
# Print the schema summary, then show the first rows as the cell's result.
# info() prints and returns None, so bundling both calls in a tuple displayed
# "(None, ...)" and the DataFrame lost its table rendering.
df.info()
df.head()

In [None]:
# Strip the comma separators from the raw GDP strings and store the values as
# floats under a column name without the surrounding spaces.
gdp_as_text = df[' gdp_for_year ($) '].str.replace(',', '', regex=False)
df['gdp_for_year ($)'] = gdp_as_text.astype(float)

In [None]:
# Remove the space-padded raw GDP column.
# Reassigning instead of using inplace=True, and ignoring a column that is
# already gone, keeps this cell from raising KeyError when it is re-run.
df = df.drop(columns=[' gdp_for_year ($) '], errors='ignore')

In [None]:
# Keep only the rows that have no missing value in any column.
# NOTE(review): with no `subset`, a sparsely-filled column that the analysis
# never reads would still discard its rows — confirm this is intended.
df_clean = df.dropna(axis=0, how='any')

In [None]:
# Outlier removal on the suicide rate using 1.5 * IQR fences around the
# quartiles. The filter reads and rebinds df_clean, so running this cell again
# recomputes the fences on the already-filtered rows.
rate_col = 'suicides/100k pop'
Q1 = df_clean[rate_col].quantile(0.25)
Q3 = df_clean[rate_col].quantile(0.75)
IQR = Q3 - Q1
lower_bound, upper_bound = Q1 - 1.5 * IQR, Q3 + 1.5 * IQR

# between() is inclusive on both ends, matching the >= / <= pair.
df_clean = df_clean[df_clean[rate_col].between(lower_bound, upper_bound)]

In [None]:
# Number of rows left after cleaning.
cleaned_count = len(df_clean)

# info() prints its report and returns None; calling it on its own line keeps
# a stray "None" out of the cell result, which is now just the row count.
df_clean.info()
cleaned_count

In [None]:
import numpy as np
from scipy.stats import pearsonr
import statsmodels.api as sm

In [None]:
# Arithmetic mean of the suicide rate over the cleaned rows.
mean_suicide_rate = df_clean['suicides/100k pop'].mean()

In [None]:
# Standard deviation of GDP per capita. ddof=0 is np.std's default, spelled
# out here to make clear this is the population (not sample) estimate.
std_gdp_per_capita = np.std(df_clean['gdp_per_capita ($)'], ddof=0)

In [None]:
# Pearson correlation-coefficient matrix of suicide counts against population.
correlation_matrix = np.corrcoef(
    x=df_clean['suicides_no'],
    y=df_clean['population'],
)

In [None]:
# Largest value in the population column of the cleaned data.
max_population = df_clean['population'].max()

In [None]:
# Sorted array of the distinct values in the year column.
unique_years = np.unique(df_clean['year'].to_numpy())

In [None]:
# Pearson correlation (and its p-value) between GDP per capita and suicide rate.
scipy_corr, scipy_p = pearsonr(
    x=df_clean['gdp_per_capita ($)'],
    y=df_clean['suicides/100k pop'],
)

In [None]:
# Ordinary least squares: suicide rate regressed on GDP per capita.
Y = df_clean['suicides/100k pop']
# add_constant supplies the intercept column; sm.OLS does not add one itself.
X = sm.add_constant(df_clean['gdp_per_capita ($)'])

model = sm.OLS(Y, X).fit()
regression_summary = model.summary()

# Print the regression table on its own. Embedded in the dict below via str(),
# it was displayed as a single string with escaped "\n" characters.
print(regression_summary)

# Summary of the statistics computed in the earlier cells, shown as the cell
# result. Scalars are converted to built-in types so the dict repr is not
# cluttered by NumPy scalar wrappers (e.g. "np.float64(...)" on NumPy 2).
{
    "NumPy": {
        "Mean Suicide Rate": float(mean_suicide_rate),
        "Std GDP per Capita": float(std_gdp_per_capita),
        "Correlation Matrix (Suicides vs Pop)": correlation_matrix.tolist(),
        "Max Population": int(max_population),
        "Unique Years Count": len(unique_years)
    },
    "SciPy": {
        "Pearson Correlation": float(scipy_corr),
        "P-Value": float(scipy_p)
    }
}

In [None]:
# Descriptive statistics of the suicide rate, printed on one line in the
# order: mean, standard deviation, minimum, maximum, median.
rates = df_clean['suicides/100k pop']
mean_rate, std_dev = np.mean(rates), np.std(rates)
min_val, max_val = np.min(rates), np.max(rates)
median_val = np.median(rates)

print(mean_rate, std_dev, min_val, max_val, median_val)

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Reload the CSV from `url`, replacing whatever `df` currently holds.
df = pd.read_csv(filepath_or_buffer=url)

In [None]:
# Convert the comma-separated GDP strings to floats under a trimmed column
# name, then drop the raw column. The membership guard makes this cell safe to
# re-run: once the raw column has been dropped, the unguarded version raised
# KeyError on the next execution.
raw_gdp_col = ' gdp_for_year ($) '
if raw_gdp_col in df.columns:
    df['gdp_for_year ($)'] = df[raw_gdp_col].str.replace(',', '', regex=False).astype(float)
    df = df.drop(columns=[raw_gdp_col])

# Keep only rows with no missing value in any column.
df_clean = df.dropna()

In [None]:
# Outlier removal on the suicide rate using 1.5 * IQR fences around the
# quartiles. The filter reads and rebinds df_clean, so running this cell again
# recomputes the fences on the already-filtered rows.
rate_col = 'suicides/100k pop'
Q1 = df_clean[rate_col].quantile(0.25)
Q3 = df_clean[rate_col].quantile(0.75)
IQR = Q3 - Q1
lower_bound, upper_bound = Q1 - 1.5 * IQR, Q3 + 1.5 * IQR
# between() is inclusive on both ends, matching the >= / <= pair.
df_clean = df_clean[df_clean[rate_col].between(lower_bound, upper_bound)]


In [None]:
# Apply the white-grid theme to all following plots. set_theme() is the
# documented name; sns.set() is only an alias for it that seaborn says may be
# removed in the future.
sns.set_theme(style="whitegrid")

In [None]:
# Histogram of the suicide rate with a kernel-density curve overlaid.
ax = sns.histplot(df_clean['suicides/100k pop'], kde=True)
ax.set_title('Distribution of Suicide Rates')
plt.show()

In [None]:
# Box plot of the suicide rate per sex; the figure is written to
# plot2_gender.png before being shown.
fig, ax = plt.subplots(figsize=(8, 6))
sns.boxplot(data=df_clean, x='sex', y='suicides/100k pop', ax=ax)
ax.set_title("Suicide Rate by Gender")
ax.set_xlabel("Gender")
ax.set_ylabel("Suicides per 100k Population")
fig.tight_layout()
fig.savefig("plot2_gender.png")
plt.show()

In [None]:
# Bar chart of the suicide rate per age group (barplot's default estimator
# aggregates each group to its mean). `hue='age'` is only there so the palette
# colours each bar.
plt.figure(figsize=(10, 6))
ax = sns.barplot(data=df_clean, x='age', y='suicides/100k pop', hue='age', dodge=False, palette='viridis')

# Remove the legend only if seaborn actually drew one. The previous
# plt.legend().remove() built a brand-new legend just to delete it, which can
# trigger a "no artists with labels" warning when there is nothing to list.
existing_legend = ax.get_legend()
if existing_legend is not None:
    existing_legend.remove()

plt.title('Average Suicide Rate by Age Group')
plt.xlabel('Age Group')
plt.ylabel('Suicides per 100k Population')
plt.xticks(rotation=30)
plt.tight_layout()
plt.show()

In [None]:
# The five countries with the highest mean suicide rate in the cleaned data.
country_means = df_clean.groupby('country')['suicides/100k pop'].mean()
top_countries = country_means.sort_values(ascending=False).head(5).index

# One line per country: yearly mean of the suicide rate. The figure is written
# to plot4_time_country.png before being shown.
fig, ax = plt.subplots(figsize=(14, 7))
for country in top_countries:
    country_yearly_avg = (
        df_clean[df_clean['country'] == country]
        .groupby('year')['suicides/100k pop']
        .mean()
    )
    ax.plot(country_yearly_avg.index, country_yearly_avg.values, label=country)
ax.set_title('Suicide Rate Over Time (Top 5 Countries)')
ax.set_xlabel('Year')
ax.set_ylabel('Average Suicides per 100k Population')
ax.legend()
fig.tight_layout()
fig.savefig('plot4_time_country.png')
plt.show()

In [None]:
# Scatter plot of suicide rate against GDP per capita with a red fitted
# regression line; the figure is written to plot1_regression.png.
fig, ax = plt.subplots(figsize=(10, 6))
sns.regplot(
    data=df_clean,
    x='gdp_per_capita ($)',
    y='suicides/100k pop',
    line_kws={"color": "red"},
    ax=ax,
)
ax.set_title("Suicide Rate vs GDP per Capita")
ax.set_xlabel("GDP per Capita ($)")
ax.set_ylabel("Suicides per 100k Population")
fig.tight_layout()
fig.savefig("plot1_regression.png")
plt.show()

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
from scipy.stats import pearsonr

In [None]:
# Pearson correlation of the suicide rate with GDP per capita and with year.
# The p-values get real names rather than `_`: in IPython `_` holds the last
# cell output, and assigning to it overwrites that.
corr1, gdp_p_value = pearsonr(df_clean['suicides/100k pop'], df_clean['gdp_per_capita ($)'])
corr2, year_p_value = pearsonr(df_clean['suicides/100k pop'], df_clean['year'])

print("GDP correlation:", corr1)
print("Year correlation:", corr2)