# **DATA ANALYSIS USING PYTHON**

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Select matplotlib's 'fivethirtyeight' style sheet for the figures drawn below.
plt.style.use('fivethirtyeight')

In [None]:
# Read the dataset from 'master.csv' (path is relative to the working directory).
dataframe = pd.read_csv(filepath_or_buffer='master.csv')

# Display the column labels so the names used by the later cells can be checked.
dataframe.columns

In [None]:
# Print dtypes and non-null counts for every column before cleaning.
dataframe.info()

# Remove the columns this analysis does not use: 'HDI for year' and 'generation'.
# Re-assigning (instead of inplace=True) together with errors='ignore' keeps the cell
# idempotent: running it a second time no longer raises KeyError for the columns
# that were already removed.
dataframe = dataframe.drop(columns=['HDI for year', 'generation'], errors='ignore')

## **Let's do trend analysis across countries**

In [None]:
# Ask which country to analyse and resolve it case-insensitively against the names that
# actually occur in the 'country' column. str.title() is not used because it capitalises
# every word ("bosnia and herzegovina" -> "Bosnia And Herzegovina"), so any dataset name
# containing a lowercase word could never be matched.
requested_country = input('Enter country name: ').strip()
known_countries = {
    str(name).casefold(): name
    for name in dataframe['country'].dropna().unique()
}
country_X = known_countries.get(requested_country.casefold())
if country_X is None:
    # Fail loudly instead of drawing an empty, misleading chart.
    raise ValueError(
        f"Country {requested_country!r} was not found in the dataset; "
        "see dataframe['country'].unique() for the valid names."
    )

data_of_X_country = dataframe[dataframe['country'] == country_X]

# total number of suicides recorded for country X over all years
suicides_from_X_country = data_of_X_country['suicides_no'].sum()

# yearly totals for country X (index: year, values: summed suicides_no)
groupedby_year = data_of_X_country.groupby('year')
no_of_suicides = groupedby_year['suicides_no'].sum()

# line chart of the yearly totals; the Series index (year) is the x axis,
# so no x=/y= arguments are needed (Series.plot ignores them)
fig, ax = plt.subplots()
no_of_suicides.plot(kind='line', ax=ax, marker='o', color='red', linewidth=0.8)

# axis labels and title
ax.set_xlabel('Year', fontname='Jetbrains Mono', fontsize=10, fontweight='bold', fontstyle='italic')
ax.set_ylabel('Number of Suicides', fontname='Jetbrains Mono', fontsize=10, fontweight='bold', fontstyle='italic')
ax.set_title(f"Number of Suicides by Year in {country_X}", fontname='Georgia', fontsize=12)

# overall total shown in the top-left corner of the axes
total_suicides = f"Total Suicides: {suicides_from_X_country}"
ax.annotate(total_suicides, xy=(0.02, 0.95), xycoords='axes fraction', fontsize=10, fontweight='bold', color='black')

plt.show()

## **Let's do gender analysis on our dataset**

In [None]:
# Sum 'suicides_no' separately for each value of the 'sex' column.
groupedby_sex = dataframe.groupby('sex')
suicides_by_sex = groupedby_sex['suicides_no'].sum()

# One colour per wedge.
colors = ['#598392', '#124559']

# Square canvas for the pie.
fig, ax = plt.subplots(figsize=(5, 5))

# One wedge per group, labelled with the group name and its percentage share.
ax.pie(
    x=suicides_by_sex,
    labels=suicides_by_sex.index,
    colors=colors,
    autopct='%1.1f%%',
    startangle=90,
)

# Equal axis scaling keeps the pie circular.
ax.axis('equal')

# Title and legend.
ax.set_title('Suicides by Sex')
ax.legend(title='Sex', loc='best')

# Render the figure.
plt.show()

## **Let's do trend analysis by year_range**

In [None]:
# Seven bin edges define six consecutive year ranges, one label per range.
bins = [1985, 1990, 1995, 2000, 2005, 2010, 2016]
labels = ['1985-1990', '1991-1995', '1996-2000', '2001-2005', '2006-2010', '2011-2016']

# Assign every row to its year range. pd.cut builds right-closed intervals and, by
# default, leaves the very first edge out, so rows with year == 1985 would become NaN
# and be dropped from the '1985-1990' total. include_lowest=True closes the first
# interval on the left so that 1985 is counted.
dataframe['year_range'] = pd.cut(dataframe['year'], bins=bins, labels=labels, include_lowest=True)

# Total suicides per year range. observed=False is spelled out so that every range
# is kept (even an empty one) regardless of the pandas default for categorical keys.
groupedby_year_range = dataframe.groupby('year_range', observed=False)['suicides_no'].sum()

# Bar chart of the totals.
# NOTE: the plotted values are summed counts ('suicides_no'), not per-capita rates.
fig, ax = plt.subplots(figsize=(8, 5))
groupedby_year_range.plot(kind='bar', ax=ax, color='#335c67')
ax.set_xlabel('Year Range', fontname='Jetbrains Mono', fontsize=15, fontweight='bold')
ax.set_ylabel('Total Number of Suicides', fontname='Jetbrains Mono', fontsize=15, fontweight='bold')
ax.set_title('Suicide Rates by Year Range')
ax.spines['right'].set_visible(False)
ax.spines['top'].set_visible(False)
plt.xticks(rotation=45, ha='right')
plt.tight_layout()
plt.show()

## **Now finally time has come to do age analysis**
* We will see whether age is one of the factors leading to an increase in the suicide rate.

In [None]:
# Total suicides per age group.
groupedby_age = dataframe.groupby('age')['suicides_no'].sum()

# groupby() sorts the string labels lexicographically, which would place a label such
# as '5-14 years' after '35-54 years' and distort the shape of the chart. Re-order the
# groups by the first number in each label instead.
# Assumes every label starts with the lower bound of its bracket (e.g. '5-14 years',
# '75+ years') -- verify against dataframe['age'].unique().
groupedby_age = groupedby_age.sort_index(
    key=lambda age_labels: age_labels.str.extract(r'(\d+)', expand=False).astype(float)
)

# Area chart of the totals, youngest group on the left.
# The plot style is the one selected at the top of the notebook; it is deliberately not
# changed here, because switching styles after drawing has no effect on this figure and
# would alter every cell that is run afterwards.
fig, ax = plt.subplots(figsize=(8, 5))
groupedby_age.plot.area(ax=ax, color='#1d3557', alpha=0.7)
ax.set_xlabel('Age Group', fontname='Jetbrains Mono', fontsize=15, fontweight='bold')
ax.set_ylabel('Total Number of Suicides', fontname='Jetbrains Mono', fontsize=15, fontweight='bold')
ax.set_title('Suicide Rates by Age Group')
ax.spines['right'].set_visible(False)
ax.spines['top'].set_visible(False)
plt.xticks(rotation=45, ha='right')
plt.tight_layout()
plt.show()

# **NOW LET'S PUT ALL THE POINTS TOGETHER WE HAVE COLLECTED FROM THIS DATASET:-**
1. Some countries, like the United States, have seen a drastic increase in suicide rates over the years, whereas other countries, like Germany, have experienced the complete opposite.
2. We can clearly see that men are more likely to be victims of suicide compared to women.
3. There was a period around the 2000s when the suicide rate rose suddenly, but after that there is no clear relationship between the suicide rate and time.
4. Age group plays a major role in suicides. To be more precise:
  * Kids are almost safe from this
  * But as kids grow up, they are more likely to be victims
  * And as they become seniors, the graph falls again, meaning they are less likely to be victims
  
## **You can find more useful insights out of this dataset, so use it as you like**