<a href="https://colab.research.google.com/github/andrybrew/socialmediaanalytic/blob/master/002_data_visualization.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Data Visualization

## Import Libraries

In [0]:
# Import Library for Data Manipulation
import pandas as pd

In [0]:
# Import Libraries for Visualization
import matplotlib.pyplot as plt
import seaborn as sns

## Import Data

In [0]:
# Read the comma-separated dataset directly from its raw GitHub URL,
# then display the resulting DataFrame
df = pd.read_csv(
    'https://raw.githubusercontent.com/dianrdn/data/master/suicide_germany2.csv',
    sep=',',
)
df

In [0]:
# Show the number of rows and columns of the DataFrame as a (rows, columns) tuple
df.shape

In [0]:
# Print a concise summary of the DataFrame (columns, non-null counts, dtypes)
df.info()

## Distribution Plot

In [0]:
# Default figure size (width, height) applied to the matplotlib figures drawn below
plt.rcParams.update({'figure.figsize': (16, 8)})

In [0]:
# Kernel density estimate of the 'suicides_no' column; labels are set on the
# Axes object that seaborn returns rather than through pyplot
ax = sns.kdeplot(df['suicides_no'])
ax.set_xlabel('Number of Suicides')
ax.set_ylabel('Probability Density Function / The Likelihood of Outcome')
ax.set_title('Number of Suicide Distribution')

In [0]:
# Visualize Histogram with a KDE curve overlaid.
# `sns.distplot` is deprecated (seaborn >= 0.11) and scheduled for removal, so
# `sns.histplot` is used instead. `kde=True` keeps the density curve and
# `stat='density'` keeps the y-axis on a density scale, matching the y-label.
sns.histplot(df['suicides_no'], kde=True, stat='density')
plt.xlabel('Number of Suicides')
plt.ylabel('Probability Density Function / The Likelihood of Outcome')
plt.title('Number of Suicide Distribution')

In [0]:
# Same histogram + KDE, with the number of histogram bins set explicitly to 50.
# `sns.histplot` replaces the deprecated `sns.distplot` (seaborn >= 0.11);
# `kde=True` and `stat='density'` reproduce the density-scaled histogram with
# its density curve.
sns.histplot(df['suicides_no'], bins=50, kde=True, stat='density')
plt.xlabel('Number of Suicides')
plt.ylabel('Probability Density Function / The Likelihood of Outcome')
plt.title('Number of Suicide Distribution')

## Pair Plot

In [0]:
# Pairwise relationship grid over the columns of df
sns.pairplot(data=df)

In [0]:
# Same pairwise grid, colored by the 'sex' column
sns.pairplot(
    data=df,
    hue='sex',
)

## Bar Plot

In [0]:
# Bar plot of 'suicides_no' for each 'generation' category
sns.barplot(
    data=df,
    x='generation',
    y='suicides_no',
)

In [0]:
# Grouped vertical bars: one bar color per 'sex' within each generation
sns.barplot(
    data=df,
    x='generation',
    y='suicides_no',
    hue='sex',
)

In [0]:
# Same grouped bars, with caps drawn on the error bars (capsize=.2)
sns.barplot(
    data=df,
    x='generation',
    y='suicides_no',
    hue='sex',
    capsize=.2,
)

In [0]:
# Faceted bar plot: one column (panel) per value of 'sex'
sns.catplot(
    data=df,
    kind='bar',
    x='generation',
    y='suicides_no',
    hue='sex',
    col='sex',
    capsize=.2,
)

## Line Plot

In [0]:
# Line plot of 'suicides_no' against 'year'
sns.lineplot(
    data=df,
    x='year',
    y='suicides_no',
)

In [0]:
# One line per 'age' value, colored with the 'hls' palette
sns.lineplot(
    data=df,
    x='year',
    y='suicides_no',
    hue='age',
    palette='hls',
)

In [0]:
# Faceted line plot: lines colored by 'age', one column (panel) per 'sex'
sns.relplot(
    data=df,
    kind='line',
    x='year',
    y='suicides_no',
    hue='age',
    palette='hls',
    col='sex',
)

## Scatter Plot

In [0]:
# Scatter plot of 'suicides_no' against 'year'
sns.scatterplot(
    data=df,
    x='year',
    y='suicides_no',
)

In [0]:
# Color the points by the 'age' column
sns.scatterplot(
    data=df,
    x='year',
    y='suicides_no',
    hue='age',
)

In [0]:
# Color by 'age' and additionally vary the marker style by 'sex'
sns.scatterplot(
    data=df,
    x='year',
    y='suicides_no',
    hue='age',
    style='sex',
)

In [0]:
# Same scatter plot, switching the hue colors to the 'hls' palette
sns.scatterplot(
    data=df,
    x='year',
    y='suicides_no',
    hue='age',
    style='sex',
    palette='hls',
)

In [0]:
# Additionally map marker size to the 'age' column, with sizes drawn from
# the (20, 200) range
sns.scatterplot(
    data=df,
    x='year',
    y='suicides_no',
    hue='age',
    style='sex',
    size='age',
    sizes=(20, 200),
    palette='hls',
)

In [0]:
# Faceted scatter plot: one column (panel) per value of 'sex'
sns.relplot(
    data=df,
    kind='scatter',
    x='year',
    y='suicides_no',
    hue='age',
    style='sex',
    size='age',
    sizes=(20, 200),
    palette='hls',
    col='sex',
)

In [0]:
# Faceted scatter plot: one column (panel) per value of 'age'
sns.relplot(
    data=df,
    kind='scatter',
    x='year',
    y='suicides_no',
    hue='age',
    style='sex',
    size='age',
    sizes=(20, 200),
    palette='hls',
    col='age',
)

## Regression Plot

In [0]:
# Scatter plot with a fitted regression line: 'suicides_no' vs 'year'
sns.lmplot(
    data=df,
    x='year',
    y='suicides_no',
)

In [0]:
# Separate regression fits for each value of 'sex'
sns.lmplot(
    data=df,
    x='year',
    y='suicides_no',
    hue='sex',
)

## Joint Plot

In [0]:
# Joint plot of 'year' and 'suicides_no' using the default kind
sns.jointplot(
    data=df,
    x='year',
    y='suicides_no',
)

In [0]:
# Joint plot of 'year' and 'gdp_for_year' with a regression fit (kind='reg');
# truncate=False is forwarded to the regression plot
sns.jointplot(
    data=df,
    x='year',
    y='gdp_for_year',
    kind='reg',
    truncate=False,
)

In [0]:
# Joint plot of 'year' and 'gdp_for_year' with hexagonal bins (kind='hex')
sns.jointplot(
    data=df,
    x='year',
    y='gdp_for_year',
    kind='hex',
)

In [0]:
# Joint plot of 'year' and 'gdp_for_year' using kernel density estimates (kind='kde')
sns.jointplot(
    data=df,
    x='year',
    y='gdp_for_year',
    kind='kde',
)

## Box Plot

In [0]:
# Box plot of 'suicides_no' for each 'year'
sns.boxplot(
    data=df,
    x='year',
    y='suicides_no',
)

In [0]:
# Side-by-side boxes per year, colored by 'sex' using the 'hls' palette
sns.boxplot(
    data=df,
    x='year',
    y='suicides_no',
    hue='sex',
    palette='hls',
)

In [0]:
# Faceted box plot: one column (panel) per value of 'sex'
sns.catplot(
    data=df,
    kind='box',
    x='year',
    y='suicides_no',
    hue='sex',
    palette='hls',
    col='sex',
)

## Boxen Plot

In [0]:
# Boxen plot of 'suicides_no' for each 'year'
sns.boxenplot(
    data=df,
    x='year',
    y='suicides_no',
)

In [0]:
# Side-by-side boxen plots per year, colored by 'sex' using the 'hls' palette
sns.boxenplot(
    data=df,
    x='year',
    y='suicides_no',
    hue='sex',
    palette='hls',
)

In [0]:
# Faceted boxen plot: one column (panel) per value of 'sex'
sns.catplot(
    data=df,
    kind='boxen',
    x='year',
    y='suicides_no',
    hue='sex',
    palette='hls',
    col='sex',
)

## Violin Plot

In [0]:
# Draw Basic Violin Plot: one full (mirrored) violin of 'suicides_no' per 'year'.
# `split=True` is intentionally not passed here: there is no `hue` variable to
# split on, and recent seaborn releases would render half violins instead of
# the basic plot this cell is meant to show.
sns.violinplot(x='year', y='suicides_no', data=df)

In [0]:
# Split violins per year: 'sex' selects the color ('hls' palette) and
# split=True draws the hue levels as halves of the same violin
sns.violinplot(
    data=df,
    x='year',
    y='suicides_no',
    hue='sex',
    split=True,
    palette='hls',
)

In [0]:
# Faceted violin plot: one column (panel) per value of 'sex'
sns.catplot(
    data=df,
    kind='violin',
    x='year',
    y='suicides_no',
    hue='sex',
    palette='hls',
    col='sex',
)

## Correlation Map

In [0]:
# Select Only Numeric Attributes.
# include='number' matches every numeric dtype (any int/float width), so a
# numeric column is not silently dropped from the correlation just because it
# is stored as e.g. int32 or float32 rather than int64/float64.
df_num = df.select_dtypes(include='number')

In [0]:
# Clustered heatmap of the pairwise correlations between the numeric columns,
# using the 'vlag' colormap centered at 0
sns.clustermap(
    data=df_num.corr(),
    cmap='vlag',
    center=0,
    linewidths=.75,
)