In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# 59. Scatterplots with Seaborn

In [2]:
# Absolute, machine-specific Windows path to the office-sales CSV; edit it to
# match the local data folder before running the notebook.
df_path = r'F:\DataSpell\data_science_ml_learning\Section 7 Seaborn Data Visualizations\data\dm_office_sales.csv'

In [3]:
# Load the CSV referenced by df_path into the working DataFrame.
df = pd.read_csv(filepath_or_buffer=df_path)

In [4]:
# Salary vs. sales. Education level is encoded twice (colour and marker
# shape); work experience is mapped to marker size.
# A simpler variant colours the points by salary instead: hue='salary'.
plt.figure(dpi=150, figsize=(10, 5))

sns.scatterplot(
    data=df,
    x='salary',
    y='sales',
    hue='level of education',
    style='level of education',
    size='work experience',
    palette='Set2',
    s=200,
    alpha=1,
)

# Persist the current figure; the relative name resolves against the
# working directory.
plt.savefig('scatterplot.png')

# 60. Distribution Plots - Part One - Understanding Plot Types

# 61. Distribution Plots - Part Two - Coding with Seaborn

In [5]:
# Rug plot: one tick per observation along the salary axis; height=0.5 makes
# each tick span half of the axes height.
plt.figure(dpi=150, figsize=(8, 5))

sns.rugplot(
    data=df,
    x='salary',
    height=0.5,
)

In [6]:
# Histogram of salary with a KDE overlay and a rug along the x axis.
#
# sns.displot is a figure-level function: it always creates its own figure, so
# a preceding plt.figure(...) call only produces an extra, empty figure and its
# figsize/dpi never reach the plot. The size is therefore passed through
# height/aspect (height 6 in, width = height * aspect = 10 in) and the dpi is
# set on the figure owned by the returned grid.
#
# Optional: sns.set(style='whitegrid') changes the background style; the other
# options are darkgrid, dark, white and ticks.
g = sns.displot(
    data=df,
    x='salary',
    bins=15,
    color='red',
    edgecolor='black',
    kde=True,
    rug=True,
    height=6,
    aspect=10 / 6,
)
g.figure.set_dpi(150)

In [7]:
# Axes-level histogram of salary with a KDE curve drawn on top.
sns.histplot(
    x='salary',
    data=df,
    kde=True,
)

In [8]:
# Kernel density estimate of the salary column on its own.
sns.kdeplot(
    x='salary',
    data=df,
)

In [9]:
# Build a reproducible synthetic sample: 200 integer ages drawn from
# randint(0, 100), stored in a single-column DataFrame, then shown as a rug plot.
np.random.seed(42)

samples_ages = pd.DataFrame(
    np.random.randint(0, 100, 200),
    columns=['age'],
)

sns.rugplot(x='age', data=samples_ages)

In [10]:
# Figure-level distribution plot of the synthetic ages: 30-bin histogram plus
# KDE curve and rug ticks.
sns.displot(
    data=samples_ages,
    x='age',
    bins=30,
    kde=True,
    rug=True,
)

In [11]:
# Filled KDE of the synthetic ages.
# - clip keeps the density evaluation inside the 0-100 range.
# - bw_adjust=0.5 halves the default bandwidth, giving a less smoothed curve.
# - fill=True replaces the deprecated shade=True spelling, which emits a
#   FutureWarning in current seaborn releases.
sns.kdeplot(
    data=samples_ages,
    x='age',
    clip=[0, 100],
    bw_adjust=0.5,
    fill=True,
)

# 62. Categorical Plots - Statistics within Categories - Understanding Plot Types

# 63. Categorical Plots - Statistics within Categories - Coding with Seaborn

In [12]:
# Absolute, machine-specific Windows path to the office-sales CSV used by the
# categorical-plot examples; edit it to match the local data folder.
df_path = r'F:\DataSpell\data_science_ml_learning\Section 7 Seaborn Data Visualizations\data\dm_office_sales.csv'

In [13]:
# Load the CSV referenced by df_path into the working DataFrame.
df = pd.read_csv(filepath_or_buffer=df_path)

In [14]:
# Number of rows per division.
df.division.value_counts()

In [15]:
# Row counts per education level, split into one bar per division.
plt.figure(dpi=150, figsize=(10, 6))

sns.countplot(
    data=df,
    x='level of education',
    hue='division',
    palette='Set2',
)

# Optional: plt.ylim(0, 1000) fixes the y-axis range.

In [16]:
# Number of rows per education level.
df.loc[:, 'level of education'].value_counts()

In [17]:
# Mean salary per education level and division; the error bars show the
# standard deviation (errorbar='sd').
plt.figure(dpi=150, figsize=(10, 6))

sns.barplot(
    data=df,
    x='level of education',
    y='salary',
    hue='division',
    estimator=np.mean,
    errorbar='sd',
    palette='Set2',
)

# Anchor the legend's upper-left corner just outside the right edge of the axes.
plt.legend(loc='upper left', bbox_to_anchor=(1.05, 1))

# 65. Categorical Plots - Distributions within Categories - Coding with Seaborn

In [18]:
# Absolute, machine-specific Windows path to the student-performance CSV; edit
# it to match the local data folder.
df_path = r'F:\DataSpell\data_science_ml_learning\Section 7 Seaborn Data Visualizations\data\StudentsPerformance.csv'

In [19]:
# Load the CSV referenced by df_path into the working DataFrame.
df = pd.read_csv(filepath_or_buffer=df_path)

In [20]:
# Show the column labels of the loaded frame; the plots below refer to columns by name.
df.columns

In [21]:
# Horizontal box plots of reading score per parental education level, split by
# test-preparation status. Swapping x and y gives the vertical orientation.
plt.figure(dpi=150, figsize=(10, 6))

sns.boxplot(
    data=df,
    x='reading score',
    y='parental level of education',
    hue='test preparation course',
    palette='Set2',
)

# Anchor the legend's upper-left corner just outside the right edge of the axes.
plt.legend(loc='upper left', bbox_to_anchor=(1.05, 1))

In [22]:
# Violin plots of reading score per parental education level, split by
# test-preparation status, with an explicit KDE bandwidth (bw_method=0.2).
# Variant worth trying: drop bw_method and pass split=True, inner='quartile'
# to draw the two hue levels as halves of one violin with quartile lines.
plt.figure(dpi=150, figsize=(12, 8))

sns.violinplot(
    data=df,
    x='reading score',
    y='parental level of education',
    hue='test preparation course',
    bw_method=0.2,
    palette='Set2',
)

# Anchor the legend's upper-left corner just outside the right edge of the axes.
plt.legend(loc='upper left', bbox_to_anchor=(1.05, 1))

In [23]:
# Swarm plot of math score by gender; dodge=True separates the two
# test-preparation groups into their own swarms.
plt.figure(dpi=150, figsize=(12, 8))

sns.swarmplot(
    data=df,
    x='math score',
    y='gender',
    hue='test preparation course',
    dodge=True,
    size=7,
    palette='Set2',
)

In [24]:
# Boxen (letter-value) plot of math score per test-preparation status, split by gender.
plt.figure(dpi=150, figsize=(12, 8))

sns.boxenplot(
    data=df,
    x='math score',
    y='test preparation course',
    hue='gender',
    palette='Set2',
)

# 66. Seaborn - Comparison Plots - Understanding the Plot Types

# 67. Seaborn - Comparison Plots - Coding with Seaborn

In [25]:
# Absolute, machine-specific Windows path to the student-performance CSV used
# by the comparison-plot examples; edit it to match the local data folder.
df_path = r'F:\DataSpell\data_science_ml_learning\Section 7 Seaborn Data Visualizations\data\StudentsPerformance.csv'

In [26]:
# Load the CSV referenced by df_path into the working DataFrame.
df = pd.read_csv(filepath_or_buffer=df_path)

In [27]:
# Preview only the first rows instead of rendering the whole frame; this keeps
# the saved notebook output small while still showing the columns and value formats.
df.head()

In [28]:
# Joint distribution of math and reading scores as a translucent scatter
# coloured by gender, with marginal distributions on the sides.
# Other kinds to try (without hue): kind='reg' or kind='hex'.
sns.jointplot(
    data=df,
    x='math score',
    y='reading score',
    hue='gender',
    kind='scatter',
    alpha=0.2,
)



In [29]:
# Pairwise relationships between the numeric columns, coloured by gender.
# corner=True keeps only the lower triangle; the diagonal shows KDE curves.
# Variant: sns.pairplot(data=df, hue='gender', diag_kind='hist') for the full
# grid with histograms on the diagonal.
sns.pairplot(
    data=df,
    hue='gender',
    palette='Set2',
    diag_kind='kde',
    corner=True,
)

# 68. Seaborn Grid Plots

In [30]:
# Grid of violin plots of math score by gender: one row per race/ethnicity
# group and one column per lunch type.
# Variants explored for the same x/y:
#   kind='boxen'
#   kind='box' with row='lunch', hue='lunch' or col='lunch'
#   kind='box' with row='race/ethnicity', col='lunch'
sns.catplot(
    data=df,
    x='gender',
    y='math score',
    row='race/ethnicity',
    col='lunch',
    kind='violin',
)

In [31]:
# Custom pair grid coloured by gender: scatter plots above the diagonal, KDE
# contours below it, histograms with a KDE overlay on it, plus a legend.
# Each mapping method returns the grid itself, so the calls can be chained.
g = (
    sns.PairGrid(data=df, hue='gender')
    .map_upper(sns.scatterplot)
    .map_lower(sns.kdeplot)
    .map_diag(sns.histplot, kde=True)
    .add_legend()
)

# 69. Seaborn - Matrix Plots

In [34]:
# Absolute, machine-specific Windows path to the country-table CSV; edit it to
# match the local data folder.
df_path = r'F:\DataSpell\data_science_ml_learning\Section 7 Seaborn Data Visualizations\data\country_table.csv'

In [35]:
# Load the CSV referenced by df_path into the working DataFrame.
df = pd.read_csv(filepath_or_buffer=df_path)

In [37]:
# Use the 'Countries' column as the row index so the matrix plots label rows
# by country. The guard makes the cell safe to re-run: after the first run
# 'Countries' is no longer a column, and an unguarded set_index would raise
# KeyError.
if 'Countries' in df.columns:
    df = df.set_index('Countries')

In [48]:
# Annotated heatmap of every column except 'Life expectancy', with thin lines
# between cells and the colormap centred on 40.
plt.figure(dpi=150, figsize=(10, 6))

sns.heatmap(
    df.drop(columns='Life expectancy'),
    annot=True,
    cmap='viridis',
    center=40,
    linewidths=0.5,
)

In [50]:
# Clustered heatmap of every column except 'Life expectancy'. Rows are
# hierarchically clustered; col_cluster=False keeps the original column order
# (set col_cluster=True to cluster the columns as well).
#
# sns.clustermap is a figure-level function that creates its own figure, so a
# preceding plt.figure(...) call only yields an extra, empty figure. The size
# is passed through clustermap's own figsize parameter and the dpi is set on
# the figure owned by the returned grid.
g = sns.clustermap(
    df.drop(columns='Life expectancy'),
    annot=True,
    cmap='viridis',
    center=40,
    linewidths=0.5,
    col_cluster=False,
    figsize=(10, 6),
)
g.figure.set_dpi(150)