In [21]:
from pathlib import Path

import matplotlib.pyplot as plt
import pandas as pd
import plotly.express as px
import seaborn as sns

In [22]:
# Load the iris sample dataset through seaborn.
df = sns.load_dataset('iris')

# info() prints its summary to stdout, so it can run first; head() is kept as
# the cell's last expression so the notebook actually renders the preview
# (a bare expression in the middle of a cell is silently discarded).
df.info()
df.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 5 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   sepal_length  150 non-null    float64
 1   sepal_width   150 non-null    float64
 2   petal_length  150 non-null    float64
 3   petal_width   150 non-null    float64
 4   species       150 non-null    object 
dtypes: float64(4), object(1)
memory usage: 6.0+ KB


In [23]:
# List the column labels of the loaded frame (used as axis names in the plots below).
df.columns

Index(['sepal_length', 'sepal_width', 'petal_length', 'petal_width',
       'species'],
      dtype='object')

## Scatter plot

In [24]:
# Scatter of sepal width against sepal length, one colour per species.
fig_1 = px.scatter(df, x='sepal_length', y='sepal_width', color='species')
fig_1.show()

# save
# Create the target folder first so the export also works on a fresh checkout
# where ./outputs does not exist yet.
Path('./outputs').mkdir(parents=True, exist_ok=True)
fig_1.write_image('./outputs/fig_1.png', scale=3)

## Line plot

In [25]:
# sort dataset by the x-axis column so each line is drawn left to right
df_line = df.sort_values(by=['sepal_length'])

fig_2 = px.line(df_line, x='sepal_length', y='sepal_width', color='species')
fig_2.show()


# save
# Export the line figure itself (fig_2); the previous code wrote fig_1 here,
# so df_line.png contained the scatter plot instead of the line plot.
Path('./outputs').mkdir(parents=True, exist_ok=True)
fig_2.write_image('./outputs/df_line.png', scale=3)

## Bar plot

In [26]:
# Per-species mean of every measurement, with species restored as a column.
df_bar = (
    df
    .groupby('species')
    .mean()
    .reset_index()
)
df_bar.head()

Unnamed: 0,species,sepal_length,sepal_width,petal_length,petal_width
0,setosa,5.006,3.428,1.462,0.246
1,versicolor,5.936,2.77,4.26,1.326
2,virginica,6.588,2.974,5.552,2.026


In [27]:
# Bar chart of the mean sepal width for each species.
fig_3 = px.bar(
    df_bar,
    x='species',
    y='sepal_width',
)
fig_3.show()

In [28]:
# Box plot: sepal width distribution per species, coloured by species.
fig_4 = px.box(
    df,
    x='species',
    y='sepal_width',
    color='species',
)
fig_4.show()

In [29]:
# violin plot of sepal width per species, with an embedded box plot
fig_5 = px.violin(df, x='species', y='sepal_width', color='species', box=True)
fig_5.show()

# save as image
# Export the violin figure itself (fig_5); the previous code wrote fig_1 here,
# so violin.png contained the scatter plot instead of the violin plot.
Path('./outputs').mkdir(parents=True, exist_ok=True)
fig_5.write_image('./outputs/violin.png', scale=3)

In [30]:
# Histogram of sepal width, one colour per species.
fig_6 = px.histogram(
    df,
    x='sepal_width',
    color='species',
)
fig_6.show()


In [33]:
# Pie chart of how many rows belong to each species.
# Name the index and the counts explicitly so the frame always ends up with
# exactly the two columns 'species' and 'count'.
df_pie = (
    df['species']
    .value_counts()
    .rename_axis('species')
    .reset_index(name='count')
)

fig_7 = px.pie(
    df_pie,
    names='species',
    values='count',
)
fig_7.show()



In [34]:
# 3D scatter plot
# Use three numeric measurements for the axes. Species is already encoded by
# colour, so also placing it on the y axis spent a spatial dimension on a
# categorical value and collapsed the cloud into three flat slices.
fig_8 = px.scatter_3d(df, x='sepal_width', y='sepal_length', z='petal_width', color='species')
fig_8.show()

In [36]:
# area chart
# Sort by the column that is plotted on the x axis; sorting by sepal_length
# while plotting sepal_width on x made each area trace jump back and forth.
df_area = df.sort_values(by=['sepal_width'])
fig_9 = px.area(df_area, x='sepal_width', y='sepal_length', color='species')
fig_9.show()

In [43]:
# Sunburst chart: inner ring is species, outer ring is petal width,
# wedge size is the number of rows in each (species, petal_width) pair.
df_sunburst = (
    df
    .groupby(['species', 'petal_width'])
    .size()
    .reset_index(name='counts')
)

fig_10 = px.sunburst(
    df_sunburst,
    path=['species', 'petal_width'],
    values='counts',
)
fig_10.show()

In [51]:

# Parallel coordinates plot
# The plot is coloured by a numeric column, so derive an integer code per
# species. The code is added to a copy (df_parallel) rather than to df, which
# keeps this cell re-runnable on a fresh kernel and leaves df unchanged for
# the other cells.
df_parallel = df.assign(species_id=df['species'].astype('category').cat.codes)

fig_11 = px.parallel_coordinates(
    df_parallel,
    color='species_id',
    # `dimensions` selects the axes to draw; `labels` must be a dict mapping
    # column names to display names (the list passed before was ignored).
    dimensions=['sepal_length', 'sepal_width', 'petal_length', 'petal_width'],
    labels={'species_id': 'species'},
)
fig_11.show()

In [52]:
# Density contour of sepal width against sepal length, one colour per species.
fig_13 = px.density_contour(
    df,
    x='sepal_length',
    y='sepal_width',
    color='species',
)
fig_13.show()

In [55]:
# Ternary scatter: sepal length, sepal width and petal width on the three
# axes, coloured by species.
fig_14 = px.scatter_ternary(
    df,
    a='sepal_length',
    b='sepal_width',
    c='petal_width',
    color='species',
)
fig_14.show()

In [58]:
# Polar (radar-style) chart: one spoke per species, radius is the mean sepal length.
df_redar = df.groupby('species').mean().reset_index()

fig_15 = px.line_polar(
    df_redar,
    r='sepal_length',
    theta='species',
    line_close=True,  # join the last point back to the first
)
fig_15.show()