# Interactive plots

In this notebook I would like to show you the beautiful interactive plots you can create with not too much code. There is still a lot to explore, and I believe you can create even better plots with a little more effort. But be careful, there is a caveat: the more interactive plots you create, the slower the notebook becomes to run and save.

In [None]:
import numpy as np
import pandas as pd

import plotly
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
#from plotly.offline import init_notebook_mode, iplot
#init_notebook_mode(connected=True)

In [None]:
# Load the competition CSV from the Kaggle input directory into a DataFrame
data_df = pd.read_csv("/kaggle/input/tabular-playground-series-jun-2022/data.csv")

In [None]:
# Bare expression as the last line of the cell, so the notebook renders the DataFrame
data_df

## Create interactive plots for every column in dataset

In [None]:
# Interactive bar chart: percentage of missing values per column
missing_ser = 100 * data_df.isna().sum() / len(data_df)

# Skip the first column (explicit positional slice) and sort so the
# columns with the most missing values come first
missing_ser = missing_ser.iloc[1:].sort_values(ascending=False)

# One colour per bar, sampled evenly across the "Phase" colour scale.
# max(..., 1) keeps the denominator non-zero when only one bar is left.
n_colors = len(missing_ser)
color_step = max(n_colors - 1, 1)
colors = px.colors.sample_colorscale("Phase", [n / color_step for n in range(n_colors)])


trace0 = go.Bar(x=missing_ser.index,
                y=missing_ser.values,
                marker_color=colors)

data = [trace0]
layout = go.Layout(title="Missing values")
fig = go.Figure(data, layout)
fig.update_layout(xaxis_title="Columns name",
                 yaxis_title="% of missing values",
                 template="plotly_dark",
                 height=350)
fig.show()

In [None]:
# Histogram of one feature with a box plot drawn in the margin.
# px.histogram already returns a complete Figure, so it is used directly
# instead of being copied into a second go.Figure.
fig = px.histogram(data_df, x='F_1_11', nbins=30, marginal='box',
                   template='plotly_dark', height=350)
# White outlines make neighbouring bars distinguishable on the dark theme
fig.update_traces(marker_line_width=1, marker_line_color="white")
fig.show()

In [None]:
# Columns whose name starts with 'F_1', and the grid shape used to plot them
num_cols = 3
is_f1_column = data_df.columns.str.startswith('F_1')
features = data_df.columns[is_f1_column]
num_rows = len(features) // num_cols

In [None]:
# Grid of histograms, one subplot per selected feature.
# Rows are recomputed with ceiling division so a trailing, partially filled
# row is kept when the feature count is not a multiple of num_cols.
num_rows = (len(features) + num_cols - 1) // num_cols
fig = make_subplots(rows=num_rows, cols=num_cols)

for idx, feature in enumerate(features):
    # Fill the grid row by row; subplot positions are 1-based
    row, col = divmod(idx, num_cols)
    fig.add_trace(
        go.Histogram(x=data_df[feature], nbinsx=20,
                     marker=dict(color='#511845'), name=feature),
        row=row + 1, col=col + 1)

fig.update_traces(marker_line_width=1, marker_line_color="white")
fig.update_layout(template='plotly_dark', height=600,
                  title="Plots for features starts with F_1")
fig.show()

In [None]:
# Columns whose name starts with 'F_4', and the grid shape used to plot them
num_cols = 3
is_f4_column = data_df.columns.str.startswith('F_4')
features = data_df.columns[is_f4_column]
num_rows = len(features) // num_cols

In [None]:
# Grid of histograms, one subplot per selected feature.
# Rows are recomputed with ceiling division so a trailing, partially filled
# row is kept when the feature count is not a multiple of num_cols.
num_rows = (len(features) + num_cols - 1) // num_cols
fig = make_subplots(rows=num_rows, cols=num_cols)

for idx, feature in enumerate(features):
    # Fill the grid row by row; subplot positions are 1-based
    row, col = divmod(idx, num_cols)
    fig.add_trace(
        go.Histogram(x=data_df[feature], nbinsx=20,
                     marker=dict(color='#C70039'), name=feature),
        row=row + 1, col=col + 1)

fig.update_traces(marker_line_width=1, marker_line_color="white")
fig.update_layout(template='plotly_dark', height=600,
                  title="Plots for features starts with F_4")
fig.show()

Unfortunately I had to comment out a few plots, as there is some problem with running and saving too many interactive plots in a Kaggle notebook kernel. I haven't run this script elsewhere (e.g. Colab), but I hope there is no issue there. I wonder what causes this behaviour in Kaggle.

In [None]:
def create_plots(df, names, color):
    """
    Show a grid of interactive histograms, one per column of ``df``
    whose name starts with ``names``.

    The histograms are laid out three per row and the finished figure
    is displayed with ``fig.show()``; nothing is returned.

    Parameters
    ----------
    df : DataFrame holding the columns to plot.
    names : prefix passed to ``df.columns.str.startswith`` to pick columns.
    color : marker colour applied to every histogram.

    Raises
    ------
    ValueError
        If no column name starts with ``names``.
    """
    features = df.columns[df.columns.str.startswith(names)]
    if features.empty:
        # Fail with a clear message before any figure is built
        raise ValueError(f"No columns start with {names!r}")

    num_cols = 3
    # Ceiling division: a trailing, partially filled row is kept, so no
    # feature is dropped when the count is not a multiple of num_cols.
    num_rows = (len(features) + num_cols - 1) // num_cols

    # Initialize figure with subplots
    fig = make_subplots(rows=num_rows, cols=num_cols)

    for idx, feature in enumerate(features):
        # Fill the grid row by row; subplot positions are 1-based
        row, col = divmod(idx, num_cols)
        fig.add_trace(
            go.Histogram(x=df[feature], nbinsx=20,
                         marker=dict(color=color), name=feature),
            row=row + 1, col=col + 1)

    # Taller canvas once there are more than 15 subplots to fit
    h = 1000 if len(features) > 15 else 600

    fig.update_traces(marker_line_width=1, marker_line_color="white")
    fig.update_layout(template='plotly_dark', height=h,
                      title=f"Plots for features starts with {names}")
    fig.show()

In [None]:
#create_plots(data_df,'F_1','#511845')

In [None]:
#create_plots(data_df, 'F_3','#FF5733')

In [None]:
#create_plots(data_df,'F_2','#900C3F')

In [None]:
#create_plots(data_df, 'F_4','#C70039')

# If you like it, do not forget to upvote ❤👍