In [2]:
# Bokeh Data Visualizations - Titanic Dataset (10 Unique Interactive Plots)
!pip install bokeh

import pandas as pd
from bokeh.plotting import figure, show, output_notebook
from bokeh.models import ColumnDataSource, HoverTool
from bokeh.transform import factor_cmap
from bokeh.layouts import gridplot
from bokeh.palettes import Category10, Spectral6

# Route Bokeh output to the notebook so that show() renders inline
output_notebook()

# Read the passenger data from its local CSV file
DATA_PATH = "C:\\Users\\Admin\\Downloads\\train_dataset.csv"
df = pd.read_csv(DATA_PATH)

# Wrap the frame in a ColumnDataSource so glyphs and hover tooltips can
# refer to its columns by name
source = ColumnDataSource(data=df)


Collecting bokeh
  Downloading bokeh-3.8.0-py3-none-any.whl.metadata (10 kB)
Collecting xyzservices>=2021.09.1 (from bokeh)
  Downloading xyzservices-2025.4.0-py3-none-any.whl.metadata (4.3 kB)
Downloading bokeh-3.8.0-py3-none-any.whl (7.2 MB)
   ---------------------------------------- 0.0/7.2 MB ? eta -:--:--
   - -------------------------------------- 0.3/7.2 MB ? eta -:--:--
   ----- ---------------------------------- 1.0/7.2 MB 2.5 MB/s eta 0:00:03
   -------- ------------------------------- 1.6/7.2 MB 2.5 MB/s eta 0:00:03
   ----------- ---------------------------- 2.1/7.2 MB 2.6 MB/s eta 0:00:02
   -------------- ------------------------- 2.6/7.2 MB 2.6 MB/s eta 0:00:02
   ----------------- ---------------------- 3.1/7.2 MB 2.6 MB/s eta 0:00:02
   -------------------- ------------------- 3.7/7.2 MB 2.7 MB/s eta 0:00:02
   ----------------------- ---------------- 4.2/7.2 MB 2.6 MB/s eta 0:00:02
   --------------------------- ------------ 5.0/7.2 MB 2.6 MB/s eta 0:00:01
   -------

In [26]:
# Re-run Bokeh's notebook setup so that later show() calls render inline.
# NOTE(review): the first cell already imports and calls output_notebook();
# this cell looks redundant on a fresh top-to-bottom run - confirm before removing.
from bokeh.plotting import output_notebook
output_notebook()


In [28]:
# 1. Age distribution (histogram)
import numpy as np

# Drop missing ages, then count passengers in 20 equal-width bins
ages = df['Age'].dropna()
hist, edges = np.histogram(ages, bins=20)

p1 = figure(
    title="Age Distribution",
    x_axis_label='Age',
    y_axis_label='Count',
)
# One rectangle per bin: consecutive edges give the sides, the count gives the top
p1.quad(
    left=edges[:-1],
    right=edges[1:],
    bottom=0,
    top=hist,
    fill_color="skyblue",
    line_color="black",
)
show(p1)

In [29]:
# 2. Survival count (bar chart)
# Counts are sorted by the Survived value so they line up with the labels
# below; this assumes Survived only takes the values 0 and 1.
survived_counts = df['Survived'].value_counts().sort_index()
survival_labels = ['0', '1']

p2 = figure(
    x_range=survival_labels,
    title="Survival Count",
    x_axis_label='Survived',
    y_axis_label='Count',
)
p2.vbar(
    x=survival_labels,
    top=survived_counts.values,
    width=0.5,
    color=["red", "green"],
)
show(p2)

In [30]:
# 3. Passenger class distribution (bar chart)
pclass_counts = df['Pclass'].value_counts().sort_index()

# Categorical axes need string factors, so stringify the class values once
class_labels = list(map(str, pclass_counts.index))

p3 = figure(
    x_range=class_labels,
    title="Passenger Class Distribution",
    x_axis_label='Class',
    y_axis_label='Count',
)
p3.vbar(
    x=class_labels,
    top=pclass_counts.values,
    width=0.5,
    color=Category10[3],
)
show(p3)

In [31]:
# 4. Gender distribution (pie chart built from wedge glyphs)

from math import pi
from bokeh.transform import cumsum

# One row per Sex value with its passenger count
gender_counts = df['Sex'].value_counts()
data = pd.DataFrame({'Sex': gender_counts.index, 'value': gender_counts.values})
# Each slice's angle is its share of the full circle
data['angle'] = data['value']/data['value'].sum() * 2*pi
# One colour per slice, in value_counts order (most frequent first);
# assumes the Sex column has exactly two distinct values
data['color'] = ["lightcoral", "lightskyblue"]

p4 = figure(title="Gender Distribution", toolbar_location=None, tools="hover", tooltips="@Sex: @value", x_range=(-0.5, 1.0))
# legend_field labels each slice by its Sex value, so the chart can be read
# without hovering; the x_range offset leaves room for the legend on the right
p4.wedge(x=0, y=1, radius=0.4, start_angle=cumsum('angle', include_zero=True), end_angle=cumsum('angle'), line_color="white", fill_color='color', legend_field='Sex', source=data)

# Cartesian axes and grid lines carry no meaning on a pie chart
p4.axis.axis_label = None
p4.axis.visible = False
p4.grid.grid_line_color = None
show(p4)

In [32]:
# 5. Age vs Fare scatter plot
# ------------------------------------------------------------
p5 = figure(title="Age vs Fare", x_axis_label='Age', y_axis_label='Fare', tools="pan,wheel_zoom,box_zoom,reset,hover")
# scatter() (default marker: circle) replaces circle(): passing a screen-unit
# `size` to circle() is deprecated since Bokeh 3.4
p5.scatter('Age', 'Fare', size=7, source=source, color='purple', alpha=0.5)
# Configure the hover tool requested via `tools` to show per-passenger details
hover = p5.select(dict(type=HoverTool))
hover.tooltips = [("Name", "@Name"), ("Age", "@Age"), ("Fare", "@Fare")]

show(p5)



In [33]:
# 6. Average fare by passenger class (bar chart)
# One bar per class; the bar height is the mean Fare within that class.
avg_fare = df.groupby('Pclass')['Fare'].mean()

# Categorical axes need string factors
fare_class_labels = list(map(str, avg_fare.index))

p6 = figure(
    x_range=fare_class_labels,
    title="Average Fare by Class",
    x_axis_label='Class',
    y_axis_label='Average Fare',
)
p6.vbar(
    x=fare_class_labels,
    top=avg_fare.values,
    width=0.5,
    color=Category10[3],
)
show(p6)

In [25]:
# 7. Survival rate by Pclass
# Mean of Survived per class; this is a rate assuming Survived is coded 0/1
surv_rate_class = df.groupby('Pclass')['Survived'].mean()

# Create figure with one categorical factor per class
p7 = figure(title="Survival Rate by Class",
            x_axis_label='Class',
            y_axis_label='Survival Rate',
            x_range=[str(i) for i in surv_rate_class.index])

# Draw line
p7.line([str(i) for i in surv_rate_class.index],
        surv_rate_class.values,
        line_width=2, color="teal")

# Draw round markers on the same points. scatter() (default marker: circle)
# replaces circle(): passing a screen-unit `size` to circle() is deprecated
# since Bokeh 3.4.
p7.scatter([str(i) for i in surv_rate_class.index],
           surv_rate_class.values,
           size=8, color="orange")

# Show plot
show(p7)




In [34]:
#8. Age distribution by survival (histogram overlay)
# ------------------------------------------------------------
# Bin both groups on the SAME edges, computed from all known ages. Binning each
# group on its own range gives the two overlaid histograms different bin
# positions and widths, so their bar heights cannot be compared.
age_edges = np.histogram_bin_edges(df['Age'].dropna(), bins=20)
hist0, edges0 = np.histogram(df[df['Survived']==0]['Age'].dropna(), bins=age_edges)
hist1, edges1 = np.histogram(df[df['Survived']==1]['Age'].dropna(), bins=age_edges)
p8 = figure(title="Age Distribution by Survival", x_axis_label='Age', y_axis_label='Count')
# Semi-transparent fills keep both distributions visible where they overlap
p8.quad(top=hist0, bottom=0, left=edges0[:-1], right=edges0[1:], color='red', alpha=0.5, legend_label="Did Not Survive")
p8.quad(top=hist1, bottom=0, left=edges1[:-1], right=edges1[1:], color='green', alpha=0.5, legend_label="Survived")
p8.legend.location = "top_right"
show(p8)

In [35]:
# 9. Average fare by age (line plot)
# ------------------------------------------------------------
# Mean Fare for each distinct Age value present in the data
avg_fare_age = df.groupby('Age')['Fare'].mean()

p9 = figure(
    title="Average Fare by Age",
    x_axis_label='Age',
    y_axis_label='Average Fare',
)
p9.line(
    avg_fare_age.index,
    avg_fare_age.values,
    color='orange',
    line_width=2,
)
show(p9)

In [36]:
from bokeh.plotting import figure, show
from bokeh.models import ColumnDataSource, HoverTool
from bokeh.transform import factor_cmap
from bokeh.palettes import Category10

# 10. Age vs Fare, coloured and sized by passenger class

# Add string column for color mapping (factor_cmap matches on string factors)
df['Pclass_str'] = df['Pclass'].astype(str)

# Add a numeric size column for plotting: marker size grows with the class number
df['size'] = df['Pclass'] * 5

# Rebuild the ColumnDataSource so it includes the two new columns
source = ColumnDataSource(df)

# Create figure
p10 = figure(title="Age vs Fare vs Class", x_axis_label='Age', y_axis_label='Fare',
             tools="pan,wheel_zoom,box_zoom,reset,hover")

# Draw round markers using columns for color and size. scatter() (default
# marker: circle) replaces circle(): passing a screen-unit `size` to circle()
# is deprecated since Bokeh 3.4.
p10.scatter(
    x='Age',
    y='Fare',
    size='size',  # refer to the new size column
    color=factor_cmap('Pclass_str', palette=Category10[3], factors=['1','2','3']),
    alpha=0.6,
    source=source
)

# Hover tool: configure the one requested via `tools`
hover10 = p10.select(dict(type=HoverTool))
hover10.tooltips = [("Name", "@Name"), ("Age", "@Age"), ("Fare", "@Fare"), ("Class", "@Pclass")]

# Show plot
show(p10)




In [39]:
# Single Cell: All 10 Bokeh Plots (Corrected width/height)
import pandas as pd
import numpy as np
from bokeh.plotting import figure, show, output_notebook
from bokeh.models import ColumnDataSource, HoverTool
from bokeh.transform import factor_cmap, cumsum
from bokeh.layouts import gridplot
from bokeh.palettes import Category10
from math import pi

# Route Bokeh output to the notebook so that show() renders inline
output_notebook()

# Load the data and add the helper columns used by plot 10
df = pd.read_csv("C:\\Users\\Admin\\Downloads\\train_dataset.csv")
df['Pclass_str'] = df['Pclass'].astype(str)  # string factors for factor_cmap
df['size'] = df['Pclass']*5                  # per-row marker size
source = ColumnDataSource(df)

# 1. Age histogram
hist, edges = np.histogram(df['Age'].dropna(), bins=20)
p1 = figure(title="Age Distribution", x_axis_label='Age', y_axis_label='Count', width=300, height=300)
p1.quad(top=hist, bottom=0, left=edges[:-1], right=edges[1:], fill_color="skyblue", line_color="black")

# 2. Survival count
survived_counts = df['Survived'].value_counts().sort_index()
p2 = figure(x_range=['0','1'], title="Survival Count", x_axis_label='Survived', y_axis_label='Count', width=300, height=300)
p2.vbar(x=['0','1'], top=survived_counts.values, width=0.5, color=["red","green"])

# 3. Passenger class distribution
pclass_counts = df['Pclass'].value_counts().sort_index()
p3 = figure(x_range=[str(i) for i in pclass_counts.index], title="Passenger Class Distribution",
            x_axis_label='Class', y_axis_label='Count', width=300, height=300)
p3.vbar(x=[str(i) for i in pclass_counts.index], top=pclass_counts.values, width=0.5, color=Category10[3])

# 4. Gender pie
gender_counts = df['Sex'].value_counts()
data = pd.DataFrame({'Sex': gender_counts.index, 'value': gender_counts.values})
data['angle'] = data['value']/data['value'].sum() * 2*pi
# One colour per slice, in value_counts order; assumes exactly two Sex values
data['color'] = ["lightcoral", "lightskyblue"]
p4 = figure(title="Gender Distribution", toolbar_location=None, tools="hover", tooltips="@Sex: @value", x_range=(-0.5,1.0), width=300, height=300)
# legend_field labels each slice so the pie can be read without hovering;
# the x_range offset leaves room for the legend on the right
p4.wedge(x=0, y=1, radius=0.4, start_angle=cumsum('angle', include_zero=True),
         end_angle=cumsum('angle'), line_color="white", fill_color='color',
         legend_field='Sex', source=data)
# Cartesian axes and grid lines carry no meaning on a pie chart
p4.axis.axis_label = None
p4.axis.visible = False
p4.grid.grid_line_color = None

# 5. Age vs Fare scatter
p5 = figure(title="Age vs Fare", x_axis_label='Age', y_axis_label='Fare',
            tools="pan,wheel_zoom,box_zoom,reset,hover", width=300, height=300)
# scatter() (default marker: circle) replaces circle(): passing a screen-unit
# `size` to circle() is deprecated since Bokeh 3.4
p5.scatter('Age','Fare', size=7, source=source, color='purple', alpha=0.5)
hover = p5.select(dict(type=HoverTool))
hover.tooltips = [("Name","@Name"),("Age","@Age"),("Fare","@Fare")]

# 6. Average fare by class
avg_fare = df.groupby('Pclass')['Fare'].mean()
p6 = figure(x_range=[str(i) for i in avg_fare.index], title="Average Fare by Class",
            x_axis_label='Class', y_axis_label='Average Fare', width=300, height=300)
p6.vbar(x=[str(i) for i in avg_fare.index], top=avg_fare.values, width=0.5, color=Category10[3])

# 7. Survival rate by class
surv_rate_class = df.groupby('Pclass')['Survived'].mean()
p7 = figure(title="Survival Rate by Class", x_axis_label='Class', y_axis_label='Survival Rate',
            x_range=[str(i) for i in surv_rate_class.index], width=300, height=300)
p7.line([str(i) for i in surv_rate_class.index], surv_rate_class.values, line_width=2, color="teal")
# Round markers on the line's points; scatter() instead of the deprecated circle(size=...)
p7.scatter([str(i) for i in surv_rate_class.index], surv_rate_class.values, size=8, color="orange")

# 8. Age distribution by survival
# Bin both groups on the SAME edges (computed from all known ages); per-group
# binning would give the overlaid histograms different bin positions and
# widths, so their bar heights could not be compared.
age_edges = np.histogram_bin_edges(df['Age'].dropna(), bins=20)
hist0, edges0 = np.histogram(df[df['Survived']==0]['Age'].dropna(), bins=age_edges)
hist1, edges1 = np.histogram(df[df['Survived']==1]['Age'].dropna(), bins=age_edges)
p8 = figure(title="Age Distribution by Survival", x_axis_label='Age', y_axis_label='Count', width=300, height=300)
p8.quad(top=hist0, bottom=0, left=edges0[:-1], right=edges0[1:], color='red', alpha=0.5, legend_label="Did Not Survive")
p8.quad(top=hist1, bottom=0, left=edges1[:-1], right=edges1[1:], color='green', alpha=0.5, legend_label="Survived")
p8.legend.location="top_right"

# 9. Average fare by age
avg_fare_age = df.groupby('Age')['Fare'].mean()
p9 = figure(title="Average Fare by Age", x_axis_label='Age', y_axis_label='Average Fare', width=300, height=300)
p9.line(avg_fare_age.index, avg_fare_age.values, line_width=2, color='orange')

# 10. Age vs Fare vs Class scatter
p10 = figure(title="Age vs Fare vs Class", x_axis_label='Age', y_axis_label='Fare',
             tools="pan,wheel_zoom,box_zoom,reset,hover", width=300, height=300)
# Colour and size come from the helper columns; scatter() instead of the
# deprecated circle(size=...)
p10.scatter(x='Age', y='Fare', size='size',
            color=factor_cmap('Pclass_str', palette=Category10[3], factors=['1','2','3']),
            alpha=0.6, source=source)
hover10 = p10.select(dict(type=HoverTool))
hover10.tooltips = [("Name","@Name"),("Age","@Age"),("Fare","@Fare"),("Class","@Pclass")]

# Grid layout: 2 rows x 5 columns
grid = gridplot([[p1,p2,p3,p4,p5],
                 [p6,p7,p8,p9,p10]])

show(grid)


