In [1]:
import os
import shutil

# Output folder that every chart in this notebook is written into
chart_dir = "obesity_analysis_charts"

# Start from an empty folder: drop whatever a previous run left behind
stale_output_present = os.path.exists(chart_dir)
if stale_output_present:
    shutil.rmtree(chart_dir)
os.makedirs(chart_dir)

print(f"Created directory: {chart_dir}")

Created directory: obesity_analysis_charts


In [2]:
from ucimlrepo import fetch_ucirepo 
import pandas as pd
import plotly.express as px

# Download dataset 544 from the UCI Machine Learning Repository
estimation_of_obesity_levels = fetch_ucirepo(id=544)

# Pull the feature table and the target table out of the fetched payload
dataset_payload = estimation_of_obesity_levels.data
X = dataset_payload.features
y = dataset_payload.targets

# The targets arrive as a DataFrame; keep its first column as a Series
target_column_name = y.columns[0]
y = y[target_column_name]

# Now you can proceed with creating charts

In [3]:
# Save Interactive Charts as HTML Files

import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import pandas as pd
import numpy as np

# Function to save charts
def save_chart(fig, filename, description="", output_dir=None):
    """Save a Plotly figure as an interactive HTML file.

    Parameters
    ----------
    fig : object exposing ``write_html(path, include_plotlyjs=...)``
        The figure to export (a Plotly figure in this notebook).
    filename : str
        File name (not a full path) of the HTML file to create.
    description : str, optional
        Short text echoed in the confirmation message.
    output_dir : str, optional
        Folder to write into. Defaults to the notebook-level ``chart_dir``.

    Returns
    -------
    str
        Path of the written file (``output_dir`` joined with ``filename``).
    """
    target_dir = chart_dir if output_dir is None else output_dir
    # The folder may have been removed since the setup cell ran; recreate it
    # rather than failing inside write_html.
    os.makedirs(target_dir, exist_ok=True)
    filepath = os.path.join(target_dir, filename)
    # 'cdn' keeps each file small by linking to Plotly.js instead of embedding
    # it; the exported page therefore needs network access to render.
    fig.write_html(filepath, include_plotlyjs='cdn')
    print(f"Saved: {filename} - {description}")
    return filepath

In [4]:
# Donut chart: share of records in each obesity class
level_counts = y.value_counts()  # tallied once, reused for labels and slice sizes

fig1 = px.pie(
    names=level_counts.index,
    values=level_counts.values,
    title='Distribution of Obesity Levels in the Dataset',
    color_discrete_sequence=px.colors.sequential.RdBu,
    hole=0.3,
    template='plotly_white'
)

# Slice labels sit inside the ring; the hover box spells out count and share
slice_style = dict(
    textposition='inside',
    textinfo='percent+label',
    hoverinfo='label+percent+value',
    hovertemplate="<b>%{label}</b><br>Count: %{value}<br>Percentage: %{percent}"
)
fig1.update_traces(**slice_style)

# Caption placed in the centre of the donut hole
centre_caption = dict(text='Obesity<br>Levels', x=0.5, y=0.5, font_size=20, showarrow=False)
fig1.update_layout(
    annotations=[centre_caption],
    showlegend=True,
    legend_title_text="Obesity Categories",
    height=600
)

save_chart(fig1, "1_obesity_distribution.html", "Interactive pie chart showing obesity level distribution")

Saved: 1_obesity_distribution.html - Interactive pie chart showing obesity level distribution


'obesity_analysis_charts\\1_obesity_distribution.html'

In [5]:
# BMI visualizer: weight vs height scatter coloured by BMI (needs both columns)
if 'Height' in X.columns and 'Weight' in X.columns:
    # BMI = weight [kg] / (height [m]) ** 2. The hover text, axis title and the
    # +/-10 reference segments below all speak in centimetres, so bring the
    # height column to cm first. A median below 3 cannot be centimetres, so such
    # a column is treated as metres (heuristic; dividing metre values by 100
    # would inflate every BMI by a factor of 10,000).
    height_raw = X['Height']
    height_cm = height_raw * 100 if height_raw.median() < 3 else height_raw
    bmi = X['Weight'] / ((height_cm / 100) ** 2)

    # Create BMI visualizer
    fig2 = go.Figure()

    # One marker per record, coloured on a continuous BMI scale
    fig2.add_trace(go.Scatter(
        x=X['Weight'],
        y=height_cm,
        mode='markers',
        marker=dict(
            size=8,
            color=bmi,
            colorscale='RdYlBu_r',
            showscale=True,
            colorbar=dict(title="BMI", tickvals=[18.5, 25, 30, 35, 40],
                         ticktext=['Under', 'Normal', 'Over', 'Obese I', 'Obese II+']),
            line=dict(width=1, color='DarkSlateGrey')
        ),
        text=[f"Person {i+1}<br>Weight: {w}kg<br>Height: {h:.1f}cm<br>BMI: {b:.1f}<br>Obesity: {o}"
              for i, (w, h, b, o) in enumerate(zip(X['Weight'], height_cm, bmi, y))],
        hoverinfo='text',
        name='Individuals'
    ))

    # (upper BMI bound of the category, label, line colour).
    # The last bound (100) is a sentinel for the open-ended top category.
    bmi_categories = [
        (18.5, "Underweight", "#00CC96"),
        (25, "Normal", "#19D3F3"),
        (30, "Overweight", "#FFA15A"),
        (35, "Obese I", "#EF553B"),
        (100, "Obese II+", "#AB63FA")
    ]

    # Reference segments are drawn for a person of average height
    avg_height = height_cm.mean()

    for bmi_threshold, label, color in bmi_categories:
        # Weight at which a person of average height reaches this BMI bound
        weight_at_threshold = bmi_threshold * ((avg_height / 100) ** 2)
        fig2.add_trace(go.Scatter(
            x=[weight_at_threshold, weight_at_threshold],
            y=[avg_height - 10, avg_height + 10],
            mode='lines',
            line=dict(color=color, width=3, dash='dash'),
            # Each threshold is the category's upper bound, hence "<"
            name=f"{label} (BMI < {bmi_threshold})",
            showlegend=True,
            hoverinfo='skip'
        ))

    fig2.update_layout(
        title='BMI Visualizer: Height vs Weight',
        xaxis_title='Weight (kg)',
        yaxis_title='Height (cm)',
        hovermode='closest',
        template='plotly_white',
        height=700,
        legend=dict(
            orientation="h",
            yanchor="bottom",
            y=1.02,
            xanchor="right",
            x=1
        )
    )

    save_chart(fig2, "2_bmi_visualizer.html", "Interactive BMI calculator and visualizer")

Saved: 2_bmi_visualizer.html - Interactive BMI calculator and visualizer


In [6]:
# Age spread per obesity class, drawn only when the dataset has an Age column
if 'Age' in X.columns:
    # Two-column frame pairing each record's age with its obesity class
    age_by_level = pd.DataFrame({'Age': X['Age'], 'Obesity Level': y})

    fig3 = px.box(
        age_by_level,
        x='Obesity Level',
        y='Age',
        color='Obesity Level',
        points='all',  # overlay every individual observation next to its box
        title='Age Distribution by Obesity Level',
        template='plotly_white',
        color_discrete_sequence=px.colors.qualitative.Set3
    )

    # The x axis already names each class, so the legend is switched off
    fig3.update_layout(
        height=600,
        showlegend=False,
        xaxis_title="Obesity Level",
        yaxis_title="Age (years)"
    )
    fig3.update_xaxes(tickangle=45)

    save_chart(fig3, "3_age_distribution.html", "Age analysis by obesity level")

Saved: 3_age_distribution.html - Age analysis by obesity level


In [7]:
# Lifestyle dashboard: one row per factor, histogram on the left and a
# per-obesity-level box plot on the right.
lifestyle_factors = ['FCVC', 'FAF', 'CH2O', 'NCP', 'TUE']
available_factors = [f for f in lifestyle_factors if f in X.columns]

if available_factors:
    rows = len(available_factors)
    cols = 2

    # make_subplots consumes subplot_titles in row-major order
    # (row 1 col 1, row 1 col 2, row 2 col 1, ...), so the two titles of each
    # factor must be adjacent in the list.
    subplot_titles = [
        title
        for factor in available_factors
        for title in (f"{factor} Distribution", f"{factor} by Obesity Level")
    ]

    fig4 = make_subplots(
        rows=rows, cols=cols,
        subplot_titles=subplot_titles,
        vertical_spacing=0.1,
        horizontal_spacing=0.15
    )

    # Human-readable axis captions for the dataset's abbreviated column names
    factor_descriptions = {
        'FCVC': 'Frequency of Vegetable Consumption',
        'FAF': 'Physical Activity Frequency',
        'CH2O': 'Daily Water Intake',
        'NCP': 'Number of Main Meals',
        'TUE': 'Technology Use (hours)'
    }

    for i, factor in enumerate(available_factors, 1):
        row = i

        # Left column: Distribution histogram
        fig4.add_trace(
            go.Histogram(
                x=X[factor],
                nbinsx=20,
                name=f'{factor} Distribution',
                marker_color=px.colors.sequential.Blues[3],
                showlegend=False
            ),
            row=row, col=1
        )

        # Right column: Box plot by obesity level
        fig4.add_trace(
            go.Box(
                y=X[factor],
                x=y,
                name=f'{factor} by Obesity',
                # Cycle through the palette so any number of factors gets a colour
                marker_color=px.colors.qualitative.Set2[i % len(px.colors.qualitative.Set2)],
                showlegend=False
            ),
            row=row, col=2
        )

        # Same caption on both panels of the row
        description = factor_descriptions.get(factor, factor)
        fig4.update_yaxes(title_text=description, row=row, col=1)
        fig4.update_yaxes(title_text=description, row=row, col=2)

        # Rotate the category labels on every box-plot panel, not just the
        # first three rows
        fig4.update_xaxes(tickangle=45, row=row, col=2)

    fig4.update_layout(
        title='Lifestyle Factors Analysis Dashboard',
        template='plotly_white',
        height=300 * rows,
        showlegend=False
    )

    save_chart(fig4, "4_lifestyle_dashboard.html", "Comprehensive lifestyle factors analysis")

Saved: 4_lifestyle_dashboard.html - Comprehensive lifestyle factors analysis


In [8]:
# Correlation heatmap over every feature plus the target.
# Numeric columns are used as-is; any other column is replaced by the integer
# codes pd.factorize assigns to its distinct values.
encoded_columns = {
    col: (X[col] if pd.api.types.is_numeric_dtype(X[col]) else pd.factorize(X[col])[0])
    for col in X.columns
}
df_numeric = pd.DataFrame(encoded_columns, index=X.index)

# The target gets the same treatment: kept when numeric, integer-coded otherwise
target_is_numeric = pd.api.types.is_numeric_dtype(y)
df_numeric['Obesity_Level'] = y if target_is_numeric else pd.factorize(y)[0]

corr_matrix = df_numeric.corr()

# Diverging colour scale centred on zero, with the rounded coefficient in each cell
heatmap_trace = go.Heatmap(
    z=corr_matrix.values,
    x=corr_matrix.columns,
    y=corr_matrix.columns,
    colorscale='RdBu',
    zmid=0,
    text=np.round(corr_matrix.values, 2),
    texttemplate='%{text}',
    textfont={"size": 10},
    hoverongaps=False,
    hovertemplate='<b>%{x}</b> vs <b>%{y}</b><br>Correlation: %{z:.2f}<extra></extra>'
)
fig5 = go.Figure(data=heatmap_trace)

fig5.update_layout(
    title='Feature Correlation Heatmap',
    template='plotly_white',
    height=800,
    xaxis=dict(tickangle=45),
    yaxis=dict(tickangle=0)
)

save_chart(fig5, "5_correlation_heatmap.html", "Interactive correlation matrix of all features")

Saved: 5_correlation_heatmap.html - Interactive correlation matrix of all features


'obesity_analysis_charts\\5_correlation_heatmap.html'

In [9]:
# Create a feature importance chart using Random Forest
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder

# Integer-encode every object-typed feature so the forest can consume it
X_encoded = X.copy()
object_columns = X_encoded.select_dtypes(include=['object']).columns
for col in object_columns:
    X_encoded[col] = LabelEncoder().fit_transform(X_encoded[col])

y_encoded = LabelEncoder().fit_transform(y)

# Fit a 100-tree forest; the fixed seed keeps the ranking reproducible
rf = RandomForestClassifier(n_estimators=100, random_state=42)
rf.fit(X_encoded, y_encoded)

# Pair each feature with its importance; ascending order puts the largest bar on top
feature_importance = pd.DataFrame({
    'Feature': X.columns,
    'Importance': rf.feature_importances_
}).sort_values('Importance', ascending=True)

importance_values = feature_importance['Importance']

# Horizontal bars, coloured and labelled by their importance score
importance_bars = go.Bar(
    x=importance_values,
    y=feature_importance['Feature'],
    orientation='h',
    marker=dict(
        color=importance_values,
        colorscale='Viridis',
        showscale=True,
        colorbar=dict(title="Importance")
    ),
    text=["{:.3f}".format(imp) for imp in importance_values],
    textposition='auto',
    hovertemplate='<b>%{y}</b><br>Importance: %{x:.4f}<extra></extra>'
)

fig6 = go.Figure()
fig6.add_trace(importance_bars)

fig6.update_layout(
    title='Feature Importance for Obesity Prediction',
    xaxis_title='Importance Score',
    yaxis_title='Feature',
    template='plotly_white',
    height=600,
    showlegend=False
)

save_chart(fig6, "6_feature_importance.html", "Feature importance ranking for obesity prediction")

Saved: 6_feature_importance.html - Feature importance ranking for obesity prediction


'obesity_analysis_charts\\6_feature_importance.html'

In [11]:
# Parallel coordinates across a handful of key features, coloured by obesity class
key_features = ['Age', 'Weight', 'Height', 'FCVC', 'FAF']
available_for_parallel = [f for f in key_features if f in X.columns]

# Drawn only when at least three of the key features are present
if len(available_for_parallel) >= 3:
    parallel_df = X[available_for_parallel].copy()

    # The colour axis needs numbers, so the class is stored twice:
    # as integer codes (order of first appearance) and as the original label
    parallel_df['Obesity Level'] = pd.factorize(y)[0]
    parallel_df['Obesity Level (Category)'] = y

    plot_dimensions = available_for_parallel + ['Obesity Level']

    fig7 = px.parallel_coordinates(
        parallel_df,
        dimensions=plot_dimensions,
        color='Obesity Level',
        color_continuous_scale=px.colors.diverging.Tealrose,
        labels={col: col for col in plot_dimensions},
        title='Parallel Coordinates: Feature Relationships'
    )

    # Relabel the numeric colour bar with the class names; y.unique() lists
    # them in order of first appearance
    level_names = y.unique()
    fig7.update_layout(
        coloraxis_colorbar=dict(
            title="Obesity Level",
            tickvals=list(range(len(level_names))),
            ticktext=list(level_names),
            len=0.5
        ),
        height=700,
        template='plotly_white'
    )

    save_chart(fig7, "7_parallel_coordinates.html", "Parallel coordinates plot showing feature relationships")

Saved: 7_parallel_coordinates.html - Parallel coordinates plot showing feature relationships


In [12]:
# 3D scatter of weight / height / age, coloured by obesity class and sized by BMI
if all(col in X.columns for col in ['Age', 'Weight', 'Height']):
    # The height axis is labelled in centimetres and BMI needs metres, so bring
    # the column to cm first. A median below 3 cannot be centimetres, so such a
    # column is treated as metres (heuristic; dividing metre values by 100
    # would inflate every BMI by a factor of 10,000).
    height_raw = X['Height']
    height_cm = height_raw * 100 if height_raw.median() < 3 else height_raw

    scatter_df = pd.DataFrame({
        'Age': X['Age'],
        'Weight': X['Weight'],
        'Height': height_cm,
        'Obesity Level': y,
        # BMI = weight [kg] / (height [m]) ** 2
        'BMI': X['Weight'] / ((height_cm / 100) ** 2)
    })

    fig8 = px.scatter_3d(
        scatter_df,
        x='Weight',
        y='Height',
        z='Age',
        color='Obesity Level',
        size='BMI',
        symbol='Obesity Level',
        title='3D View: Age, Weight, and Height by Obesity Level',
        template='plotly_white',
        opacity=0.7,
        color_discrete_sequence=px.colors.qualitative.Set2
    )

    fig8.update_layout(
        scene=dict(
            xaxis_title='Weight (kg)',
            yaxis_title='Height (cm)',
            zaxis_title='Age (years)'
        ),
        height=800
    )

    save_chart(fig8, "8_3d_scatter.html", "3D interactive visualization of key physical attributes")

Saved: 8_3d_scatter.html - 3D interactive visualization of key physical attributes


In [15]:
from string import Template

# Landing page for the exported charts.
# string.Template is used instead of str.format / f-strings because the CSS is
# full of literal braces. Placeholders filled in later:
#   $num_records, $num_features, $timestamp, $chart_links
index_template = Template("""<!DOCTYPE html>
<html>
<head>
    <meta charset="utf-8">
    <title>Obesity Analysis - Interactive Charts</title>
    <style>
        body {
            font-family: Arial, sans-serif;
            margin: 40px;
            background-color: #f5f5f5;
        }
        .header {
            background-color: #2c3e50;
            color: white;
            padding: 30px;
            border-radius: 10px;
            margin-bottom: 30px;
        }
        .chart-card {
            background-color: white;
            border-radius: 8px;
            padding: 20px;
            margin: 20px 0;
            box-shadow: 0 4px 6px rgba(0,0,0,0.1);
            transition: transform 0.2s;
        }
        .chart-card:hover {
            transform: translateY(-5px);
            box-shadow: 0 6px 12px rgba(0,0,0,0.15);
        }
        .chart-link {
            color: #3498db;
            text-decoration: none;
            font-size: 18px;
            font-weight: bold;
        }
        .chart-link:hover {
            color: #2980b9;
            text-decoration: underline;
        }
        .description {
            color: #7f8c8d;
            margin-top: 10px;
            line-height: 1.6;
        }
        .container {
            max-width: 1000px;
            margin: 0 auto;
        }
        .stats {
            background-color: #ecf0f1;
            color: #2c3e50;
            padding: 15px;
            border-radius: 5px;
            margin-bottom: 20px;
        }
    </style>
</head>
<body>
    <div class="container">
        <div class="header">
            <h1>Obesity Analysis Dashboard</h1>
            <p>Interactive visualizations of obesity levels based on eating habits and physical condition</p>
            <div class="stats">
                <p><strong>Dataset:</strong> UCI Obesity Dataset (ID: 544)</p>
                <p><strong>Records:</strong> $num_records individuals</p>
                <p><strong>Features:</strong> $num_features lifestyle factors</p>
                <p><strong>Generated:</strong> $timestamp</p>
            </div>
        </div>
        
        <h2>Interactive Charts</h2>
        <p>Click on any chart below to open it in a new tab. All charts are fully interactive.</p>
        
        $chart_links
        
        <div style="margin-top: 40px; padding: 20px; background-color: #e8f4f8; border-radius: 8px;">
            <h3>How to Use These Charts:</h3>
            <ul>
                <li><strong>Hover</strong> over any point to see detailed information</li>
                <li><strong>Click and drag</strong> to zoom in on specific areas</li>
                <li><strong>Double-click</strong> to reset the view</li>
                <li><strong>Click on legend items</strong> to show/hide categories</li>
                <li>Use <strong>mouse wheel</strong> to zoom in/out on 3D charts</li>
                <li>All charts work in any modern web browser</li>
            </ul>
        </div>
        
        <footer style="margin-top: 40px; text-align: center; color: #7f8c8d; padding: 20px;">
            <p>Generated using Python, Plotly, and Jupyter Notebook</p>
            <p>Dataset: Estimation of obesity levels based on eating habits and physical condition (UCI ML Repository)</p>
        </footer>
    </div>
</body>
</html>""")

# Catalogue of every chart the notebook can produce: (file name, link title, blurb)
chart_files = [
    ("1_obesity_distribution.html", "Obesity Level Distribution", "Interactive pie chart showing the distribution of different obesity levels in the dataset"),
    ("2_bmi_visualizer.html", "BMI Visualizer", "Height vs Weight scatter plot with BMI color coding and reference lines"),
    ("3_age_distribution.html", "Age Analysis", "Box plots showing age distribution across different obesity levels"),
    ("4_lifestyle_dashboard.html", "Lifestyle Factors Dashboard", "Comprehensive analysis of eating habits and physical activity factors"),
    ("5_correlation_heatmap.html", "Correlation Heatmap", "Interactive correlation matrix showing relationships between all features"),
    ("6_feature_importance.html", "Feature Importance", "Bar chart ranking the most important factors for obesity prediction"),
    ("7_parallel_coordinates.html", "Parallel Coordinates", "Parallel coordinates plot showing patterns across multiple features"),
    ("8_3d_scatter.html", "3D Scatter Plot", "Interactive 3D visualization of Age, Weight, and Height by obesity level")
]

# Several chart cells are conditional, so link only to files that are on disk
existing_charts = [
    entry for entry in chart_files
    if os.path.exists(os.path.join(chart_dir, entry[0]))
]

# One card of markup per chart, concatenated in catalogue order
chart_links_html = "".join(
    f"""
    <div class="chart-card">
        <a href="{filename}" target="_blank" class="chart-link">{title}</a>
        <div class="description">{desc}</div>
    </div>
    """
    for filename, title, desc in existing_charts
)

# Headline numbers shown in the page header
num_records, num_features = X.shape
timestamp = pd.Timestamp.now().strftime("%Y-%m-%d %H:%M:%S")

# Fill the $placeholders of the page template
index_content = index_template.substitute(
    {
        "num_records": num_records,
        "num_features": num_features,
        "timestamp": timestamp,
        "chart_links": chart_links_html,
    }
)

# Write the page next to the charts so the relative links resolve
index_path = os.path.join(chart_dir, "index.html")
with open(index_path, 'w', encoding='utf-8') as f:
    f.write(index_content)

print(f"Created index page: {index_path}")

Created index page: obesity_analysis_charts\index.html


In [16]:
import zipfile

# Bundle the chart folder into one compressed archive in the working directory.
# Only files present in chart_dir when this cell runs end up in the archive.
zip_filename = "obesity_analysis_charts.zip"
with zipfile.ZipFile(zip_filename, mode='w', compression=zipfile.ZIP_DEFLATED) as zipf:
    for root, dirs, files in os.walk(chart_dir):
        for file in files:
            file_path = os.path.join(root, file)
            # Store paths relative to chart_dir so the archive unpacks flat
            zipf.write(file_path, os.path.relpath(file_path, chart_dir))

zip_size_bytes = os.path.getsize(zip_filename)
print(f"\nCreated zip file: {zip_filename}")
print(f"Size: {zip_size_bytes / 1024 / 1024:.2f} MB")


Created zip file: obesity_analysis_charts.zip
Size: 0.21 MB


In [17]:
# README shipped alongside the charts.
# The dataset figures are read from X and y rather than hard-coded, and the
# technical notes reflect that the charts link to Plotly.js on a CDN
# (save_chart writes them with include_plotlyjs='cdn').
readme_content = f"""
# Obesity Analysis Interactive Charts

## Overview
This folder contains interactive visualizations of the Obesity Dataset from the UCI Machine Learning Repository.

## Contents
- `index.html` - Main dashboard with links to all charts
- `1_obesity_distribution.html` - Pie chart of obesity levels
- `2_bmi_visualizer.html` - Height vs Weight BMI calculator
- `3_age_distribution.html` - Age analysis by obesity level
- `4_lifestyle_dashboard.html` - Lifestyle factors analysis
- `5_correlation_heatmap.html` - Correlation matrix
- `6_feature_importance.html` - Feature importance ranking
- `7_parallel_coordinates.html` - Parallel coordinates plot
- `8_3d_scatter.html` - 3D scatter plot

## How to Use
1. Simply open `index.html` in any modern web browser (Chrome, Firefox, Edge, Safari)
2. Click on any chart to view it full screen
3. All charts are fully interactive:
   - Hover over elements to see details
   - Click and drag to zoom
   - Double-click to reset view
   - Click legend items to show/hide categories

## Dataset Information
- **Source**: UCI Machine Learning Repository (ID: 544)
- **Records**: {X.shape[0]} individuals
- **Features**: {X.shape[1]} lifestyle factors
- **Target**: Obesity Level ({y.nunique()} categories)

## Technical Details
- Created with Python, Plotly, and Jupyter Notebook
- Charts saved as standalone HTML files
- No server or special software required
- An internet connection is needed to view the charts (Plotly.js is loaded from a CDN)

## Contact
Generated on: {pd.Timestamp.now().strftime("%Y-%m-%d %H:%M:%S")}

Enjoy exploring the data!
"""

readme_path = os.path.join(chart_dir, "README.txt")
with open(readme_path, 'w', encoding='utf-8') as f:
    f.write(readme_content)

print(f"\nCreated README file: {readme_path}")


Created README file: obesity_analysis_charts\README.txt


In [18]:
# Final report: list what was written and how to open / share it
from pathlib import Path

print("\n" + "="*60)
print("ALL CHARTS SAVED SUCCESSFULLY!")
print("="*60)

# List all created files with their size in KB
print("\nFiles created in folder:", chart_dir)
for root, dirs, files in os.walk(chart_dir):
    for file in sorted(files):
        filepath = os.path.join(root, file)
        filesize = os.path.getsize(filepath)
        print(f"  • {file} ({filesize / 1024:.1f} KB)")

# Report the archive under the name the zip cell actually used
print(f"\nZip file: {zip_filename} ({os.path.getsize(zip_filename) / 1024 / 1024:.2f} MB)")

print("\nTo share your charts:")
print("1. Send the zip file to anyone")
print("2. They just need to unzip it and open index.html")
print("3. No Python or special software required!")

# Path.as_uri() yields a well-formed file URI on every platform (forward
# slashes, percent-encoding, file:///C:/... on Windows); gluing "file://" onto
# an OS path does not.
index_uri = Path(chart_dir, 'index.html').resolve().as_uri()
print("\nTo open the dashboard now:")
print(f"Open this file in your browser: {index_uri}")


ALL CHARTS SAVED SUCCESSFULLY!

Files created in folder: obesity_analysis_charts
  • 1_obesity_distribution.html (8.7 KB)
  • 2_bmi_visualizer.html (374.2 KB)
  • 3_age_distribution.html (67.3 KB)
  • 4_lifestyle_dashboard.html (341.2 KB)
  • 5_correlation_heatmap.html (15.7 KB)
  • 6_feature_importance.html (9.3 KB)
  • 7_parallel_coordinates.html (93.7 KB)
  • 8_3d_scatter.html (99.6 KB)
  • README.txt (1.5 KB)
  • index.html (5.3 KB)

Zip file: obesity_analysis_charts.zip (0.21 MB)

To share your charts:
1. Send the zip file to anyone
2. They just need to unzip it and open index.html
3. No Python or special software required!

To open the dashboard now:
Open this file in your browser: file://C:\Users\user\obesity_analysis_charts\index.html
