In [None]:
#%pip install pandas seaborn matplotlib streamlit plotly dash numpy scipy plotnine

In [None]:
import pandas as pd
import seaborn as sns

In [None]:
# Generate the data
import csv
import random
from datetime import datetime, timedelta
import pandas as pd

def generate_bmi(age):
    """Return a random BMI, rounded to one decimal, that tends to rise with age.

    A baseline is drawn uniformly from 18.5-25.0. On top of it, a second uniform
    draw from 0-10 is scaled by the age fraction ``(age - 18) / 72`` (0.0 at
    age 18, 1.0 at age 90) and added to the baseline.
    """
    baseline = random.uniform(18.5, 25.0)
    age_fraction = (age - 18) / 72
    age_bonus = random.uniform(0, 10) * age_fraction
    total = baseline + age_bonus
    return round(total, 1)

def generate_blood_pressure(age, bmi):
    """Return a random integer systolic pressure that grows with age and BMI.

    Starting from 90, up to 30 points are added in proportion to
    ``(age - 18) / 72`` and up to 20 points in proportion to
    ``(bmi - 18.5) / 21.5``, plus uniform noise in [-10, 10]. The sum is
    truncated to an int and clamped to the closed range 90-180.
    """
    lowest, highest = 90, 180

    age_fraction = (age - 18) / 72
    bmi_fraction = (bmi - 18.5) / 21.5
    uplift = (30 * age_fraction) + (20 * bmi_fraction)

    reading = int(lowest + uplift + random.uniform(-10, 10))

    # Clamp into [lowest, highest]
    if reading < lowest:
        return lowest
    if reading > highest:
        return highest
    return reading

# Generate data
# Seed the generator so that "Restart Kernel -> Run All" always writes the same
# data.csv; unseeded, every run produced a different dataset and different plots.
SEED = 42
N_RECORDS = 1000  # number of synthetic patient records to generate
random.seed(SEED)

data = []
for _ in range(N_RECORDS):
    year = random.randint(2016, 2020)
    age = random.randint(18, 90)
    bmi = generate_bmi(age)
    blood_pressure = generate_blood_pressure(age, bmi)

    # Bucket the age into the three labels used by the plots below
    if age < 30:
        age_group = "Young Adult"
    elif age < 50:
        age_group = "Adult"
    else:
        age_group = "Senior"

    admissions = random.randint(1, 10)

    data.append([year, admissions, age, blood_pressure, bmi, age_group])

# Convert to DataFrame
df = pd.DataFrame(data, columns=["Year", "Admissions", "Age", "BloodPressure", "BMI", "AgeGroup"])

# Total admissions per year. groupby() sorts its keys by default, so the result
# is already ordered by Year and needs no separate sort.
yearly_data = df.groupby("Year")["Admissions"].sum().reset_index()

# Save the full dataset for the examples in the following cells
df.to_csv('data.csv', index=False)

print("data.csv has been generated successfully with all individual records.")

In [None]:
# Load data.csv into `df`; the plotting cells below read from this DataFrame
df = pd.read_csv('data.csv')

In [None]:
# Aggregate yearly admissions: one row per Year with the summed Admissions
yearly_data = df.groupby('Year')['Admissions'].sum().reset_index()
yearly_data = yearly_data.sort_values('Year')
display(yearly_data.head())

# Create plot using pandas
ax = yearly_data.plot(
    x='Year',
    y='Admissions',
    kind='line',
    figsize=(10, 6),
    title='Yearly Hospital Admissions',
    grid=True,
    marker='o',
    markersize=8,
    color='blue',
    linewidth=2,
    # Years are whole numbers: pin one tick per year, otherwise matplotlib's
    # automatic locator may add fractional ticks such as 2016.5.
    xticks=yearly_data['Year'].tolist(),
    xlabel='Year',
    ylabel='Total Admissions',
    legend=True
)

In [None]:
### 2. `hist()`: Histogram

# Histogram of patient ages, drawn through the `.plot.hist` accessor
hist_options = dict(
    figsize=(10, 6),
    title='Distribution of Patient Ages',
    grid=True,
    color='skyblue',
    edgecolor='black',
    alpha=0.7,
    xlabel='Age',
    ylabel='Frequency',
)
ax = df['Age'].plot.hist(bins=20, **hist_options)

In [None]:
# Matplotlib: The Foundation

import matplotlib.pyplot as plt

# Aggregate yearly admissions: one row per Year with the summed Admissions
yearly_data = df.groupby('Year')['Admissions'].sum().reset_index()
yearly_data = yearly_data.sort_values('Year')

# Create figure and axis
plt.figure(figsize=(10, 6))

# Create line plot
plt.plot(yearly_data['Year'], yearly_data['Admissions'],
         marker='o', linestyle='-', color='blue',
         linewidth=2, markersize=8, label='Total Admissions')

# Customize the plot
plt.title('Total Admissions Over Years', fontsize=14, pad=20)
plt.xlabel('Year', fontsize=12)
plt.ylabel('Total Admissions', fontsize=12)
plt.grid(True, linestyle='--', alpha=0.7)

# Years are whole numbers: pin one tick per year, otherwise the automatic
# locator may add fractional ticks such as 2016.5.
plt.xticks(yearly_data['Year'].tolist())

# Add legend
plt.legend(fontsize=10)

# Adjust layout and display
plt.tight_layout()
plt.show()


In [None]:
### 2. `scatter()`: Scatter Plot

# Load data
df = pd.read_csv('data.csv')

# One figure with a single axes; everything below is drawn on `ax`
fig, ax = plt.subplots(figsize=(10, 6))

# BMI on x, blood pressure on y
ax.scatter(df['BMI'], df['BloodPressure'], c='red', alpha=0.7, s=50)

# Titles, axis labels and a dashed grid
ax.set_title('Blood Pressure vs BMI', fontsize=14, pad=20)
ax.set_xlabel('BMI', fontsize=12)
ax.set_ylabel('Blood Pressure', fontsize=12)
ax.grid(True, linestyle='--', alpha=0.7)

# Adjust layout and display
fig.tight_layout()
plt.show()


In [None]:
### Additional Plot Types in Matplotlib

# 1. Bar Plot: Average Blood Pressure by Age Group
fig, ax = plt.subplots(figsize=(10, 6))

# Mean blood pressure per age group (the index holds the group labels)
avg_bp = df.groupby('AgeGroup')['BloodPressure'].mean()
bar_colors = ['skyblue', 'lightgreen', 'coral']
ax.bar(avg_bp.index, avg_bp.values, color=bar_colors)

ax.set(
    title='Average Blood Pressure by Age Group',
    xlabel='Age Group',
    ylabel='Average Blood Pressure',
)
ax.grid(True, alpha=0.3)
plt.xticks(rotation=45)  # tilt the group labels
fig.tight_layout()
plt.show()

In [None]:
# 2. Histogram: Distribution of BMI
fig, ax = plt.subplots(figsize=(10, 6))
ax.hist(df['BMI'], bins=30, color='skyblue', edgecolor='black')
ax.set(title='Distribution of BMI', xlabel='BMI', ylabel='Frequency')
ax.grid(True, alpha=0.3)
fig.tight_layout()
plt.show()

In [None]:
# 3. Box Plot: Blood Pressure by Age Group
fig, ax = plt.subplots(figsize=(10, 6))

# Group labels in order of first appearance; one blood-pressure Series per label
groups = df['AgeGroup'].unique()
data = [df.loc[df['AgeGroup'] == group, 'BloodPressure'] for group in groups]

ax.boxplot(data, tick_labels=groups)
ax.set(
    title='Blood Pressure Distribution by Age Group',
    xlabel='Age Group',
    ylabel='Blood Pressure',
)
ax.grid(True, alpha=0.3)
fig.tight_layout()
plt.show()

In [None]:
# 4. Scatter Plot with Multiple Groups
fig, ax = plt.subplots(figsize=(12, 8))
colors = {'Young Adult': 'blue', 'Adult': 'green', 'Senior': 'red'}

# sort=False keeps the groups in order of first appearance, so the legend
# lists them in the same order as iterating over unique() would
for age_group, members in df.groupby('AgeGroup', sort=False):
    ax.scatter(
        members['BMI'],
        members['BloodPressure'],
        c=colors[age_group],
        label=age_group,
        alpha=0.6,
    )

ax.set(title='BMI vs Blood Pressure by Age Group', xlabel='BMI', ylabel='Blood Pressure')
ax.legend()
ax.grid(True, alpha=0.3)
fig.tight_layout()
plt.show()



In [None]:
# 5. Line Plot: Average BMI by Age
fig, ax = plt.subplots(figsize=(10, 6))

# Despite its name, `yearly_avg` holds the mean BMI per Age, not per year
yearly_avg = df.groupby('Age')['BMI'].mean()
ax.plot(yearly_avg.index, yearly_avg.values, marker='o', linewidth=2, color='purple')

ax.set(title='Average BMI by Age', xlabel='Age', ylabel='Average BMI')
ax.grid(True)
fig.tight_layout()
plt.show()

In [None]:
## 1. Pie Chart - Admissions by Age Group

# Total admissions per age group (the index holds the group labels)
admissions_by_age = df.groupby('AgeGroup')['Admissions'].sum()

fig, ax = plt.subplots(figsize=(8, 8))
ax.pie(
    admissions_by_age,
    labels=admissions_by_age.index,
    autopct='%1.1f%%',  # print each slice's share with one decimal
)
ax.set_title('Distribution of Admissions by Age Group')
plt.show()

In [None]:
## 2. Subplots - Multiple Visualizations

fig, axs = plt.subplots(2, 2, figsize=(12, 12))

# Name the four panels of the 2x2 grid (top row first, then bottom row)
(ax_age_bmi, ax_bp_hist), (ax_yearly, ax_bmi_bp) = axs

# Plot 1: Age vs BMI scatter
ax_age_bmi.scatter(df['Age'], df['BMI'])
ax_age_bmi.set(title='Age vs BMI', xlabel='Age', ylabel='BMI')

# Plot 2: Blood Pressure distribution
ax_bp_hist.hist(df['BloodPressure'], bins=15)
ax_bp_hist.set(title='Blood Pressure Distribution', xlabel='Blood Pressure', ylabel='Count')

# Plot 3: Admissions by Year
yearly_admissions = df.groupby('Year')['Admissions'].sum()
ax_yearly.bar(yearly_admissions.index, yearly_admissions.values)
ax_yearly.set(title='Yearly Admissions', xlabel='Year', ylabel='Total Admissions')

# Plot 4: BMI vs Blood Pressure
ax_bmi_bp.scatter(df['BMI'], df['BloodPressure'])
ax_bmi_bp.set(title='BMI vs Blood Pressure', xlabel='BMI', ylabel='Blood Pressure')

fig.tight_layout()
plt.show()

In [None]:
# Error Bars: Blood Pressure by Age Group

# Mean and standard deviation of blood pressure, one row per age group
bp_stats = df.groupby('AgeGroup')['BloodPressure'].agg(['mean', 'std']).reset_index()

fig, ax = plt.subplots(figsize=(10, 6))

# One marker per group at integer positions, with +/- one std as the error bar
positions = range(len(bp_stats))
ax.errorbar(positions, bp_stats['mean'], yerr=bp_stats['std'], fmt='o')
plt.xticks(positions, bp_stats['AgeGroup'], rotation=45)

ax.set(
    title='Average Blood Pressure by Age Group (with std dev)',
    xlabel='Age Group',
    ylabel='Blood Pressure',
)
fig.tight_layout()
plt.show()

In [None]:
## 4. Contour Plot - BMI vs Age vs Blood Pressure

import numpy as np
from scipy.interpolate import griddata

# Regular 100 x 100 grid spanning the observed Age and BMI ranges
age_range = np.linspace(df['Age'].min(), df['Age'].max(), 100)
bmi_range = np.linspace(df['BMI'].min(), df['BMI'].max(), 100)
X, Y = np.meshgrid(age_range, bmi_range)

# Interpolate blood pressure from the scattered (Age, BMI) samples onto the grid
sample_points = (df['Age'], df['BMI'])
Z = griddata(sample_points, df['BloodPressure'], (X, Y), method='cubic')

fig, ax = plt.subplots(figsize=(10, 8))
filled = ax.contourf(X, Y, Z, levels=15, cmap='viridis')
fig.colorbar(filled, ax=ax, label='Blood Pressure')

# Overlay the original samples on top of the interpolated surface
ax.scatter(df['Age'], df['BMI'], c='red', s=50, alpha=0.5)
ax.set(title='Blood Pressure Contour Map (Age vs BMI)', xlabel='Age', ylabel='BMI')
fig.tight_layout()
plt.show()

# Seaborn: Statistical Data Visualization

In [None]:
### 1. `scatterplot()`: Scatter Plot

# Set the style and figure size
# Note: this sets the style for all subsequent plots!
theme_rc = {"figure.figsize": (10, 6)}
sns.set_theme(style="whitegrid", rc=theme_rc)

# Load dataset
df = pd.read_csv('data.csv')

# Colour encodes the age group; marker size encodes the age itself
scatter_options = dict(
    x='BMI',
    y='BloodPressure',
    hue='AgeGroup',
    size='Age',  # Not a good choice
    sizes=(50, 200),
    alpha=0.5,
)
sns.scatterplot(data=df, **scatter_options)


In [None]:
# 1. Violin Plot: Blood Pressure Distribution by Age Group

# One violin per age group; the returned Axes is kept to add the title
violin_options = {'x': 'AgeGroup', 'y': 'BloodPressure'}
ax = sns.violinplot(data=df, **violin_options)
ax.set_title('Blood Pressure Distribution by Age Group')

In [None]:

# 2. Joint Plot: Age vs BMI Relationship

# kind='hex' draws the joint distribution as hexagonal bins
joint_options = {'x': 'Age', 'y': 'BMI', 'kind': 'hex', 'height': 8}
g = sns.jointplot(data=df, **joint_options)


In [None]:

# 3. Box Plot with Points: BMI by Age Group

# Draw the boxes first, then overlay the individual points on the same Axes
shared = {'data': df, 'x': 'AgeGroup', 'y': 'BMI'}
ax = sns.boxplot(color='lightblue', **shared)
ax = sns.swarmplot(color='0.25', alpha=0.5, ax=ax, **shared)
ax.set_title('BMI Distribution by Age Group')


In [None]:
# 4. Heatmap: Correlation Matrix

# Pairwise correlations between the numeric columns, computed with pandas
numeric_columns = ['Age', 'BloodPressure', 'BMI', 'Admissions']
correlation_matrix = df[numeric_columns].corr()

# Annotate each cell with its value (two decimals) and centre the colormap on 0
ax = sns.heatmap(
    correlation_matrix,
    annot=True,
    fmt='.2f',
    cmap='coolwarm',
    center=0,
)
ax.set_title('Correlation Heatmap of Health Metrics')


In [None]:

# 5. Line Plot: Average Blood Pressure by Age

# Mean blood pressure per age, with Age kept as a regular column
age_bp = df.groupby('Age', as_index=False)['BloodPressure'].mean()

# Draw the averaged series and title the Axes seaborn returns
ax = sns.lineplot(x='Age', y='BloodPressure', data=age_bp)
ax.set_title('Average Blood Pressure by Age')


In [None]:

# 6. Count Plot: Distribution of Age Groups by Year

# Count records per year, with one bar per age group within each year
count_options = {'x': 'Year', 'hue': 'AgeGroup'}
ax = sns.countplot(data=df, **count_options)
ax.set_title('Distribution of Age Groups by Year')


In [None]:
# 7. Pair Plot: Relationships Between All Numeric Variables
# Note: This may take a while to run with large datasets

# (saving my favorite for last)
pair_columns = ['Age', 'BloodPressure', 'BMI', 'Admissions']
sns.pairplot(df, vars=pair_columns, hue='AgeGroup', diag_kind='kde')

In [None]:
## Plotly Dash: Building an App

# Put this code in a Python file (e.g., app.py) to run
# as it creates a web application using Dash.

import dash
from dash import dcc, html
from dash.dependencies import Input, Output
import plotly.express as px
import pandas as pd

# Load data
df = pd.read_csv('data.csv')

# Initialize the app
app = dash.Dash(__name__)

# Age-group labels in alphabetical order; the first one is preselected
age_groups = sorted(df['AgeGroup'].unique())

# Header
header = html.H1(
    'Health Data Dashboard',
    style={'textAlign': 'center', 'marginBottom': 30},
)

# Dropdown container
age_dropdown = dcc.Dropdown(
    id='age-dropdown',
    options=[{'label': age, 'value': age} for age in age_groups],
    value=age_groups[0],
    style={'width': '50%', 'marginBottom': 20},
)
dropdown_container = html.Div([html.Label('Select Age Group:'), age_dropdown])

# Graph container
graph_container = html.Div([dcc.Graph(id='bmi-bloodpressure-scatter')])

# Define the layout
app.layout = html.Div(
    [header, dropdown_container, graph_container],
    style={'padding': 20},
)

# Define the callback
@app.callback(
    Output('bmi-bloodpressure-scatter', 'figure'),
    [Input('age-dropdown', 'value')]
)
def update_graph(selected_age):
    """Redraw the BMI vs blood-pressure scatter for the chosen age group.

    Args:
        selected_age: Current value of the 'age-dropdown' component, or None
            when the user has cleared the selection.

    Returns:
        A Plotly figure restricted to rows whose AgeGroup equals
        ``selected_age``, or ``dash.no_update`` when nothing is selected.
    """
    # The dropdown is clearable by default, so the value can be None. Keep the
    # current figure instead of drawing an empty chart titled "... for None".
    if selected_age is None:
        return dash.no_update

    filtered_df = df[df['AgeGroup'] == selected_age]

    fig = px.scatter(
        filtered_df,
        x='BMI',
        y='BloodPressure',
        title=f'BMI vs Blood Pressure for {selected_age}',
        labels={'BMI': 'Body Mass Index',
                'BloodPressure': 'Blood Pressure'},
        template='plotly_white'
    )

    fig.update_traces(marker=dict(size=10))
    fig.update_layout(
        title_x=0.5,  # centre the title
        title_font_size=20,
        showlegend=False
    )

    return fig

# Run the app
# `app.run` replaces `app.run_server`, which is deprecated in Dash 2.x and
# removed in Dash 3.
if __name__ == '__main__':
    app.run(debug=True)

### Code Explanation

# - **Imports**:
#   - `dash`, with its `dcc` (core components) and `html` submodules for the layout, and `dash.dependencies` (`Input`, `Output`) for interactivity.
#   - `plotly.express` for plotting.
# - **Data Loading**:
#   - Reads health data into a DataFrame.
# - **App Initialization**:
#   - Creates a Dash app instance.
# - **Layout Definition**:
#   - Contains a header, dropdown menu, and a graph component.
# - **Callback Function**:
#   - Updates the graph based on the selected age group from the dropdown.


# Plotnine

In [None]:
### 1. `ggplot()`: Initialize a Plot


from plotnine import ggplot, aes, geom_point, ggtitle

# Create plot: start from the data and aesthetic mapping, then add one layer at a time
plot = ggplot(df, aes(x='BMI', y='BloodPressure'))
plot = plot + geom_point(color='blue')
plot = plot + ggtitle('Blood Pressure vs BMI')

plot.show()

### Code Explanation

# - **Importing Libraries**:
#   ```python
#   from plotnine import ggplot, aes, geom_point, ggtitle
#   ```
#   - Imports the necessary components from Plotnine; pandas was already imported as `pd` earlier in the notebook.
# - **Defining Data**:
#   - Reuses the DataFrame `df` loaded from `data.csv` earlier, which contains the 'BMI' and 'BloodPressure' columns.
# - **Creating the Plot**:
#   ```python
#   plot = (ggplot(df, aes(x='BMI', y='BloodPressure'))
#           + geom_point(color='blue')
#           + ggtitle('Blood Pressure vs BMI'))
#   ```
#   - Initializes the plot with data and aesthetic mappings.
#   - Uses `geom_point()` to add scatter plot points.
#   - Adds a title with `ggtitle()`.
# - **Displaying the Plot**:
#   - The `plot.show()` statement renders the plot.

In [None]:
### 2. Layering Geometries and Aesthetics

# - **Adding Layers**: Use the `+` operator to add layers.
from plotnine import geom_smooth

# Base scatter plot, then a fitted line layered on top with `+`
plot = ggplot(df, aes(x='BMI', y='BloodPressure'))
plot = plot + geom_point(color='blue')
plot = plot + geom_smooth(method='lm')
plot = plot + ggtitle('Blood Pressure vs BMI with Regression Line')

plot.show()

### Code Explanation

# - **Adding `geom_smooth()`**:
#   - Adds a smooth line (here, a linear regression line) to the plot.
#   - `method='lm'` specifies a linear model.
# - **Enhanced Visualization**:
#   - Helps in identifying trends or relationships in the data.

In [None]:

### 3. Facetting: Creating Multiple Plots

# - **Explanation**: Splits the data into subsets according to a variable and creates multiple plots.

from plotnine import facet_wrap

# Base scatter plot, then one panel per AgeGroup value via facet_wrap
plot = ggplot(df, aes(x='BMI', y='BloodPressure'))
plot = plot + geom_point(color='blue')
plot = plot + facet_wrap('~ AgeGroup')
plot = plot + ggtitle('Blood Pressure vs BMI by Age Group')

plot.show()

### Code Explanation
# - **Using `facet_wrap()`**:
#   - Creates separate plots for each age group.
# - **Purpose**:
#   - Allows comparison across different subsets of the data.

# Streamlit

In [None]:
## Streamlit: Building an App

# Put this code in a Python file (e.g., app.py) to run
# as it creates a web application using Streamlit (start it with `streamlit run app.py`).

import streamlit as st
import pandas as pd
import plotly.express as px

# Page config
st.set_page_config(
    page_title="Health Data Explorer",
    layout="wide"
)

# Load data
df = pd.read_csv('data.csv')

# App title
st.title('Health Data Explorer')
st.markdown('---')

# Sidebar: a single selectbox choosing which age group to show
st.sidebar.header('Filters')
age_group = st.sidebar.selectbox(
    'Select Age Group',
    options=sorted(df['AgeGroup'].unique())
)

# Rows for the selected age group; shared by the summary, the chart and the raw table
filtered_df = df[df['AgeGroup'] == age_group]

# Main content: narrower summary column on the left, wider chart on the right
col1, col2 = st.columns([2, 3])

with col1:
    st.subheader('Data Summary')

    # Summary statistics.
    # Every value is a string: a column mixing str and int can fail Arrow
    # serialisation in st.table on some Streamlit versions.
    summary = pd.DataFrame({
        'Metric': ['Average BMI', 'Average Blood Pressure', 'Total Patients'],
        'Value': [
            f"{filtered_df['BMI'].mean():.1f}",
            f"{filtered_df['BloodPressure'].mean():.1f}",
            str(len(filtered_df))
        ]
    })
    st.table(summary)

with col2:
    st.subheader('BMI vs Blood Pressure')
    fig = px.scatter(
        filtered_df,
        x='BMI',
        y='BloodPressure',
        title=f'BMI vs Blood Pressure for {age_group}',
        labels={'BMI': 'Body Mass Index',
                'BloodPressure': 'Blood Pressure'},
        template='plotly_white'
    )

    fig.update_traces(marker=dict(size=10))
    fig.update_layout(
        title_x=0.5,  # centre the title
        title_font_size=20,
        showlegend=False
    )

    st.plotly_chart(fig, use_container_width=True)

# Display raw data
st.markdown('---')
st.subheader('Raw Data')
st.dataframe(filtered_df)

### Code Explanation

# - **Imports**:
#   - `streamlit` for the app interface.
#   - `pandas` and `plotly.express` for data handling and plotting.
# - **Data Loading**:
#   - Reads the health data into a DataFrame.
# - **App Components**:
#   - `st.title()` sets the title.
#   - `st.sidebar.selectbox()` creates a dropdown in the sidebar.
#   - `st.markdown()`, `st.subheader()`, `st.table()`, and `st.dataframe()` display text and data.
#   - `st.plotly_chart()` renders the plot.
# - **Interactivity**:
#   - Selecting an age group filters the data and updates the display and plot.