#### Pandas Part 90: Styling DataFrames

This notebook explores the styling capabilities in pandas, which allow you to format and style DataFrames for better visualization and presentation.

In [None]:
import pandas as pd
import numpy as np
from IPython.display import display

##### 1. Creating Sample DataFrames

In [None]:
# Numeric sample: four columns (A-D) of five standard-normal draws each.
# The seed is fixed so the values are the same on every run; the columns
# are filled in A, B, C, D order.
np.random.seed(42)
df_numeric = pd.DataFrame({label: np.random.randn(5) for label in 'ABCD'})

# Mixed-type sample: one record per person, columns named explicitly
df_mixed = pd.DataFrame(
    [
        ('Alice', 25, 50000, 'HR'),
        ('Bob', 30, 60000, 'IT'),
        ('Charlie', 35, 75000, 'Finance'),
        ('David', 40, 90000, 'Marketing'),
        ('Eve', 45, 100000, 'Operations'),
    ],
    columns=['Name', 'Age', 'Salary', 'Department'],
)

# Show both frames, each under a plain-text heading
print("Numeric DataFrame:")
display(df_numeric)

print("\nMixed DataFrame:")
display(df_mixed)

##### 2. Basic Styling

The `.style` accessor returns a `Styler` object that can be used to style the DataFrame.

In [None]:
# Take the Styler for the numeric frame without adding any styling yet,
# then render it
styled = df_numeric.style
display(styled)

##### 3. Highlighting Maximum and Minimum Values

The `highlight_max()` and `highlight_min()` methods highlight the maximum and minimum values in the DataFrame.

In [None]:
# Mark maxima with a light-green background
styled_max = (
    df_numeric.style
    .highlight_max(color='lightgreen')
)
display(styled_max)

In [None]:
# Mark minima with a light-coral background
styled_min = (
    df_numeric.style
    .highlight_min(color='lightcoral')
)
display(styled_min)

In [None]:
# Chain both highlighters on one Styler: maxima in green, minima in coral
styled_both = (
    df_numeric.style
    .highlight_max(color='lightgreen')
    .highlight_min(color='lightcoral')
)
display(styled_both)

In [None]:
# Same pair of highlighters, but computed along axis=1 (per row)
styled_row = (
    df_numeric.style
    .highlight_max(axis=1, color='lightgreen')
    .highlight_min(axis=1, color='lightcoral')
)
display(styled_row)

##### 4. Formatting Values

The `format()` method allows you to format the values in the DataFrame.

In [None]:
# One format string for every cell: fixed-point with two decimals
two_decimals = "{:.2f}"
styled_format = df_numeric.style.format(two_decimals)
display(styled_format)

In [None]:
# Per-column formatters keyed by column name; a formatter may be a format
# string or a callable. 'Name' has no entry here.
column_formatters = {
    'Age': '{} years',
    'Salary': '${:,.2f}',
    'Department': lambda x: f'Dept: {x}',
}
styled_mixed_format = df_mixed.style.format(column_formatters)
display(styled_mixed_format)

##### 5. Applying Custom Styles with `applymap()` and `apply()`

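The `applymap()` method (renamed to `map()` in pandas 2.1, where `applymap()` is deprecated) applies a function to each cell individually, while the `apply()` method passes a whole column (`axis=0`) or row (`axis=1`) to the function and expects one style string back per element.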

In [None]:
# Define a function to color cells based on their values
def color_negative_red(val):
    """
    Return the CSS text-colour declaration for one scalar.

    Values below zero give 'color: red'; every other value
    (including zero) gives 'color: black'.
    """
    text_color = 'black'
    if val < 0:
        text_color = 'red'
    return f'color: {text_color}'

# Apply the function to each cell.
# Styler.map is the current name for element-wise styling; Styler.applymap
# has been deprecated in its favour since pandas 2.1.
styled_applymap = df_numeric.style.map(color_negative_red)
display(styled_applymap)

In [None]:
# Define a function to highlight the background of cells based on their values
def highlight_greaterthan(s, threshold):
    """
    Build one CSS string per element of ``s``.

    ``s`` must support an elementwise ``>`` comparison with ``threshold``
    (for example a pandas Series). Elements greater than ``threshold`` get
    a yellow background; all others get an empty style. Returns a list.
    """
    styles = []
    for exceeds in s > threshold:
        styles.append('background-color: yellow' if exceeds else '')
    return styles

# Run the helper with axis=0; the extra keyword (threshold=0.5) is
# forwarded to highlight_greaterthan by Styler.apply
styled_apply = df_numeric.style.apply(
    highlight_greaterthan,
    axis=0,
    threshold=0.5,
)
display(styled_apply)

##### 6. Combining Multiple Styles

You can chain multiple styling methods to combine different styles.

In [None]:
# Combine multiple styles on a single Styler: two-decimal formatting,
# red text for negatives, and green/coral backgrounds for maxima/minima.
# Styler.map is used for the element-wise step because Styler.applymap has
# been deprecated in its favour since pandas 2.1.
styled_combined = (
    df_numeric.style
    .format("{:.2f}")
    .map(color_negative_red)
    .highlight_max(color='lightgreen')
    .highlight_min(color='lightcoral')
)

display(styled_combined)

##### 7. Using `background_gradient()` for Heatmaps

The `background_gradient()` method applies a color gradient to the background of cells based on their values.

In [None]:
# Colour every cell's background from the 'viridis' colormap
styled_heatmap = (
    df_numeric.style
    .background_gradient(cmap='viridis')
)
display(styled_heatmap)

In [None]:
# Restrict the 'viridis' gradient to columns A and B via `subset`
gradient_columns = ['A', 'B']
styled_heatmap_subset = df_numeric.style.background_gradient(
    subset=gradient_columns,
    cmap='viridis',
)
display(styled_heatmap_subset)

##### 8. Using `bar()` for Bar Charts

The `bar()` method creates a bar chart within the cells of the DataFrame.

In [None]:
# Draw in-cell bars with align='mid', supplying a two-colour list
styled_bar = df_numeric.style.bar(
    align='mid',
    color=['lightblue', 'lightgreen'],
)
display(styled_bar)

In [None]:
# In-cell bars for columns C and D only, in a single colour with align='zero'
styled_bar_subset = df_numeric.style.bar(
    subset=['C', 'D'],
    align='zero',
    color='lightcoral',
)
display(styled_bar_subset)

##### 9. Hiding Index and Columns

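The `hide()` method allows you to hide the index (`axis='index'`) or specific columns (`axis='columns'`). It replaces the older `hide_index()` and `hide_columns()` methods, which were deprecated in pandas 1.4 and removed in pandas 2.0.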

In [None]:
# Hide the index.
# Styler.hide(axis='index') replaces Styler.hide_index(), which was
# deprecated in pandas 1.4 and removed in pandas 2.0.
styled_hide_index = df_numeric.style.hide(axis='index')
display(styled_hide_index)

In [None]:
# Hide specific columns.
# Styler.hide(subset, axis='columns') replaces Styler.hide_columns(), which
# was deprecated in pandas 1.4 and removed in pandas 2.0.
styled_hide_columns = df_numeric.style.hide(['B', 'D'], axis='columns')
display(styled_hide_columns)

##### 10. Setting Table Styles

The `set_table_styles()` method allows you to set CSS styles for the entire table.

In [None]:
# CSS rules for the table: each entry pairs a selector with a list of
# (property, value) tuples
table_styles = [
    {
        'selector': 'th',
        'props': [
            ('background-color', 'lightblue'),
            ('color', 'black'),
            ('font-weight', 'bold'),
        ],
    },
    {
        'selector': 'td',
        'props': [('padding', '5px')],
    },
    {
        'selector': 'tr:hover',
        'props': [('background-color', 'lightyellow')],
    },
]

# Attach the rules to a Styler and render it
styled_table = df_numeric.style.set_table_styles(table_styles)
display(styled_table)

##### 11. Setting Properties for Specific Cells

The `set_properties()` method applies the same CSS properties to every cell, or only to the cells selected with the `subset` argument.

In [None]:
# The property names contain hyphens, so they are collected in a dict and
# unpacked as keyword arguments; no subset is given
cell_css = {
    'background-color': 'lightgray',
    'color': 'black',
    'border': '1px solid black',
}
styled_properties = df_numeric.style.set_properties(**cell_css)
display(styled_properties)

In [None]:
# Limit the light-blue background to columns A and C via `subset`
styled_properties_subset = df_numeric.style.set_properties(
    subset=['A', 'C'],
    **{'background-color': 'lightblue'},
)
display(styled_properties_subset)

##### 12. Using `highlight_null()` to Highlight Missing Values

The `highlight_null()` method highlights missing values in the DataFrame.

In [None]:
# Create a DataFrame with missing values: work on a copy so df_numeric
# itself is left untouched, then blank out three cells by position
df_missing = df_numeric.copy()
df_missing.iloc[0, 0] = np.nan
df_missing.iloc[1, 2] = np.nan
df_missing.iloc[3, 1] = np.nan

# Highlight missing values.
# The keyword is `color`; the older `null_color` keyword was deprecated in
# pandas 1.5 and removed in pandas 2.0.
styled_missing = df_missing.style.highlight_null(color='red')
display(styled_missing)

##### 13. Exporting Styled DataFrames

You can export styled DataFrames to HTML or Excel.

In [None]:
# Export to HTML.
# Styler.to_html() replaces Styler.render(), which was deprecated in
# pandas 1.4 and removed in pandas 2.0.
html = styled_combined.to_html()
print("HTML output (first 500 characters):")
print(html[:500])

In [None]:
# Export to Excel (commented out to avoid creating files; uncommenting it requires the openpyxl package)
# styled_combined.to_excel('styled_dataframe.xlsx', engine='openpyxl')

##### 14. Creating a Comprehensive Styled Dashboard

Let's create a comprehensive styled dashboard by combining multiple styling techniques.

In [None]:
# Sample sales data: five products with seeded random integer sales for each
# quarter. The quarter columns are drawn in Q1..Q4 order.
np.random.seed(42)
quarter_cols = ['Q1_Sales', 'Q2_Sales', 'Q3_Sales', 'Q4_Sales']
sales_data = pd.DataFrame({
    'Product': ['Product A', 'Product B', 'Product C', 'Product D', 'Product E'],
    **{quarter: np.random.randint(100, 1000, 5) for quarter in quarter_cols},
}).assign(
    # Row-wise total across the four quarter columns
    Total_Sales=lambda frame: frame[quarter_cols].sum(axis=1)
)

# Every monetary column shares the same dollar format
money_formats = {col: '${:,.0f}' for col in quarter_cols + ['Total_Sales']}

# CSS rules for header cells, data cells and the caption
dashboard_css = [
    {'selector': 'th', 'props': [('background-color', '#4CAF50'), ('color', 'white'), ('font-weight', 'bold')]},
    {'selector': 'td', 'props': [('padding', '5px')]},
    {'selector': 'caption', 'props': [('caption-side', 'top'), ('font-size', '1.5em'), ('font-weight', 'bold')]},
]

# Assemble the dashboard: formatting, a gradient on the totals, in-cell bars
# plus max/min highlights on the quarters, then table CSS and a caption
dashboard = (
    sales_data.style
    .format(money_formats)
    .background_gradient(cmap='Blues', subset=['Total_Sales'])
    .bar(subset=quarter_cols, color='lightblue', align='mid')
    .highlight_max(subset=quarter_cols, color='lightgreen')
    .highlight_min(subset=quarter_cols, color='lightcoral')
    .set_table_styles(dashboard_css)
    .set_caption('Sales Dashboard - Quarterly Performance')
)

display(dashboard)