#### Pandas Part 90: Styling DataFrames

This notebook explores the styling capabilities in pandas, which allow you to format and style DataFrames for better visualization and presentation.

In [1]:
import pandas as pd
import numpy as np
from IPython.display import display

##### 1. Creating Sample DataFrames

In [2]:
# Create a sample DataFrame with numeric data
np.random.seed(42)
df_numeric = pd.DataFrame({
    'A': np.random.randn(5),
    'B': np.random.randn(5),
    'C': np.random.randn(5),
    'D': np.random.randn(5)
})

# Create a sample DataFrame with mixed data types
df_mixed = pd.DataFrame({
    'Name': ['Alice', 'Bob', 'Charlie', 'David', 'Eve'],
    'Age': [25, 30, 35, 40, 45],
    'Salary': [50000, 60000, 75000, 90000, 100000],
    'Department': ['HR', 'IT', 'Finance', 'Marketing', 'Operations']
})

# Display the DataFrames
print("Numeric DataFrame:")
display(df_numeric)

print("\nMixed DataFrame:")
display(df_mixed)

Numeric DataFrame:


Unnamed: 0,A,B,C,D
0,0.496714,-0.234137,-0.463418,-0.562288
1,-0.138264,1.579213,-0.46573,-1.012831
2,0.647689,0.767435,0.241962,0.314247
3,1.52303,-0.469474,-1.91328,-0.908024
4,-0.234153,0.54256,-1.724918,-1.412304



Mixed DataFrame:


Unnamed: 0,Name,Age,Salary,Department
0,Alice,25,50000,HR
1,Bob,30,60000,IT
2,Charlie,35,75000,Finance
3,David,40,90000,Marketing
4,Eve,45,100000,Operations


##### 2. Basic Styling

The `.style` accessor returns a `Styler` object that can be used to style the DataFrame.

In [3]:
# Basic styling
styled = df_numeric.style
display(styled)

Unnamed: 0,A,B,C,D
0,0.496714,-0.234137,-0.463418,-0.562288
1,-0.138264,1.579213,-0.46573,-1.012831
2,0.647689,0.767435,0.241962,0.314247
3,1.52303,-0.469474,-1.91328,-0.908024
4,-0.234153,0.54256,-1.724918,-1.412304


##### 3. Highlighting Maximum and Minimum Values

The `highlight_max()` and `highlight_min()` methods highlight the maximum and minimum values in the DataFrame.

In [4]:
# Highlight maximum values
styled_max = df_numeric.style.highlight_max(color='lightgreen')
display(styled_max)

Unnamed: 0,A,B,C,D
0,0.496714,-0.234137,-0.463418,-0.562288
1,-0.138264,1.579213,-0.46573,-1.012831
2,0.647689,0.767435,0.241962,0.314247
3,1.52303,-0.469474,-1.91328,-0.908024
4,-0.234153,0.54256,-1.724918,-1.412304


In [5]:
# Highlight minimum values
styled_min = df_numeric.style.highlight_min(color='lightcoral')
display(styled_min)

Unnamed: 0,A,B,C,D
0,0.496714,-0.234137,-0.463418,-0.562288
1,-0.138264,1.579213,-0.46573,-1.012831
2,0.647689,0.767435,0.241962,0.314247
3,1.52303,-0.469474,-1.91328,-0.908024
4,-0.234153,0.54256,-1.724918,-1.412304


In [6]:
# Highlight both maximum and minimum values
styled_both = df_numeric.style.highlight_max(color='lightgreen').highlight_min(color='lightcoral')
display(styled_both)

Unnamed: 0,A,B,C,D
0,0.496714,-0.234137,-0.463418,-0.562288
1,-0.138264,1.579213,-0.46573,-1.012831
2,0.647689,0.767435,0.241962,0.314247
3,1.52303,-0.469474,-1.91328,-0.908024
4,-0.234153,0.54256,-1.724918,-1.412304


In [7]:
# Highlight max and min by row (axis=1)
styled_row = df_numeric.style.highlight_max(color='lightgreen', axis=1).highlight_min(color='lightcoral', axis=1)
display(styled_row)

Unnamed: 0,A,B,C,D
0,0.496714,-0.234137,-0.463418,-0.562288
1,-0.138264,1.579213,-0.46573,-1.012831
2,0.647689,0.767435,0.241962,0.314247
3,1.52303,-0.469474,-1.91328,-0.908024
4,-0.234153,0.54256,-1.724918,-1.412304


##### 4. Formatting Values

The `format()` method allows you to format the values in the DataFrame.

In [8]:
# Format all numeric values to 2 decimal places
styled_format = df_numeric.style.format("{:.2f}")
display(styled_format)

Unnamed: 0,A,B,C,D
0,0.5,-0.23,-0.46,-0.56
1,-0.14,1.58,-0.47,-1.01
2,0.65,0.77,0.24,0.31
3,1.52,-0.47,-1.91,-0.91
4,-0.23,0.54,-1.72,-1.41


In [9]:
# Format different columns differently
styled_mixed_format = df_mixed.style.format({
    'Age': '{} years',
    'Salary': '${:,.2f}',
    'Department': lambda x: f'Dept: {x}'
})
display(styled_mixed_format)

Unnamed: 0,Name,Age,Salary,Department
0,Alice,25 years,"$50,000.00",Dept: HR
1,Bob,30 years,"$60,000.00",Dept: IT
2,Charlie,35 years,"$75,000.00",Dept: Finance
3,David,40 years,"$90,000.00",Dept: Marketing
4,Eve,45 years,"$100,000.00",Dept: Operations


##### 5. Applying Custom Styles with `applymap()` and `apply()`

The `applymap()` method applies a function to each cell, while the `apply()` method applies a function to each row or column.

In [10]:
# Define a function to color cells based on their values
def color_negative_red(val):
    """
    Takes a scalar and returns a string with
    the css property 'color: red' for negative
    values, black otherwise.
    """
    color = 'red' if val < 0 else 'black'
    return f'color: {color}'

# Apply the function to each cell
styled_applymap = df_numeric.style.applymap(color_negative_red)
display(styled_applymap)

  styled_applymap = df_numeric.style.applymap(color_negative_red)


Unnamed: 0,A,B,C,D
0,0.496714,-0.234137,-0.463418,-0.562288
1,-0.138264,1.579213,-0.46573,-1.012831
2,0.647689,0.767435,0.241962,0.314247
3,1.52303,-0.469474,-1.91328,-0.908024
4,-0.234153,0.54256,-1.724918,-1.412304


In [11]:
# Define a function to highlight the background of cells based on their values
def highlight_greaterthan(s, threshold):
    """
    Highlight values greater than threshold.
    """
    is_gt = s > threshold
    return ['background-color: yellow' if v else '' for v in is_gt]

# Apply the function to each column
styled_apply = df_numeric.style.apply(highlight_greaterthan, threshold=0.5, axis=0)
display(styled_apply)

Unnamed: 0,A,B,C,D
0,0.496714,-0.234137,-0.463418,-0.562288
1,-0.138264,1.579213,-0.46573,-1.012831
2,0.647689,0.767435,0.241962,0.314247
3,1.52303,-0.469474,-1.91328,-0.908024
4,-0.234153,0.54256,-1.724918,-1.412304


##### 6. Combining Multiple Styles

You can chain multiple styling methods to combine different styles.

In [12]:
# Combine multiple styles
styled_combined = df_numeric.style\
    .format("{:.2f}")\
    .applymap(color_negative_red)\
    .highlight_max(color='lightgreen')\
    .highlight_min(color='lightcoral')
    
display(styled_combined)

  .applymap(color_negative_red)\


Unnamed: 0,A,B,C,D
0,0.5,-0.23,-0.46,-0.56
1,-0.14,1.58,-0.47,-1.01
2,0.65,0.77,0.24,0.31
3,1.52,-0.47,-1.91,-0.91
4,-0.23,0.54,-1.72,-1.41


##### 7. Using `background_gradient()` for Heatmaps

The `background_gradient()` method applies a color gradient to the background of cells based on their values.

In [13]:
# Create a heatmap using background_gradient
styled_heatmap = df_numeric.style.background_gradient(cmap='viridis')
display(styled_heatmap)

Unnamed: 0,A,B,C,D
0,0.496714,-0.234137,-0.463418,-0.562288
1,-0.138264,1.579213,-0.46573,-1.012831
2,0.647689,0.767435,0.241962,0.314247
3,1.52303,-0.469474,-1.91328,-0.908024
4,-0.234153,0.54256,-1.724918,-1.412304


In [14]:
# Apply background_gradient to specific columns
styled_heatmap_subset = df_numeric.style.background_gradient(cmap='viridis', subset=['A', 'B'])
display(styled_heatmap_subset)

Unnamed: 0,A,B,C,D
0,0.496714,-0.234137,-0.463418,-0.562288
1,-0.138264,1.579213,-0.46573,-1.012831
2,0.647689,0.767435,0.241962,0.314247
3,1.52303,-0.469474,-1.91328,-0.908024
4,-0.234153,0.54256,-1.724918,-1.412304


##### 8. Using `bar()` for Bar Charts

The `bar()` method creates a bar chart within the cells of the DataFrame.

In [15]:
# Create a bar chart
styled_bar = df_numeric.style.bar(color=['lightblue', 'lightgreen'], align='mid')
display(styled_bar)

Unnamed: 0,A,B,C,D
0,0.496714,-0.234137,-0.463418,-0.562288
1,-0.138264,1.579213,-0.46573,-1.012831
2,0.647689,0.767435,0.241962,0.314247
3,1.52303,-0.469474,-1.91328,-0.908024
4,-0.234153,0.54256,-1.724918,-1.412304


In [16]:
# Apply bar chart to specific columns
styled_bar_subset = df_numeric.style.bar(subset=['C', 'D'], color='lightcoral', align='zero')
display(styled_bar_subset)

Unnamed: 0,A,B,C,D
0,0.496714,-0.234137,-0.463418,-0.562288
1,-0.138264,1.579213,-0.46573,-1.012831
2,0.647689,0.767435,0.241962,0.314247
3,1.52303,-0.469474,-1.91328,-0.908024
4,-0.234153,0.54256,-1.724918,-1.412304


##### 9. Hiding Index and Columns

The `hide_index()` and `hide_columns()` methods allow you to hide the index and specific columns.

In [18]:
import pandas as pd
import numpy as np

# Create a sample DataFrame
df_numeric = pd.DataFrame({
    'A': [1, 2, 3, 4, 5],
    'B': [10, 20, 30, 40, 50],
    'C': [100, 200, 300, 400, 500]
})
print("Original DataFrame:")
print(df_numeric)

# Method 1: Use style.hide() with axis parameter (for newer pandas versions)
try:
    styled_hide_index = df_numeric.style.hide(axis='index')
    print("\nMethod 1: Using style.hide(axis='index'):")
    display(styled_hide_index)
except AttributeError:
    print("\nMethod 1 not available in this pandas version")

# Method 2: Alternative approach - create a copy with reset_index and drop=True
df_no_index = df_numeric.reset_index(drop=True)
styled_no_index = df_no_index.style
print("\nMethod 2: Using reset_index(drop=True):")
display(styled_no_index)

# Method 3: Set index names to empty strings (visual effect only)
df_empty_index = df_numeric.copy()
df_empty_index.index.name = ''
styled_empty_index = df_empty_index.style
print("\nMethod 3: Setting index name to empty string:")
display(styled_empty_index)

Original DataFrame:
   A   B    C
0  1  10  100
1  2  20  200
2  3  30  300
3  4  40  400
4  5  50  500

Method 1: Using style.hide(axis='index'):


A,B,C
1,10,100
2,20,200
3,30,300
4,40,400
5,50,500



Method 2: Using reset_index(drop=True):


Unnamed: 0,A,B,C
0,1,10,100
1,2,20,200
2,3,30,300
3,4,40,400
4,5,50,500



Method 3: Setting index name to empty string:


Unnamed: 0,A,B,C
,,,
0.0,1.0,10.0,100.0
1.0,2.0,20.0,200.0
2.0,3.0,30.0,300.0
3.0,4.0,40.0,400.0
4.0,5.0,50.0,500.0


In [20]:
import pandas as pd
import numpy as np

# Create a sample DataFrame
df_numeric = pd.DataFrame({
    'A': [1, 2, 3, 4, 5],
    'B': [10, 20, 30, 40, 50],
    'C': [100, 200, 300, 400, 500],
    'D': [1000, 2000, 3000, 4000, 5000]
})
print("Original DataFrame:")
print(df_numeric)

# Method 1: Use style.hide() with subset parameter (for newer pandas versions)
try:
    styled_hide_columns = df_numeric.style.hide(subset=None, axis='columns', level=None, names=['B', 'D'])
    print("\nMethod 1: Using style.hide() with columns:")
    display(styled_hide_columns)
except (AttributeError, TypeError) as e:
    print(f"\nMethod 1 not available in this pandas version: {e}")

# Method 2: Alternative approach - create a copy with only the columns you want to display
df_selected_columns = df_numeric[['A', 'C']]  # Exclude 'B' and 'D'
styled_selected = df_selected_columns.style
print("\nMethod 2: Using DataFrame selection to exclude columns:")
display(styled_selected)

# Method 3: For display purposes, you can use a function to format unwanted columns as empty
def hide_columns(df, columns_to_hide):
    # Create a copy to avoid modifying the original
    df_copy = df.copy()
    # Replace values in columns to hide with empty strings
    for col in columns_to_hide:
        if col in df_copy.columns:
            df_copy[col] = ''
    return df_copy

df_formatted = hide_columns(df_numeric, ['B', 'D'])
styled_formatted = df_formatted.style
print("\nMethod 3: Replacing values in columns to hide with empty strings:")
display(styled_formatted)

Original DataFrame:
   A   B    C     D
0  1  10  100  1000
1  2  20  200  2000
2  3  30  300  3000
3  4  40  400  4000
4  5  50  500  5000

Method 1: Using style.hide() with columns:


Unnamed: 0,A,B,C,D
0,1,10,100,1000
1,2,20,200,2000
2,3,30,300,3000
3,4,40,400,4000
4,5,50,500,5000



Method 2: Using DataFrame selection to exclude columns:


Unnamed: 0,A,C
0,1,100
1,2,200
2,3,300
3,4,400
4,5,500



Method 3: Replacing values in columns to hide with empty strings:


Unnamed: 0,A,B,C,D
0,1,,100,
1,2,,200,
2,3,,300,
3,4,,400,
4,5,,500,


##### 10. Setting Table Styles

The `set_table_styles()` method allows you to set CSS styles for the entire table.

In [21]:
# Set table styles
table_styles = [
    {'selector': 'th', 'props': [('background-color', 'lightblue'), ('color', 'black'), ('font-weight', 'bold')]},
    {'selector': 'td', 'props': [('padding', '5px')]},
    {'selector': 'tr:hover', 'props': [('background-color', 'lightyellow')]}
]

styled_table = df_numeric.style.set_table_styles(table_styles)
display(styled_table)

Unnamed: 0,A,B,C,D
0,1,10,100,1000
1,2,20,200,2000
2,3,30,300,3000
3,4,40,400,4000
4,5,50,500,5000


##### 11. Setting Properties for Specific Cells

The `set_properties()` method allows you to set CSS properties for specific cells.

In [22]:
# Set properties for specific cells
styled_properties = df_numeric.style.set_properties(**{'background-color': 'lightgray', 'color': 'black', 'border': '1px solid black'})
display(styled_properties)

Unnamed: 0,A,B,C,D
0,1,10,100,1000
1,2,20,200,2000
2,3,30,300,3000
3,4,40,400,4000
4,5,50,500,5000


In [23]:
# Set properties for specific columns
styled_properties_subset = df_numeric.style.set_properties(subset=['A', 'C'], **{'background-color': 'lightblue'})
display(styled_properties_subset)

Unnamed: 0,A,B,C,D
0,1,10,100,1000
1,2,20,200,2000
2,3,30,300,3000
3,4,40,400,4000
4,5,50,500,5000


##### 12. Using `highlight_null()` to Highlight Missing Values

The `highlight_null()` method highlights missing values in the DataFrame.

In [25]:
import pandas as pd
import numpy as np

# Create a DataFrame with missing values
df_numeric = pd.DataFrame({
    'A': [1, 2, 3, 4, 5],
    'B': [10, 20, 30, 40, 50],
    'C': [100, 200, 300, 400, 500]
})

df_missing = df_numeric.copy()
df_missing.iloc[0, 0] = np.nan
df_missing.iloc[1, 2] = np.nan
df_missing.iloc[3, 1] = np.nan
print("DataFrame with missing values:")
print(df_missing)

# Method 1: Use highlight_null without parameters
try:
    styled_missing = df_missing.style.highlight_null()
    print("\nMethod 1: Using highlight_null() without parameters:")
    display(styled_missing)
except TypeError as e:
    print(f"\nMethod 1 error: {e}")

# Method 2: Use a custom styling function
def highlight_nulls(x):
    is_null = pd.isna(x)
    return ['background-color: red' if v else '' for v in is_null]

try:
    styled_custom = df_missing.style.apply(highlight_nulls, axis=1)
    print("\nMethod 2: Using custom styling function:")
    display(styled_custom)
except Exception as e:
    print(f"\nMethod 2 error: {e}")

# Method 3: Use applymap for cell-by-cell styling
def highlight_null_cells(val):
    if pd.isna(val):
        return 'background-color: red'
    return ''

try:
    styled_applymap = df_missing.style.applymap(highlight_null_cells)
    print("\nMethod 3: Using applymap for cell-by-cell styling:")
    display(styled_applymap)
except Exception as e:
    print(f"\nMethod 3 error: {e}")

DataFrame with missing values:
     A     B      C
0  NaN  10.0  100.0
1  2.0  20.0    NaN
2  3.0  30.0  300.0
3  4.0   NaN  400.0
4  5.0  50.0  500.0

Method 1: Using highlight_null() without parameters:


Unnamed: 0,A,B,C
0,,10.0,100.0
1,2.0,20.0,
2,3.0,30.0,300.0
3,4.0,,400.0
4,5.0,50.0,500.0



Method 2: Using custom styling function:


Unnamed: 0,A,B,C
0,,10.0,100.0
1,2.0,20.0,
2,3.0,30.0,300.0
3,4.0,,400.0
4,5.0,50.0,500.0



Method 3: Using applymap for cell-by-cell styling:


  styled_applymap = df_missing.style.applymap(highlight_null_cells)


Unnamed: 0,A,B,C
0,,10.0,100.0
1,2.0,20.0,
2,3.0,30.0,300.0
3,4.0,,400.0
4,5.0,50.0,500.0


##### 13. Exporting Styled DataFrames

You can export styled DataFrames to HTML or Excel.

In [27]:
import pandas as pd
import numpy as np

# Create a sample DataFrame
df_numeric = pd.DataFrame({
    'A': [1, 2, 3, 4, 5],
    'B': [10, 20, 30, 40, 50],
    'C': [100, 200, 300, 400, 500]
})

# Apply some styling
def color_negative_red(val):
    color = 'red' if val < 0 else 'black'
    return f'color: {color}'

# Create a styled DataFrame
styled_combined = df_numeric.style.applymap(color_negative_red)

# Method 1: Use to_html() method (available in most pandas versions)
try:
    html = styled_combined.to_html()
    print("Method 1: Using to_html()")
    print("HTML output (first 500 characters):")
    print(html[:500])
except AttributeError as e:
    print(f"Method 1 error: {e}")

# Method 2: Alternative approach using DataFrame's to_html
try:
    html_basic = df_numeric.to_html()
    print("\nMethod 2: Using DataFrame's to_html() (without styling)")
    print("HTML output (first 500 characters):")
    print(html_basic[:500])
except Exception as e:
    print(f"Method 2 error: {e}")

# Method 3: Try _repr_html_ method which is used for notebook display
try:
    html_repr = styled_combined._repr_html_()
    print("\nMethod 3: Using _repr_html_()")
    print("HTML output (first 500 characters):")
    print(html_repr[:500])
except AttributeError as e:
    print(f"Method 3 error: {e}")

# Save to file (if any method worked)
try:
    if 'html' in locals():
        with open('styled_dataframe.html', 'w') as f:
            f.write(html)
        print("\nSaved styled HTML to 'styled_dataframe.html'")
    elif 'html_repr' in locals():
        with open('styled_dataframe.html', 'w') as f:
            f.write(html_repr)
        print("\nSaved styled HTML to 'styled_dataframe.html'")
    else:
        with open('basic_dataframe.html', 'w') as f:
            f.write(html_basic)
        print("\nSaved basic HTML to 'basic_dataframe.html'")
except Exception as e:
    print(f"Error saving to file: {e}")

Method 1: Using to_html()
HTML output (first 500 characters):
<style type="text/css">
#T_41293_row0_col0, #T_41293_row0_col1, #T_41293_row0_col2, #T_41293_row1_col0, #T_41293_row1_col1, #T_41293_row1_col2, #T_41293_row2_col0, #T_41293_row2_col1, #T_41293_row2_col2, #T_41293_row3_col0, #T_41293_row3_col1, #T_41293_row3_col2, #T_41293_row4_col0, #T_41293_row4_col1, #T_41293_row4_col2 {
  color: black;
}
</style>
<table id="T_41293">
  <thead>
    <tr>
      <th class="blank level0" >&nbsp;</th>
      <th id="T_41293_level0_col0" class="col_heading level0 col

Method 2: Using DataFrame's to_html() (without styling)
HTML output (first 500 characters):
<table border="1" class="dataframe">
  <thead>
    <tr style="text-align: right;">
      <th></th>
      <th>A</th>
      <th>B</th>
      <th>C</th>
    </tr>
  </thead>
  <tbody>
    <tr>
      <th>0</th>
      <td>1</td>
      <td>10</td>
      <td>100</td>
    </tr>
    <tr>
      <th>1</th>
      <td>2</td>
      <td>20</td>
      <td>20

  styled_combined = df_numeric.style.applymap(color_negative_red)


In [None]:
# Export to Excel (commented out to avoid creating files)
# styled_combined.to_excel('styled_dataframe.xlsx', engine='openpyxl')

##### 14. Creating a Comprehensive Styled Dashboard

Let's create a comprehensive styled dashboard by combining multiple styling techniques.

In [28]:
# Create a sample sales data DataFrame
np.random.seed(42)
sales_data = pd.DataFrame({
    'Product': ['Product A', 'Product B', 'Product C', 'Product D', 'Product E'],
    'Q1_Sales': np.random.randint(100, 1000, 5),
    'Q2_Sales': np.random.randint(100, 1000, 5),
    'Q3_Sales': np.random.randint(100, 1000, 5),
    'Q4_Sales': np.random.randint(100, 1000, 5),
})

# Calculate total sales
sales_data['Total_Sales'] = sales_data[['Q1_Sales', 'Q2_Sales', 'Q3_Sales', 'Q4_Sales']].sum(axis=1)

# Create a styled dashboard
dashboard = sales_data.style\
    .format({'Q1_Sales': '${:,.0f}', 'Q2_Sales': '${:,.0f}', 'Q3_Sales': '${:,.0f}', 'Q4_Sales': '${:,.0f}', 'Total_Sales': '${:,.0f}'})\
    .background_gradient(cmap='Blues', subset=['Total_Sales'])\
    .bar(subset=['Q1_Sales', 'Q2_Sales', 'Q3_Sales', 'Q4_Sales'], color='lightblue', align='mid')\
    .highlight_max(subset=['Q1_Sales', 'Q2_Sales', 'Q3_Sales', 'Q4_Sales'], color='lightgreen')\
    .highlight_min(subset=['Q1_Sales', 'Q2_Sales', 'Q3_Sales', 'Q4_Sales'], color='lightcoral')\
    .set_table_styles([
        {'selector': 'th', 'props': [('background-color', '#4CAF50'), ('color', 'white'), ('font-weight', 'bold')]},
        {'selector': 'td', 'props': [('padding', '5px')]},
        {'selector': 'caption', 'props': [('caption-side', 'top'), ('font-size', '1.5em'), ('font-weight', 'bold')]}
    ])\
    .set_caption('Sales Dashboard - Quarterly Performance')

display(dashboard)

Unnamed: 0,Product,Q1_Sales,Q2_Sales,Q3_Sales,Q4_Sales,Total_Sales
0,Product A,$202,$171,$566,$472,"$1,411"
1,Product B,$535,$800,$314,$199,"$1,848"
2,Product C,$960,$120,$430,$971,"$2,481"
3,Product D,$370,$714,$558,$763,"$2,405"
4,Product E,$206,$221,$187,$230,$844
