# Pandas Rule Finder Examples

This notebook demonstrates how to use the pandas rule finder extension, which is called as `df.findrule(...)` on a DataFrame, using the iris dataset as sample data.

In [1]:
import sys
from pathlib import Path

import pandas as pd
from sklearn.datasets import load_iris

# The package is imported from the directory above the notebook's working
# directory, so that directory has to be on sys.path before the import below.
module_path = str(Path.cwd().parent)
if module_path not in sys.path:
    sys.path.append(module_path)

# RuleFinder is not referenced by name in this cell; per the original note,
# importing it is what registers the extension (df.findrule) automatically.
from pd_1d_rule_solver.pd_1d_rule_solver import RuleFinder

# Build the sample frame: the iris measurements plus a categorical
# 'species' column decoded from the integer targets.
iris = load_iris()
df = pd.DataFrame(iris.data, columns=iris.feature_names).assign(
    species=pd.Categorical.from_codes(iris.target, iris.target_names)
)

# Quick overview of what was loaded
print("Dataset shape:", df.shape)
print("\nColumns:", df.columns.tolist())
print("\nFirst few rows:")
print(df.head())

Dataset shape: (150, 5)

Columns: ['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)', 'species']

First few rows:
   sepal length (cm)  sepal width (cm)  petal length (cm)  petal width (cm)  \
0                5.1               3.5                1.4               0.2   
1                4.9               3.0                1.4               0.2   
2                4.7               3.2                1.3               0.2   
3                4.6               3.1                1.5               0.2   
4                5.0               3.6                1.4               0.2   

  species  
0  setosa  
1  setosa  
2  setosa  
3  setosa  
4  setosa  


## Example 1: Finding Rules for Numeric Targets

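Let's find rules that maximize sepal length, first with the default number of bins and then with 20 bins. The cells below run the same searches twice (once printing the raw result dictionary, once printing the text visualization) and also preview the categorical `setosa` search that the next section looks at on its own: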

In [4]:
# Without visualization: each search prints the raw result returned by findrule.
# Every entry pairs the banner to print with the keyword arguments for the search.
plain_runs = (
    # Example 1: Find rule for maximizing sepal length with default bins (12)
    (
        "\nExample 1: Maximizing sepal length (default bins)",
        dict(
            target='sepal length (cm)',
            direction='maximize',
            variables=['petal length (cm)', 'petal width (cm)'],
        ),
    ),
    # Example 2: Same analysis with more bins
    (
        "\nExample 2: Same analysis with 20 bins",
        dict(
            target='sepal length (cm)',
            direction='maximize',
            variables=['petal length (cm)', 'petal width (cm)'],
            bins=20,
        ),
    ),
    # Example 3: Find rule for predicting setosa species
    (
        "\nExample 3: Predicting setosa species",
        dict(
            target='species',
            direction='setosa',
            variables=['sepal length (cm)', 'sepal width (cm)'],
        ),
    ),
)

for banner, search_args in plain_runs:
    print(banner)
    result = df.findrule(**search_args)
    print(result)

# After the loop, `result` is the outcome of the last search (Example 3),
# so the metrics printed below belong to the setosa rule.
print("\nDetailed metrics for the best rule:")
print(f"Score: {result['metrics']['score']:.3f}")
print(f"Coverage: {result['metrics']['coverage']:.1%}")
print(f"Matching samples: {result['metrics']['matching_samples']}")


Example 1: Maximizing sepal length (default bins)
{'rule': {'petal length (cm)': (np.float64(4.3), np.float64(6.9))}, 'metrics': {'score': np.float64(0.2549019607843139), 'matching_median': np.float64(6.4), 'non_matching_median': np.float64(5.1), 'matching_mean': np.float64(6.472727272727272), 'non_matching_mean': np.float64(5.17945205479452), 'matching_std': np.float64(0.5888517737794917), 'non_matching_std': np.float64(0.42718682619538473), 'matching_samples': 77, 'total_samples': 150, 'coverage': 0.5133333333333333, 'cohens_d': np.float64(2.5140949530113748), 'matching_quartiles': {0.25: 6.1, 0.5: 6.4, 0.75: 6.8}, 'non_matching_quartiles': {0.25: 4.9, 0.5: 5.1, 0.75: 5.5}, 'matching_skew': np.float64(0.3080885990086034), 'matching_kurtosis': np.float64(0.279667940009654), 'matching_range': np.float64(3.0), 'matching_iqr': np.float64(0.7000000000000002), 'score_std': np.float64(0.02353102111599714), 'score_95ci': array([0.20358491, 0.3       ]), 'interval': (np.float64(4.3), np.floa

In [3]:
# With visualize=True each result carries a text report under the
# 'visualization' key; that report is what gets printed for every search.
# Every entry pairs the banner to print with the keyword arguments for the search.
visual_runs = (
    # Example 1: Find rule for maximizing sepal length with default bins (12)
    (
        "\nExample 1: Maximizing sepal length (default bins)",
        dict(
            target='sepal length (cm)',
            direction='maximize',
            variables=['petal length (cm)', 'petal width (cm)'],
        ),
    ),
    # Example 2: Same analysis with more bins
    (
        "\nExample 2: Same analysis with 20 bins",
        dict(
            target='sepal length (cm)',
            direction='maximize',
            variables=['petal length (cm)', 'petal width (cm)'],
            bins=20,
        ),
    ),
    # Example 3: Find rule for predicting setosa species
    (
        "\nExample 3: Predicting setosa species",
        dict(
            target='species',
            direction='setosa',
            variables=['sepal length (cm)', 'sepal width (cm)'],
        ),
    ),
)

for banner, search_args in visual_runs:
    print(banner)
    result = df.findrule(**search_args, visualize=True)
    print(result['visualization'])

# After the loop, `result` is the outcome of the last search (Example 3),
# so the metrics printed below belong to the setosa rule.
print("\nDetailed metrics for the best rule:")
print(f"Score: {result['metrics']['score']:.3f}")
print(f"Coverage: {result['metrics']['coverage']:.1%}")
print(f"Matching samples: {result['metrics']['matching_samples']}")


Example 1: Maximizing sepal length (default bins)
Rule Impact Analysis

Rule Conditions:
  petal length (cm): 4.300 to 6.900

Matching Samples: 77 (51.3% of data)

sepal length (cm) Distribution:
Original median: 5.80
Rule median: 6.40

     4.3 │░░░░░░░░                                                    
     4.6 │░░░░░░░░░░░░░░░░░░                                          
     4.9 │▓▓▓░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░                  
     5.2 │▓▓▓░░░░░░░░░░░░░░░                                          
     5.5 │▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓░░░░░░░░░░░░░░░░░░░                         
     5.8 │▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓██████                           
     6.1 │▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓████████████████████████████
     6.4 │▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓███████████████████████              
     6.7 │▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓█████████████████████████          
     7.0 │▓▓▓▓▓▓▓▓▓▓██████████                                        
     7.3 │▓▓▓███                                    

## Example 2: Finding Rules for Categorical Targets

Now let's find rules that predict the 'setosa' species:

In [6]:
# Search only the two sepal measurements for a rule that singles out the
# 'setosa' class, and ask for the text report alongside the result.
setosa_search = dict(
    target='species',
    direction='setosa',
    variables=['sepal length (cm)', 'sepal width (cm)'],
    visualize=True,
)
result = df.findrule(**setosa_search)

# The report is stored under the 'visualization' key of the result.
print(result['visualization'])

Rule Impact Analysis

Rule Conditions:
  sepal length (cm): 4.300 to 5.500

Matching Samples: 59 (39.3% of data)

species Distribution:

Matching samples:
setosa       | ███████████████      | 79.7%
versicolor   | ███                  | 18.6%
virginica    |                      | 1.7%

Non-matching samples:
virginica    | ██████████           | 53.8%
versicolor   | ████████             | 42.9%
setosa       |                      | 3.3%

Target class improvement: +76.4%
F1 Score: 0.862
Precision: 0.797
Recall: 0.940

Rule details:
{'sepal length (cm)': (np.float64(4.3), np.float64(5.5))}
