In [1]:
import pandas as pd 

In [3]:
# Toy weather dataset: 14 records, one per row.
# Row layout: [Outlook, Temperature, Humidity, Windy, Play]
#   - Outlook / Temperature / Humidity are categorical strings
#   - Windy is a boolean
#   - Play is the class label ('play' or 'not_play')
data = [
    ['Sunny', 'Hot', 'High', False, 'not_play'],
    ['Sunny', 'Hot', 'High', True, 'not_play'],
    ['Overcast', 'Hot', 'High', False, 'play'],
    ['Rainy', 'Mild', 'High', False, 'play'],
    ['Rainy', 'Cool', 'Normal', False, 'play'],
    ['Rainy', 'Cool', 'Normal', True, 'not_play'],
    ['Overcast', 'Cool', 'Normal', True, 'play'],
    ['Sunny', 'Mild', 'High', False, 'not_play'],
    ['Sunny', 'Cool', 'Normal', False, 'play'],
    ['Rainy', 'Mild', 'Normal', False, 'play'],
    ['Sunny', 'Mild', 'Normal', True, 'play'],
    ['Overcast', 'Mild', 'High', True, 'play'],
    ['Overcast', 'Hot', 'Normal', False, 'play'],
    ['Rainy', 'Mild', 'High', True, 'not_play']
]
# Wrap the raw rows in a DataFrame; the class label ('Play') is the last column.
dataset = pd.DataFrame(data, columns=['Outlook', 'Temperature', 'Humidity', 'Windy', 'Play'])


In [4]:
# Helper function to check if a record is covered by a rule
def record_is_covered(record, rule):
    """
    Tell whether ``record`` satisfies every condition of ``rule``.

    ``rule`` maps attribute names to required values. The bookkeeping keys
    'class' and 'default' are not conditions and are ignored. A rule with no
    conditions covers every record.

    Returns True when no condition is violated, False otherwise.
    """
    bookkeeping_keys = ('class', 'default')
    # A record is covered exactly when no real condition mismatches.
    return not any(
        record[attr] != required
        for attr, required in rule.items()
        if attr not in bookkeeping_keys
    )

# Helper function to get class counts for a given subset of data
def get_class_counts(df, target_class_name):
    """
    Count how many rows of ``df`` carry each class label.

    ``target_class_name`` is the label of the class column. The result is the
    ``value_counts()`` Series of that column (label -> number of rows).
    """
    class_column = df[target_class_name]
    return class_column.value_counts()


In [5]:
# 2. Learn-One-Rule (dataset, A, y)
def learn_one_rule(dataset, A, target_class, target_class_name):
    """
    Learn the single-condition rule that best predicts ``target_class``.

    Every ``attr == value`` pair occurring in ``dataset`` is scored by its
    accuracy: the fraction of the records it covers whose
    ``target_class_name`` column equals ``target_class``. The pair with the
    highest accuracy is returned. Ties keep the first pair encountered
    (attributes in the order of ``A``, values in the order produced by
    ``Series.unique()``).

    Parameters
    ----------
    dataset : pandas.DataFrame
        Records still to be covered.
    A : iterable
        Column labels of the candidate attributes.
    target_class : object
        Class label the rule should predict.
    target_class_name : str
        Label of the column holding the class.

    Returns
    -------
    dict or None
        ``{attr: value}`` for the best condition, or ``None`` when
        ``dataset`` is empty or no condition covers at least one record of
        ``target_class``.
    """
    best_rule = None
    # Start at 0 rather than -1: a condition that covers no record of the
    # target class (accuracy 0) is not a usable rule and must not be returned.
    best_accuracy = 0.0

    # Loop-invariant: which records belong to the target class.
    is_target = dataset[target_class_name] == target_class

    # Iterate through each possible condition (attribute-value pair)
    for attr in A:
        column = dataset[attr]
        for value in column.unique():
            # Vectorized coverage test instead of a row-wise apply().
            covered = column == value
            n_covered = int(covered.sum())
            if n_covered == 0:
                # e.g. a NaN value, which never compares equal to itself
                continue

            accuracy = int((covered & is_target).sum()) / n_covered
            if accuracy > best_accuracy:
                best_accuracy = accuracy
                best_rule = {attr: value}

    return best_rule


In [8]:
# 3. Main Sequential Covering Algorithm
def sequential_covering(dataset, target_class_name):
    """
    Build an ordered rule list with the sequential covering algorithm.

    Classes are processed from least to most frequent. For every class except
    the most frequent one, rules are learned one at a time with
    ``learn_one_rule``; the records each rule covers are removed before the
    next rule is learned. The most frequent class becomes the final default
    rule.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Training records. Not modified; a copy is consumed internally.
    target_class_name : str
        Label of the class column. Every other column is treated as a
        candidate attribute, wherever the class column is positioned.

    Returns
    -------
    list of dict
        Rules in application order. A learned rule has the form
        ``{attr: value, 'class': label}``; the last entry is the default rule
        ``{'class': label, 'default': True}``.

    Notes
    -----
    Rules store their metadata under the keys 'class' and 'default', and
    ``record_is_covered`` skips those keys, so attribute columns carrying
    either name are not supported.
    """
    R = []  # Rule list

    # Step 1: Order classes by frequency (minority first)
    class_counts = get_class_counts(dataset, target_class_name).sort_values(ascending=True)
    sorted_classes = class_counts.index.tolist()

    # Candidate attributes are all columns except the class column. Slicing
    # off the last column would be wrong when the class column is elsewhere.
    attributes = [col for col in dataset.columns if col != target_class_name]

    remaining_dataset = dataset.copy()

    # Step 2: Iterate through each class, from minority to majority.
    # The last (majority) class is handled by the default rule below.
    for y in sorted_classes[:-1]:
        print(f"\nLearning rules for class: '{y}'")
        while True:
            # Stopping condition, checked before learning: rules for earlier
            # classes may already have removed every instance of this class.
            if not (remaining_dataset[target_class_name] == y).any():
                print(f"  All instances of class '{y}' covered.")
                break

            # Step 3: Learn a rule for the current target class
            r = learn_one_rule(remaining_dataset, attributes, y, target_class_name)

            # Stop if no good rule can be found
            if not r:
                print(f"  No more rules found for class '{y}'.")
                break

            r['class'] = y

            covered_mask = remaining_dataset.apply(lambda row: record_is_covered(row, r), axis=1)
            covered_count = int(covered_mask.sum())

            # Defensive guard: a rule that removes nothing would loop forever.
            if covered_count == 0:
                print(f"  Rule {r} covers no remaining records.")
                break

            # Step 4: Add the rule (only once it is known to cover records)
            # and remove the covered examples
            R.append(r)
            remaining_dataset = remaining_dataset[~covered_mask]
            print(f"  Rule found: {r}. Covered {covered_count} records. {len(remaining_dataset)} records remaining.")

    # Step 5: Add the default rule for the majority class
    default_class = sorted_classes[-1]
    R.append({'class': default_class, 'default': True})

    return R


In [9]:
# Run sequential covering on the weather data, with 'Play' as the class column
attributes = dataset.columns[:-1]
rule_list = sequential_covering(dataset, 'Play')

# Print the ordered rule list: learned rules as IF ... => class,
# the default rule as ELSE => class
print("\nFinal learned rule list:")
for rule in rule_list:
    if 'default' not in rule:
        # Every key except 'class' is an attribute condition
        conditions = [f"{attr}='{val}'" for attr, val in rule.items() if attr != 'class']
        antecedent = ' and '.join(conditions)
        print(f"IF {antecedent} => {rule['class']}")
    else:
        print(f"ELSE => {rule['class']}")


Learning rules for class: 'not_play'
  Rule found: {'Outlook': 'Sunny', 'class': 'not_play'}. Covered 5 records. 9 records remaining.
  Rule found: {'Windy': np.True_, 'class': 'not_play'}. Covered 4 records. 5 records remaining.
  All instances of class 'not_play' covered.

Final learned rule list:
IF Outlook='Sunny' => not_play
IF Windy='True' => not_play
ELSE => play
