EXP-1

In [2]:
# Simple dataset: (Outlook, Temp, Label)
data = [
    ("Sunny", "Hot", "No"),
    ("Sunny", "Mild", "Yes"),
    ("Rainy", "Cool", "Yes"),
    ("Rainy", "Mild", "Yes"),
    ("Sunny", "Cool", "Yes")
]

# Split positives and negatives
positives = [x for x in data if x[2] == "Yes"]
negatives = [x for x in data if x[2] == "No"]

rules = []

def learn_rule(positives, negatives):
    # Try to find simple rule: Outlook=value or Temp=value
    attributes = [0, 1]  # index of attributes
    for attr in attributes:
        values = set([p[attr] for p in positives])
        for v in values:
            # Rule: IF attr=v THEN Yes
            covered_pos = [p for p in positives if p[attr] == v]
            covered_neg = [n for n in negatives if n[attr] == v]

            # Sequential covering: rule must not cover negatives
            if len(covered_neg) == 0:
                return (attr, v), covered_pos
    return None, []

# Main Sequential Covering Loop
while positives:
    rule, covered = learn_rule(positives, negatives)
    if rule is None:
        break
    rules.append(rule)
    for c in covered:
        positives.remove(c)

# Print learned rules
for i, r in enumerate(rules, 1):
    attr = "Outlook" if r[0] == 0 else "Temperature"
    print(f"Rule {i}: IF {attr} = {r[1]} THEN Yes")


Rule 1: IF Outlook = Rainy THEN Yes
Rule 2: IF Temperature = Cool THEN Yes
Rule 3: IF Temperature = Mild THEN Yes


EXP-2

In [3]:
from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier, _tree
import numpy as np

# Load dataset
X, y = load_iris(return_X_y=True)
clf = DecisionTreeClassifier(max_depth=3, random_state=42)
clf.fit(X, y)

feature_names = load_iris().feature_names
target_names = load_iris().target_names

rules = []

# Recursive rule extraction
def extract_rules(node, rule):
    if tree.feature[node] != _tree.TREE_UNDEFINED:
        name = feature_names[tree.feature[node]]
        threshold = tree.threshold[node]

        # left child rule
        extract_rules(tree.children_left[node],
                      rule + [f"{name} <= {threshold:.2f}"])

        # right child rule
        extract_rules(tree.children_right[node],
                      rule + [f"{name} > {threshold:.2f}"])
    else:
        # Leaf → convert to rule
        class_label = target_names[np.argmax(tree.value[node])]
        rules.append((rule, class_label))

tree = clf.tree_
extract_rules(0, [])

# Print rules
for r, label in rules:
    print("IF " + " AND ".join(r) + f" THEN class = {label}")


IF petal length (cm) <= 2.45 THEN class = setosa
IF petal length (cm) > 2.45 AND petal width (cm) <= 1.75 AND petal length (cm) <= 4.95 THEN class = versicolor
IF petal length (cm) > 2.45 AND petal width (cm) <= 1.75 AND petal length (cm) > 4.95 THEN class = virginica
IF petal length (cm) > 2.45 AND petal width (cm) > 1.75 AND petal length (cm) <= 4.85 THEN class = virginica
IF petal length (cm) > 2.45 AND petal width (cm) > 1.75 AND petal length (cm) > 4.85 THEN class = virginica


EXP-3

In [4]:
# --- Domain Theory (facts + rules) ---
domain = {
    "light": ["blockA", "blockC"],
    "stable": ["blockB", "blockD"]
}

# Rule: can_stack(X,Y) if light(X) and stable(Y)
def can_stack(x, y):
    return (x in domain["light"]) and (y in domain["stable"])

# Positive Example
example = ("blockA", "blockB")

# EBL Explanation + Generalization
def explain_and_generalize(example):
    x, y = example
    
    # Explain why the example is positive
    if can_stack(x, y):
        explanation = [
            f"{x} is light",
            f"{y} is stable"
        ]
        
        # Generalized EBL Rule
        generalized_rule = "IF light(X) AND stable(Y) THEN can_stack(X, Y)"
        
        return explanation, generalized_rule
    else:
        return None, None

exp, rule = explain_and_generalize(example)

print("Explanation for positive example:")
for e in exp:
    print("-", e)

print("\nGeneralized Rule:")
print(rule)


Explanation for positive example:
- blockA is light
- blockB is stable

Generalized Rule:
IF light(X) AND stable(Y) THEN can_stack(X, Y)


EXP-4

In [5]:
import numpy as np
import random

# Environment states (0 to 4)
states = [0, 1, 2, 3, 4]
actions = ["left", "right"]

# Q-table (5 states × 2 actions)
Q = np.zeros((5, 2))

alpha = 0.5     # learning rate
gamma = 0.9     # discount factor
epsilon = 0.2   # exploration rate

def step(state, action):
    if action == "left":
        next_state = max(0, state - 1)
    else:  # "right"
        next_state = min(4, state + 1)

    reward = 10 if next_state == 4 else -1
    return next_state, reward

# Training episodes
for episode in range(50):
    state = 0  # start always at state 0
    done = False

    while not done:
        # ε-greedy action selection
        if random.random() < epsilon:
            a = random.choice([0, 1])
        else:
            a = np.argmax(Q[state])

        action = actions[a]
        next_state, reward = step(state, action)

        # Q-learning update rule
        Q[state, a] = Q[state, a] + alpha * (
            reward + gamma * np.max(Q[next_state]) - Q[state, a]
        )

        state = next_state
        if state == 4:
            done = True

print("Final Q-Table:\n", Q)

# Extract optimal policy
policy = [actions[np.argmax(Q[s])] for s in states]
print("\nOptimal Policy:", policy)


Final Q-Table:
 [[ 3.03753622  4.58      ]
 [ 3.08185241  6.2       ]
 [ 3.79695929  8.        ]
 [ 5.90604401 10.        ]
 [ 0.          0.        ]]

Optimal Policy: ['right', 'right', 'right', 'right', 'left']


EXP-5

In [6]:
import numpy as np
import random

# Grid World 3x3 → states 0–8
states = list(range(9))

# Actions
actions = ["up","down","left","right"]

# Q-table (9 states × 4 actions)
Q = np.zeros((9, 4))

alpha = 0.5
gamma = 0.9
epsilon = 0.2

def to_state(r, c): return r * 3 + c
def to_pos(s): return divmod(s, 3)

# Movement rules
def step(state, action):
    r, c = to_pos(state)

    if action == "up":    r = max(0, r - 1)
    if action == "down":  r = min(2, r + 1)
    if action == "left":  c = max(0, c - 1)
    if action == "right": c = min(2, c + 1)

    next_state = to_state(r, c)
    reward = 10 if next_state == 8 else -1   # Goal at state 8
    done = (next_state == 8)
    return next_state, reward, done

# -------- Random Policy Baseline --------
def random_policy_episodes(n=20):
    total = 0
    for _ in range(n):
        s = 0
        episode_reward = 0
        done = False
        while not done:
            a = random.choice(actions)
            s, r, done = step(s, a)
            episode_reward += r
        total += episode_reward
    return total / n

baseline_reward = random_policy_episodes()

# -------- Q-Learning Training --------
for ep in range(100):
    s = 0
    done = False

    while not done:
        # epsilon-greedy
        if random.random() < epsilon:
            a_idx = random.randint(0,3)
        else:
            a_idx = np.argmax(Q[s])

        next_s, reward, done = step(s, actions[a_idx])

        # TD Update
        Q[s, a_idx] = Q[s, a_idx] + alpha * (
            reward + gamma * np.max(Q[next_s]) - Q[s, a_idx]
        )

        s = next_s

# Optimal policy after learning
policy = [actions[np.argmax(Q[s])] for s in states]

print("Baseline Reward (Random Policy):", baseline_reward)
print("\nLearned Optimal Policy (state 0–8):")
print(policy)
print("\nQ-Table:\n", Q)


Baseline Reward (Random Policy): -10.9

Learned Optimal Policy (state 0–8):
['down', 'down', 'down', 'right', 'right', 'down', 'right', 'right', 'up']

Q-Table:
 [[ 3.0834321   4.58        2.94059752  4.27284022]
 [-0.975       6.19941958 -1.30125    -1.2125    ]
 [ 2.12031226  7.45312473 -1.25625    -0.975     ]
 [ 2.5584458   1.759375    4.4400146   6.2       ]
 [ 4.51860817  7.046875    4.53854366  8.        ]
 [ 3.82656178 10.          4.65        6.9296875 ]
 [-0.5        -0.5         0.          6.234375  ]
 [-0.5         0.          0.          9.9609375 ]
 [ 0.          0.          0.          0.        ]]
