In [None]:
import pandas as pd
import numpy as np

def initialize_hypothesis(first_positive_instance):
    """Build the starting hypotheses from the first positive example.

    The last element of ``first_positive_instance`` is dropped (the caller
    uses it as the class label); the remaining values become the specific
    hypothesis.  The general hypothesis is a square list of lists, one row
    per attribute, in which every entry is the wildcard ``'?'``.

    Returns:
        A ``(specific_h, general_h)`` tuple.
    """
    attributes = first_positive_instance[:-1]
    specific_h = [*attributes]
    width = len(specific_h)
    # Each row is built separately so the rows never share one list object.
    general_h = [['?'] * width for _ in range(width)]
    return specific_h, general_h

def is_consistent(hypothesis, instance):
    """Return True when ``hypothesis`` matches ``instance``.

    A position matches if the hypothesis holds the wildcard ``'?'`` there or
    holds the same value as ``instance`` at that index.  The check stops at
    the first position that does not match.
    """
    return not any(
        constraint != '?' and constraint != instance[pos]
        for pos, constraint in enumerate(hypothesis)
    )

def generalize_S(specific_h, instance):
    """Generalize the specific hypothesis so that it covers ``instance``.

    Every position whose current value differs from the instance's value is
    replaced by the wildcard ``'?'``.  ``specific_h`` is modified in place
    and the same list object is returned.
    """
    for pos, current in enumerate(specific_h):
        if current != instance[pos]:
            specific_h[pos] = '?'
    return specific_h

def specialize_G(general_h, specific_h, instance):
    """Specialize the general boundary so that it rejects a negative instance.

    Args:
        general_h: List of general hypotheses; each is a list of attribute
            constraints in which ``'?'`` is the wildcard.
        specific_h: Current specific hypothesis.  Its non-wildcard values are
            the only values used to fill in wildcards.
        instance: Attribute values of the negative example (label removed).

    Returns:
        A new list of hypotheses without duplicates.  Hypotheses that already
        reject ``instance`` are carried over unchanged.  Each hypothesis that
        still matches it is replaced by copies in which one wildcard is set
        to the ``specific_h`` value at that position, provided that value
        differs from the instance's value there.
    """
    new_general_h = []
    for gh in general_h:
        if not is_consistent(gh, instance):
            # This hypothesis already excludes the negative example, so it
            # needs no specialization.  It must be kept: dropping it would
            # shrink the general boundary on every later negative example.
            if gh not in new_general_h:
                new_general_h.append(gh)
            continue

        for i, constraint in enumerate(gh):
            target = specific_h[i]
            # Only wildcards can be specialized, and only towards a concrete
            # value of the specific hypothesis.
            if constraint != '?' or target == '?':
                continue
            # Filling in the instance's own value would still match the
            # negative example, so it is not a useful specialization.
            if target == instance[i]:
                continue
            new_g = gh.copy()
            new_g[i] = target
            if new_g not in new_general_h:
                new_general_h.append(new_g)
    return new_general_h

def remove_inconsistent_hypotheses(general_h, instance, positive=True):
    """Filter ``general_h`` by how each hypothesis relates to ``instance``.

    With ``positive=True`` only hypotheses that match the instance are kept;
    with ``positive=False`` only hypotheses that do not match it are kept.
    A new list is returned; the hypotheses themselves are not copied.
    """
    return [
        candidate
        for candidate in general_h
        if is_consistent(candidate, instance) == positive
    ]

def candidate_elimination(data):
    """Run the candidate-elimination loop over ``data`` and print each step.

    Each row of ``data`` is a sequence whose last element is the class label;
    a row is positive when that label equals ``'yes'`` and negative otherwise.
    The hypotheses are initialized from the first positive row, then every
    row is processed in order.

    Returns:
        ``(specific_h, general_h)`` after the last row, or ``(None, None)``
        when no row is labelled ``'yes'``.
    """
    # The first positive example initializes both hypotheses.
    seed = next((row for row in data if row[-1] == 'yes'), None)
    if seed is None:
        return None, None

    specific_h, general_h = initialize_hypothesis(seed)

    print("\nInitial Specific Hypothesis:", specific_h)
    print("Initial General Hypothesis:", general_h)

    for i, instance in enumerate(data):
        print(f"\nInstance {i+1}: {instance}")

        is_positive = instance[-1] == 'yes'
        attributes = instance[:-1]
        if is_positive:
            # Widen S to cover the example, then drop members of G that
            # fail to match it.
            specific_h = generalize_S(specific_h, attributes)
            general_h = remove_inconsistent_hypotheses(general_h, attributes, positive=True)
        else:
            # Narrow G, then keep only members that reject the example.
            general_h = specialize_G(general_h, specific_h, attributes)
            general_h = remove_inconsistent_hypotheses(general_h, attributes, positive=False)

        print("Specific Hypothesis:", specific_h)
        print("General Hypothesis:", general_h)

    return specific_h, general_h

# Driver: load the dataset, run candidate elimination on it, and print the
# resulting boundaries.
# The CSV is read from a path relative to the current working directory.
df = pd.read_csv('netflix_titles.csv')
# Convert the frame to a list of row lists; candidate_elimination treats the
# last element of each row as the class label and compares it with 'yes'.
# NOTE(review): confirm that the last column of this CSV really holds
# 'yes'/other labels; if no row ends in 'yes' the call below returns
# (None, None).
data = df.values.tolist()

print("Dataset:")
print(df)
print("\nRunning Candidate Elimination Algorithm...")

# candidate_elimination also prints the hypotheses after every instance.
final_S, final_G = candidate_elimination(data)

print("\nFinal Hypothesis Space:")
print("Specific Boundary (S):", final_S)
print("General Boundary (G):", final_G)


Dataset:
     show_id     type                  title         director  \
0         s1    Movie   Dick Johnson Is Dead  Kirsten Johnson   
1         s2  TV Show          Blood & Water              NaN   
2         s3  TV Show              Ganglands  Julien Leclercq   
3         s4  TV Show  Jailbirds New Orleans              NaN   
4         s5  TV Show           Kota Factory              NaN   
...      ...      ...                    ...              ...   
8802   s8803    Movie                 Zodiac    David Fincher   
8803   s8804  TV Show            Zombie Dumb              NaN   
8804   s8805    Movie             Zombieland  Ruben Fleischer   
8805   s8806    Movie                   Zoom     Peter Hewitt   
8806   s8807    Movie                 Zubaan      Mozez Singh   

                                                   cast        country  \
0                                                   NaN  United States   
1     Ama Qamata, Khosi Ngema, Gail Mabalane, Thaban...   Sout