In [None]:
#Naive Bayes Algorithm

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import StandardScaler

WINE_QUALITY_RED_URL = "https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv"


def load_wine_data(url=WINE_QUALITY_RED_URL):
    """Load the red-wine quality table into a DataFrame.

    Parameters
    ----------
    url : str, path or file-like, optional
        Anything ``pandas.read_csv`` accepts. Defaults to the UCI download
        location, so calling the function without arguments behaves as before.
        Passing a local path lets the notebook run from a cached copy.

    Returns
    -------
    pandas.DataFrame
        The parsed table; the file is read with ``;`` as the field separator.
    """
    return pd.read_csv(url, sep=";")

def preprocess_data(data):
    """Split the wine table into standardised features and a binary label.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain a ``'quality'`` column; every other column is treated as
        a feature.

    Returns
    -------
    X : pandas.DataFrame
        The feature columns after ``StandardScaler.fit_transform``, with the
        same column names and the same row index as ``data``.
    y : pandas.Series
        ``1`` where ``quality >= 6`` and ``0`` otherwise, indexed like ``data``.

    Notes
    -----
    The scaler is fitted on every row passed in, i.e. before any train/test
    split performed by the caller.
    """
    X = data.drop('quality', axis=1)
    y = (data['quality'] >= 6).astype(int)

    scaler = StandardScaler()
    # Keep the original row labels: without ``index=X.index`` the scaled frame
    # gets a fresh 0..n-1 index and no longer lines up with ``y`` whenever the
    # input frame was filtered, shuffled or otherwise re-indexed.
    X = pd.DataFrame(scaler.fit_transform(X), columns=X.columns, index=X.index)

    return X, y

def split_data(X, y, test_size=0.2, random_state=42):
    """Delegate to ``train_test_split`` and return whatever it returns.

    ``X`` and ``y`` are forwarded positionally; ``test_size`` and
    ``random_state`` are forwarded as keyword arguments.
    """
    split_options = {"test_size": test_size, "random_state": random_state}
    return train_test_split(X, y, **split_options)

def train_naive_bayes(X_train, y_train):
    """Create a ``GaussianNB`` classifier, fit it on the training data and return it."""
    # ``fit`` hands back the estimator itself, so construction, fitting and
    # returning collapse into a single expression.
    return GaussianNB().fit(X_train, y_train)

def predict_and_evaluate(clf, X_test, y_test):
    """Predict labels for ``X_test`` and score them against ``y_test``.

    Returns
    -------
    tuple
        ``(accuracy, y_pred)`` where ``accuracy`` is the value returned by
        ``accuracy_score(y_test, y_pred)`` and ``y_pred`` is the output of
        ``clf.predict(X_test)``.
    """
    predictions = clf.predict(X_test)
    return accuracy_score(y_test, predictions), predictions

def main():
    """Run the Naive Bayes pipeline end to end and print the evaluation.

    Loads the wine data, preprocesses it, splits it, trains the classifier and
    prints the accuracy followed by the classification report.
    """
    features, labels = preprocess_data(load_wine_data())
    X_train, X_test, y_train, y_test = split_data(features, labels)

    model = train_naive_bayes(X_train, y_train)
    accuracy, y_pred = predict_and_evaluate(model, X_test, y_test)

    print(f"Accuracy: {accuracy:.2f}")
    print("\nClassification Report:")
    print(classification_report(y_test, y_pred))

# Run the Naive Bayes pipeline only when this code is executed directly
# (not when it is imported as a module).
if __name__ == "__main__":
    main()

In [None]:
#Bayesian Belief Network

import pandas as pd
from sklearn.datasets import load_wine
from pgmpy.models import BayesianNetwork
from pgmpy.estimators import ExpectationMaximization
from pgmpy.inference import VariableElimination
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.feature_selection import mutual_info_classif
from sklearn.preprocessing import KBinsDiscretizer

# Load the scikit-learn wine dataset into a DataFrame with a 'target' column.
wine = load_wine()
df = pd.DataFrame(data=wine.data, columns=wine.feature_names)
df['target'] = wine.target

# Replace spaces and slashes in the column names, e.g.
# 'od280/od315_of_diluted_wines' -> 'od280_od315_of_diluted_wines'.
df.columns = [col.replace(' ', '_').replace('/', '_') for col in df.columns]
feature_columns = [col for col in df.columns if col != 'target']

print("Feature names in the dataset:", df.columns.tolist())

# Split BEFORE discretising so the bin edges are learned from the training
# rows only. Copies are taken because columns are assigned on both parts below.
train_data, test_data = train_test_split(df, test_size=0.3, random_state=42)
train_data = train_data.copy()
test_data = test_data.copy()

# Turn every feature into an ordinal bin code (0, 1 or 2).
discretizer = KBinsDiscretizer(n_bins=3, encode='ordinal', strategy='kmeans')
train_data[feature_columns] = discretizer.fit_transform(train_data[feature_columns])
test_data[feature_columns] = discretizer.transform(test_data[feature_columns])

# Rank features by mutual information with the class. The features are bin
# codes, so they are declared discrete; random_state pins the ranking so the
# selected features do not change between runs.
mi_scores = mutual_info_classif(
    train_data[feature_columns],
    train_data['target'],
    discrete_features=True,
    random_state=42,
)
feature_ranking = pd.Series(mi_scores, index=feature_columns).sort_values(ascending=False)
top_features = feature_ranking.index[:5]
model_columns = list(top_features) + ['target']

# Network structure: each selected feature is a parent of 'target'.
# Only these variables are part of the network; the remaining columns would be
# isolated nodes and are therefore left out.
model = BayesianNetwork()
for feature in top_features:
    model.add_node(feature)
model.add_node('target')

for feature in top_features:
    model.add_edge(feature, 'target')

# Estimate the conditional probability tables from the training rows.
model.fit(train_data[model_columns], estimator=ExpectationMaximization)

infer = VariableElimination(model)


def predict(instance):
    """Return the most probable 'target' state for one row of data.

    Only the features that are nodes of the network (``top_features``) are
    passed as evidence; any other entry of ``instance`` is ignored, so a
    column that is not in the network can never reach the query.
    """
    evidence = {feature: instance[feature] for feature in top_features}
    query_result = infer.query(variables=['target'], evidence=evidence, show_progress=False)
    best_state_index = query_result.values.argmax()
    # Translate the position in the probability table back into the class
    # label instead of assuming the labels are exactly 0..k-1.
    return query_result.state_names['target'][best_state_index]


test_data['Predicted'] = test_data.apply(predict, axis=1)

accuracy = accuracy_score(test_data['target'], test_data['Predicted'])
print(f'Accuracy of the Bayesian Belief Network Classifier: {accuracy:.2f}')

In [None]:
# Candidate Elimination Algorithm

import pandas as pd

# Example dataset, written one observation per row:
# (Outlook, Temperature, Humidity, Wind, Play Tennis)
_columns = ['Outlook', 'Temperature', 'Humidity', 'Wind', 'Play Tennis']
_rows = [
    ('Sunny', 'Hot', 'High', 'Weak', 0),
    ('Sunny', 'Hot', 'High', 'Strong', 0),
    ('Overcast', 'Hot', 'High', 'Weak', 1),
    ('Rain', 'Mild', 'High', 'Weak', 1),
    ('Rain', 'Cool', 'Normal', 'Weak', 1),
    ('Rain', 'Cool', 'Normal', 'Strong', 0),
    ('Overcast', 'Cool', 'Normal', 'Strong', 1),
    ('Sunny', 'Mild', 'High', 'Weak', 0),
    ('Sunny', 'Cool', 'Normal', 'Weak', 1),
    ('Rain', 'Mild', 'Normal', 'Weak', 1),
    ('Sunny', 'Mild', 'Normal', 'Strong', 1),
    ('Overcast', 'Mild', 'High', 'Strong', 1),
    ('Overcast', 'Hot', 'Normal', 'Weak', 1),
    ('Rain', 'Mild', 'High', 'Strong', 0),
]

# Transpose the rows into the column-oriented dict of lists.
data = {name: list(values) for name, values in zip(_columns, zip(*_rows))}

# Convert to DataFrame
df = pd.DataFrame(data)

def candidate_elimination(data, target_attribute):
    """Compute the version-space boundaries with the Candidate-Elimination algorithm.

    Hypotheses are conjunctions with one entry per attribute: either a concrete
    attribute value or ``'?'`` (any value is accepted).

    Parameters
    ----------
    data : pandas.DataFrame
        One row per training example. Every column except ``target_attribute``
        is treated as an attribute. Attribute values must be hashable and
        should not be the literal string ``'?'``, which is reserved as the
        wildcard.
    target_attribute : str
        Name of the label column. A label equal to ``1`` marks a positive
        example; any other label is treated as negative.

    Returns
    -------
    specific : list
        The specific boundary S as a single hypothesis. Entries are ``None``
        while no positive example has been seen (S then matches nothing).
    general : list of list
        The general boundary G, one list per maximally general hypothesis.

    If the examples cannot be described by any conjunctive hypothesis the
    version space collapses; a ``UserWarning`` is emitted and ``([], [])`` is
    returned.
    """
    import warnings  # local import: only needed to report a collapsed version space

    ANY = '?'

    attributes = data.drop(target_attribute, axis=1)
    n_attributes = attributes.shape[1]
    examples = attributes.values.tolist()
    labels = data[target_attribute].tolist()
    # Observed values per attribute; needed to specialise G while S is still
    # empty (i.e. for negative examples that arrive before any positive one).
    domains = [attributes.iloc[:, i].unique().tolist() for i in range(n_attributes)]

    def covers(hypothesis, instance):
        """True if ``hypothesis`` classifies ``instance`` as positive."""
        return all(h == ANY or h == x for h, x in zip(hypothesis, instance))

    def at_least_as_general(first, second):
        """True if ``first`` accepts every instance that ``second`` accepts."""
        return all(a == ANY or a == b for a, b in zip(first, second))

    # ``None`` means "no value accepted yet"; it is deliberately distinct from
    # ANY so that a generalised position is never re-specialised later.
    specific = [None] * n_attributes
    general = [tuple([ANY] * n_attributes)]

    for instance, label in zip(examples, labels):
        if label == 1:
            # Drop general hypotheses that reject this positive example ...
            general = [g for g in general if covers(g, instance)]
            # ... and minimally generalise S so that it accepts the example.
            specific = [
                x if s is None or s == x else ANY
                for s, x in zip(specific, instance)
            ]
        else:
            specialised = []
            for g in general:
                if not covers(g, instance):
                    # Already rejects the negative example; keep unchanged.
                    specialised.append(g)
                    continue
                # Replace one wildcard at a time with a value that excludes
                # the negative example while staying more general than S.
                for i, x in enumerate(instance):
                    if g[i] != ANY:
                        continue
                    if specific[i] is None:
                        replacements = [v for v in domains[i] if v != x]
                    elif specific[i] != ANY and specific[i] != x:
                        replacements = [specific[i]]
                    else:
                        replacements = []
                    for value in replacements:
                        specialised.append(g[:i] + (value,) + g[i + 1:])
            # Remove duplicates, then keep only the maximally general members.
            candidates = list(dict.fromkeys(specialised))
            general = [
                h for h in candidates
                if not any(other != h and at_least_as_general(other, h) for other in candidates)
            ]

        if not general:
            warnings.warn(
                "Version space collapsed: no conjunctive hypothesis is "
                "consistent with the training examples.",
                stacklevel=2,
            )
            return [], []

    return specific, [list(g) for g in general]

# Run the Candidate-Elimination algorithm on the example DataFrame, using
# 'Play Tennis' as the target (label) column.
specific, general = candidate_elimination(df, 'Play Tennis')

# Show the two boundaries returned by candidate_elimination.
print(f'Specific boundary: {specific}')
print(f'General boundary: {general}')


In [None]:
# Apriori Algorithm

import pandas as pd
from mlxtend.frequent_patterns import apriori
from mlxtend.preprocessing import TransactionEncoder

def load_data(sample_size=None):
    """Download the Online Retail workbook and return it as a list of transactions.

    Parameters
    ----------
    sample_size : int, optional
        If truthy, at most this many transactions are drawn at random
        (``random_state=42``). When fewer transactions exist than requested,
        all of them are returned instead of raising. ``None`` or ``0`` keeps
        every transaction.

    Returns
    -------
    list of list
        One inner list per invoice, holding the stripped item descriptions of
        that invoice. Rows without an invoice number or description, and
        invoices whose number starts with ``'C'`` (credit notes), are excluded.
    """
    # Load the Online Retail dataset
    url = "https://archive.ics.uci.edu/ml/machine-learning-databases/00352/Online%20Retail.xlsx"
    raw = pd.read_excel(url, engine='openpyxl')

    # Clean the data without mutating the freshly loaded frame in place.
    cleaned = raw.assign(Description=raw['Description'].str.strip())
    cleaned = cleaned.dropna(subset=['InvoiceNo', 'Description'])

    # Convert InvoiceNo to string and remove credit notes (invoices starting with 'C')
    cleaned = cleaned.assign(InvoiceNo=cleaned['InvoiceNo'].astype(str))
    cleaned = cleaned[~cleaned['InvoiceNo'].str.startswith('C')]

    # Group the data by InvoiceNo and create a list of items for each transaction
    transactions = cleaned.groupby('InvoiceNo')['Description'].apply(list).reset_index()

    # Sample the transactions if a sample size is specified. The size is
    # clamped because DataFrame.sample raises when asked for more rows than
    # exist (sampling is without replacement).
    if sample_size:
        n_samples = min(sample_size, len(transactions))
        transactions = transactions.sample(n=n_samples, random_state=42)

    return transactions['Description'].tolist()

def apply_apriori(transactions, min_support=0.01):
    """Encode the transactions with ``TransactionEncoder`` and run ``apriori`` on them.

    Parameters
    ----------
    transactions : list of list
        One list of items per transaction.
    min_support : float, optional
        Forwarded to ``apriori`` as ``min_support``.

    Returns
    -------
    The value returned by ``apriori`` (called with ``use_colnames=True``).
    """
    # Fit the encoder, then build one column per item reported by the encoder.
    encoder = TransactionEncoder()
    encoder.fit(transactions)
    encoded = pd.DataFrame(encoder.transform(transactions), columns=encoder.columns_)

    return apriori(encoded, min_support=min_support, use_colnames=True)

def find_most_frequent_itemsets(frequent_itemsets):
    """Return the rows of ``frequent_itemsets`` whose ``'support'`` equals the maximum support.

    All rows tied for the maximum are returned, in their original order.
    """
    support = frequent_itemsets['support']
    is_top = support.eq(support.max())
    return frequent_itemsets.loc[is_top]

def main():
    """Load a sample of the Online Retail data, mine frequent itemsets and print them.

    Errors raised while loading the data or while running Apriori are printed
    and end the run early; nothing is returned.
    """
    print("Loading and processing the Online Retail dataset...")
    try:
        # Sample 5000 transactions for efficiency
        transactions = load_data(sample_size=5000)
    except Exception as load_error:
        print(f"Error loading data: {load_error}")
        return

    print(f"Total number of transactions: {len(transactions)}")
    if not transactions:
        print("No transactions found.")
        return
    # Show first 5 items of the first transaction
    print(f"Sample transaction: {transactions[0][:5]}...")

    print("\nApplying Apriori algorithm...")
    try:
        # Increased min_support to reduce complexity
        frequent_itemsets = apply_apriori(transactions, min_support=0.05)
    except Exception as apriori_error:
        print(f"Error applying Apriori algorithm: {apriori_error}")
        return

    print(f"\nTotal number of frequent itemsets found: {len(frequent_itemsets)}")

    print("\nTop 10 frequent itemsets by support:")
    ranked_itemsets = frequent_itemsets.sort_values(by='support', ascending=False)
    print(ranked_itemsets.head(10))

    print("\nMost frequent itemset(s):")
    print(find_most_frequent_itemsets(frequent_itemsets))

# Run the Apriori workflow only when this code is executed directly
# (not when it is imported as a module).
if __name__ == "__main__":
    main()
