**Decision Tree**

In [1]:
import numpy as np
import pandas as pd

In [2]:
def load_dataset(path=r'E:\College\Semester 6\Machine Learning\LAB FINAL\data.csv'):
    """Read the training data from a CSV file into a DataFrame.

    Parameters
    ----------
    path : str or path-like, optional
        Location of the CSV file. Defaults to the notebook's original
        hard-coded location, so existing ``load_dataset()`` calls keep working.

    Returns
    -------
    pandas.DataFrame
        The parsed CSV contents, exactly as ``pd.read_csv`` returns them.
    """
    # The default is a raw string: in a normal literal the backslash pairs in
    # this Windows path are invalid escape sequences, which newer Python
    # versions warn about.
    return pd.read_csv(path)

In [3]:
def calculate_entropy(target_col):
    """Return the Shannon entropy (in bits) of the labels in ``target_col``.

    The relative frequency of every distinct label is computed, and each
    frequency is weighted by its negative base-2 logarithm before summing.
    """
    class_counts = np.unique(target_col, return_counts=True)[1]
    class_probs = class_counts / class_counts.sum()
    surprisals = -np.log2(class_probs)
    return sum(class_probs * surprisals)

In [4]:
def calculate_information_gain(data, split_attribute_name, target_name):
    """Return the entropy reduction obtained by splitting on one attribute.

    The gain is the entropy of the target column over all rows minus the
    weighted average entropy of the target inside each partition produced by
    the distinct values of ``split_attribute_name``. Each partition's weight is
    its share of the rows.
    """
    entropy_before = calculate_entropy(data[target_name])
    attribute_values, value_counts = np.unique(
        data[split_attribute_name], return_counts=True
    )
    row_total = len(data)

    entropy_after = 0
    for attribute_value, value_count in zip(attribute_values, value_counts):
        partition = data[data[split_attribute_name] == attribute_value]
        partition_entropy = calculate_entropy(partition[target_name])
        entropy_after += (value_count / row_total) * partition_entropy

    return entropy_before - entropy_after

In [5]:
def get_best_attribute(data, target_name):
    """Return the name of the non-target column with the highest information gain.

    Columns are scored in their DataFrame order; when several columns tie for
    the highest gain, the first one encountered is returned.
    """
    candidate_columns = [name for name in data.columns if name != target_name]
    return max(
        candidate_columns,
        key=lambda name: calculate_information_gain(data, name, target_name),
    )

In [6]:
def create_decision_tree(data, target_name):
    """Recursively build a decision tree as nested dictionaries.

    Returns either a leaf (a single target label) or a dict of the form
    ``{attribute: {value: subtree, ...}}``.

    Leaves are produced when every remaining row has the same label, or when
    only the target column is left, in which case the most frequent label is
    returned.
    """
    labels = data[target_name]
    if len(labels.unique()) == 1:
        return labels.iloc[0]
    if len(data.columns) == 1:
        return labels.mode()[0]

    split_on = get_best_attribute(data, target_name)
    branches = {}
    for branch_value in data[split_on].unique():
        matching_rows = data[data[split_on] == branch_value]
        # The chosen attribute is removed so it cannot be selected again deeper down.
        branches[branch_value] = create_decision_tree(
            matching_rows.drop(split_on, axis=1), target_name
        )

    return {split_on: branches}

In [7]:
def print_decision_tree(decision_tree, indent=''):
    """Pretty-print a nested-dict decision tree to stdout.

    Parameters
    ----------
    decision_tree : dict or leaf label
        Either ``{attribute: {value: subtree, ...}}`` or a leaf label.
    indent : str, optional
        Prefix prepended to every printed line; grows by five spaces per level.
    """
    if not isinstance(decision_tree, dict):
        # Leaves may be non-string labels (e.g. integers), so convert before
        # concatenating, the same way branch values are converted below.
        print(indent + '  ' + str(decision_tree))
        return

    attribute = list(decision_tree)[0]
    print(indent + str(attribute))
    for value, subtree in decision_tree[attribute].items():
        print(indent + '  ' + str(value) + ' ->')
        print_decision_tree(subtree, indent + '     ')

In [8]:
def predict(instance, decision_tree):
    """Classify one instance by walking the decision tree.

    Parameters
    ----------
    instance : mapping
        Attribute name -> attribute value for the row being classified.
    decision_tree : dict or leaf label
        Tree in the ``{attribute: {value: subtree, ...}}`` form, or a bare leaf
        label (which is what tree construction yields for single-class data).

    Returns
    -------
    The leaf label reached, or the string ``'Unknown'`` when the instance has
    an attribute value for which the tree has no branch.

    Raises
    ------
    KeyError
        If the instance lacks an attribute the tree tests on.
    """
    node = decision_tree
    # Descend until a leaf is reached. A tree that is already a leaf skips the
    # loop entirely instead of failing on a dict-only operation.
    while isinstance(node, dict):
        attribute = list(node)[0]
        value = instance[attribute]
        branches = node[attribute]
        if value not in branches:
            return 'Unknown'
        node = branches[value]
    return node

In [9]:
# Load the data, treat the last column as the prediction target, build the
# tree from every row, and print it.
dataset = load_dataset()
# NOTE(review): the tree printed below this cell splits on `class`, so the last
# column is evidently not `class` - confirm the last column is the intended target.
target_name = dataset.columns[-1]
decision_tree = create_decision_tree(dataset, target_name)
print_decision_tree(decision_tree)

stalk-root
  e ->
     gill-spacing
       c ->
          class
            p ->
               cap-shape
                 x ->
                    cap-surface
                      s ->
                         cap-color
                           n ->
                              bruises
                                t ->
                                   odor
                                     p ->
                                        gill-attachment
                                          f ->
                                             gill-size
                                               n ->
                                                  gill-color
                                                    k ->
                                                       stalk-shape
                                                         e ->
                                                            stalk-surface-above-ring
                                               

**Accuracy**

In [10]:
def calculate_accuracy(dataset, decision_tree, target_name=None):
    """Return the fraction of rows in ``dataset`` the tree classifies correctly.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Rows to score; must contain the target column.
    decision_tree : dict or leaf label
        Tree as produced by ``create_decision_tree``.
    target_name : str, optional
        Name of the target column. Defaults to the last column of ``dataset``,
        which is what this function always used before the parameter existed.

    Returns
    -------
    float
        Correct predictions divided by the number of rows, or ``0.0`` for an
        empty dataset (instead of dividing by zero).
    """
    if target_name is None:
        target_name = dataset.columns[-1]

    total_instances = len(dataset)
    if total_instances == 0:
        return 0.0

    correct_predictions = 0
    for _, instance in dataset.iterrows():
        instance_dict = instance.to_dict()
        # Remove the label so the prediction only sees the feature values.
        true_label = instance_dict.pop(target_name)
        if predict(instance_dict, decision_tree) == true_label:
            correct_predictions += 1

    return correct_predictions / total_instances

# Reload the data and rebuild the tree, again using the last column as target.
dataset = load_dataset()

target_name = dataset.columns[-1]
decision_tree = create_decision_tree(dataset, target_name)

# The tree is scored on the same rows it was built from (no held-out split),
# so this is a training-set accuracy.
accuracy = calculate_accuracy(dataset, decision_tree)
print('Accuracy:', accuracy*100, "%")


Accuracy: 66.96208764155588 %


**Using Libraries (scikit-learn)**

In [12]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
# Raw string: in a normal literal the backslash pairs in this Windows path are
# invalid escape sequences, which newer Python versions warn about.
heart_data = pd.read_csv(r'E:\College\Semester 6\Machine Learning\LAB FINAL\heart_disease_data.csv')

# Features are every column except `target`; `target` is the label.
X = heart_data.drop(columns=['target'])
Y = heart_data['target']

# Hold out 20% of the rows for testing. The seed is fixed so that re-running
# the notebook reproduces the same split and the same reported accuracy.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

# Entropy-based splits, depth capped at 10; seeded for repeatable tie-breaking.
DecisionTree = DecisionTreeClassifier(criterion="entropy", max_depth=10, random_state=42)

In [13]:
# Fit on the training split, predict the held-out split, and report the
# share of correct predictions as a percentage.
DecisionTree.fit(X_train,Y_train)
predicted_values = DecisionTree.predict(X_test)
x = accuracy_score(Y_test, predicted_values)
print("Accuracy is: ", x*100, "%")

Accuracy is:  80.32786885245902 %


**Neural Network**

In [14]:
import tensorflow as tf
from sklearn.model_selection import train_test_split

In [None]:
# Raw string: in a normal literal the backslash pairs in this Windows path are
# invalid escape sequences, which newer Python versions warn about.
heart_data = pd.read_csv(r'E:\College\Semester 6\Machine Learning\LAB FINAL\heart_disease_data.csv')

# Features are every column except `target`; `target` is the label.
X = heart_data.drop(columns=['target'])
Y = heart_data['target']

# Hold out 20% of the rows for testing. The seed is fixed so that re-running
# the notebook reproduces the same split.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

In [None]:
# Feed-forward network: two hidden layers of 64 ReLU units each, followed by a
# single sigmoid unit. The input width is the number of feature columns.
# NOTE(review): no feature scaling is applied before this model in the visible
# cells - confirm that is intended.
model = tf.keras.models.Sequential([
    tf.keras.layers.Dense(64, activation='relu', input_shape=(X_train.shape[1],)),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid')])

In [None]:
# Binary cross-entropy pairs with the single sigmoid output; accuracy is tracked as a metric.
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [None]:
# Train for 10 epochs in batches of 32. The held-out test split is also passed
# as validation data, so it is seen (for monitoring) during training.
model.fit(X_train, Y_train, epochs=10, batch_size=32, validation_data=(X_test, Y_test))

In [None]:
# Score the trained network on the held-out split and print accuracy as a percentage.
# Note: this rebinds the notebook-level name `accuracy` used by the decision-tree cell.
loss, accuracy = model.evaluate(X_test, Y_test)
print("Test Accuracy:", accuracy*100, "%")

**Support Vector Machine**

In [None]:
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
# Raw string: in a normal literal the backslash pairs in this Windows path are
# invalid escape sequences, which newer Python versions warn about.
heart_data = pd.read_csv(r'E:\College\Semester 6\Machine Learning\LAB FINAL\heart_disease_data.csv')

# Features are every column except `target`; `target` is the label.
X = heart_data.drop(columns=['target'])
Y = heart_data['target']

# Hold out 20% of the rows for testing. The seed is fixed so that re-running
# the notebook reproduces the same split and the same reported accuracy.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

# Support vector classifier with a radial-basis-function kernel.
SVM = SVC(kernel='rbf')

In [None]:
# Fit on the training split, predict the held-out split, and report the
# share of correct predictions as a percentage.
SVM.fit(X_train,Y_train)
predicted_values = SVM.predict(X_test)
x = accuracy_score(Y_test, predicted_values)
print("Accuracy is: ", x*100, "%")

**Random Forest**

In [None]:
from sklearn.ensemble import RandomForestClassifier
# Raw string: in a normal literal the backslash pairs in this Windows path are
# invalid escape sequences, which newer Python versions warn about.
heart_data = pd.read_csv(r'E:\College\Semester 6\Machine Learning\LAB FINAL\heart_disease_data.csv')

# Features are every column except `target`; `target` is the label.
X = heart_data.drop(columns=['target'])
Y = heart_data['target']

# Hold out 20% of the rows for testing. The seed is fixed so that re-running
# the notebook reproduces the same split and the same reported accuracy.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

# Default forest settings; seeded so the bootstrap samples are repeatable.
RandomForest = RandomForestClassifier(random_state=42)

In [None]:
# Fit on the training split, predict the held-out split, and report the
# share of correct predictions as a percentage.
RandomForest.fit(X_train,Y_train)
predicted_values = RandomForest.predict(X_test)
x = accuracy_score(Y_test, predicted_values)
print("Accuracy is: ", x*100, "%")

In [1]:
# Accuracy percentages, one per classifier, in the order of the labels passed
# to the comparison bar plot.
# NOTE(review): these are hard-coded numbers, not values computed by the cells
# above - confirm they match the runs they are meant to summarise.
acc = [
    86.88524590163934,
    80.32786885245902,
    65.21311211585999,
    73.77049180327869,
    63.934426229508205,
    86.88524590163934,
]

In [None]:
# `plt` is used below, so matplotlib must be imported here; nothing else in the
# notebook imports it.
import matplotlib.pyplot as plt
import seaborn as sns

# Horizontal bar chart: one bar per classifier, bar length = accuracy (%).
# The labels must stay in the same order as the values in `acc`.
plt.figure(figsize=[10,5])
plt.title('Accuracy Comparison')
plt.xlabel('Accuracy')
plt.ylabel('Classifier')
sns.barplot(
    x=acc,
    y=[
        "Naive Bayes",
        "Logistic Regression",
        "Neural Network",
        "Decision Tree",
        "Support Vector Machine",
        "Random Forest",
    ],
)