In [1]:
# 1. Implement and demonstrate the Find-S algorithm for finding the most specific hypothesis.

def find_s(training_data):
    # Initialize the most specific hypothesis
    hypothesis = ['0'] * len(training_data[0][:-1])
    
    for example in training_data:
        if example[-1] == "yes":  # Only consider positive examples
            for i in range(len(hypothesis)):
                if hypothesis[i] == '0':  # Initial assignment
                    hypothesis[i] = example[i]
                elif hypothesis[i] != example[i]:  # Generalize
                    hypothesis[i] = '?'
                    
    return hypothesis

# Example training data: (Sunny, Warm, Normal, Strong, Warm, Same) -> Yes/No
training_data = [
    ("Sunny", "Warm", "Normal", "Strong", "Warm", "Same", "yes"),
    ("Sunny", "Warm", "High", "Strong", "Warm", "Same", "yes"),
    ("Rainy", "Cold", "High", "Strong", "Warm", "Change", "no"),
    ("Sunny", "Warm", "High", "Strong", "Cool", "Change", "yes")
]

# Find the most specific hypothesis
hypothesis = find_s(training_data)
print("Most specific hypothesis:", hypothesis)


Most specific hypothesis: ['Sunny', 'Warm', '?', 'Strong', '?', '?']


In [30]:
# 2. Implement and demonstrate the Candidate Elimination algorithm using a data set stored as a .CSV file.

import csv

with open("dataset2.csv") as f:
    csv_file = csv.reader(f)
    data = list(csv_file)

specific = data[1][:-1]
general = [['?' for _ in range(len(specific))] for _ in range(len(specific))]

for i in data[1:]:
    if i[-1] == "Yes":
        for j in range(len(specific)):
            if i[j] != specific[j]:
                specific[j] = '?'
                general[j][j] = '?'
    elif i[-1] == "No":
        for j in range(len(specific)):
            if i[j] != specific[j]:
                general[j][j] = '?'
            else:
                general[j][j] = specific[j]

gh = [g for g in general if g != ['?' for _ in range(len(specific))]]

print("\nFinal Specific Hypothesis:\n", specific)
print("\nFinal General Hypothesis:\n", gh)


Final Specific Hypothesis:
 ['Sunny', 'Warm', '?', 'Strong', '?', '?']

Final General Hypothesis:
 [['?', '?', '?', 'Strong', '?', '?']]


In [14]:
# 3. Demonstrate data Preprocessing (Data Cleaning, Integration and Transformation) operations on a suitable data.

import pandas as pd
from sklearn.preprocessing import MinMaxScaler

# Sample data creation
data = {
    'Name': ['Alice', 'Bob', 'Charlie', 'Alice', 'Eve'],
    'Age': [30, None, 35, 30, 40],
    'Salary': [50000, 60000, None, 50000, 70000],
    'Department': ['HR', 'IT', 'Finance', 'HR', 'IT']
}

df = pd.DataFrame(data)

# 1. Data Cleaning
# Handling missing values
df['Age'] = df['Age'].fillna(df['Age'].mean())  # Fill missing ages with mean age
df['Salary'] = df['Salary'].fillna(df['Salary'].median())  # Fill missing salaries with median salary

# Removing duplicate records
df = df.drop_duplicates()

print("Data after cleaning:")
print(df)

# 2. Data Integration
# Create another simple dataset
data2 = {
    'Name': ['Alice', 'Bob', 'Charlie', 'Eve'],
    'Location': ['NY', 'CA', 'TX', 'WA']
}

df2 = pd.DataFrame(data2)

# Merge the datasets
df_merged = pd.merge(df, df2, on='Name', how='left')

print("\nData after integration:")
print(df_merged)

# 3. Data Transformation
# Normalize the Salary column
scaler = MinMaxScaler()
df_merged['Salary'] = scaler.fit_transform(df_merged[['Salary']])

print("\nData after transformation:")
print(df_merged)


Data after cleaning:
      Name    Age   Salary Department
0    Alice  30.00  50000.0         HR
1      Bob  33.75  60000.0         IT
2  Charlie  35.00  55000.0    Finance
4      Eve  40.00  70000.0         IT

Data after integration:
      Name    Age   Salary Department Location
0    Alice  30.00  50000.0         HR       NY
1      Bob  33.75  60000.0         IT       CA
2  Charlie  35.00  55000.0    Finance       TX
3      Eve  40.00  70000.0         IT       WA

Data after transformation:
      Name    Age  Salary Department Location
0    Alice  30.00    0.00         HR       NY
1      Bob  33.75    0.50         IT       CA
2  Charlie  35.00    0.25    Finance       TX
3      Eve  40.00    1.00         IT       WA


In [15]:
# 4. Demonstrate the working of SVM classifier for a suitable dataset.

import pandas as pd
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

# Load the Breast Cancer dataset
cancer = datasets.load_breast_cancer()
X = cancer.data
y = cancer.target

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Create an SVM classifier
svm_classifier = SVC(kernel='linear')

# Train the classifier
svm_classifier.fit(X_train, y_train)

# Make predictions
pred = svm_classifier.predict(X_test)

# Evaluate the classifier
accuracy = accuracy_score(y_test, pred)
print(f"Accuracy: {accuracy:.2f}")

# Print predictions
print(f"Predicted labels: {pred}")
print(f"Actual labels: {y_test}")


Accuracy: 0.96
Predicted labels: [1 0 0 1 1 0 0 0 1 1 1 0 1 0 1 0 1 1 1 0 1 1 0 1 1 1 1 1 1 0 1 1 1 1 1 1 0
 1 0 1 1 0 1 1 1 1 1 1 1 1 0 0 1 1 1 1 1 0 1 1 1 0 0 1 1 1 0 0 1 1 0 0 1 0
 1 1 1 1 1 1 0 1 1 0 0 0 0 0 1 1 1 1 1 1 1 1 0 0 1 0 0 1 0 0 1 1 1 0 1 1 0
 1 0 0 1 0 1 1 1 0 0 1 1 0 1 0 0 1 1 0 0 0 1 1 1 0 1 1 1 0 1 0 1 1 0 1 0 0
 0 1 0 1 1 1 1 0 0 1 1 1 1 1 1 1 0 1 1 1 1 0 1]
Actual labels: [1 0 0 1 1 0 0 0 1 1 1 0 1 0 1 0 1 1 1 0 0 1 0 1 1 1 1 1 1 0 1 1 1 1 1 1 0
 1 0 1 1 0 1 1 1 1 1 1 1 1 0 0 1 1 1 1 1 0 0 1 1 0 0 1 1 1 0 0 1 1 0 0 1 0
 1 1 1 0 1 1 0 1 0 0 0 0 0 0 1 1 1 1 1 1 1 1 0 0 1 0 0 1 0 0 1 1 1 0 1 1 0
 1 1 0 1 0 1 1 1 0 1 1 1 0 1 0 0 1 1 0 0 0 1 1 1 0 1 1 1 0 1 0 1 1 0 1 0 0
 0 1 0 1 1 1 1 0 0 1 1 1 1 1 1 1 0 1 1 1 1 0 1]


In [19]:
# 5. Implement and demonstrate the working of the Decision Tree algorithm.

import pandas as pd
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

# Load the Breast Cancer dataset
cancer = datasets.load_breast_cancer()
X = cancer.data
y = cancer.target

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Create a Decision Tree classifier
decision_tree = DecisionTreeClassifier()

# Train the classifier
decision_tree.fit(X_train, y_train)

# Make predictions
y_pred = decision_tree.predict(X_test)

# Evaluate the classifier
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")

# Print predictions
print(f"Predicted labels: {y_pred}")
print(f"Actual labels: {y_test}")


Accuracy: 0.94

Decision Tree Rules:

Predicted labels: [1 0 0 1 1 0 0 0 1 0 0 0 1 0 1 0 1 1 1 0 1 1 0 1 1 1 1 1 1 0 1 1 1 1 1 1 0
 1 0 1 1 0 1 1 1 1 0 0 1 1 0 0 1 1 1 1 1 0 0 1 1 0 0 1 1 1 0 0 1 1 0 0 1 0
 1 1 1 0 1 1 0 1 1 0 0 0 0 0 1 1 1 1 0 1 1 1 0 0 1 0 0 1 0 0 1 1 1 0 1 1 0
 1 1 0 1 0 1 1 1 0 0 1 1 0 1 0 0 1 1 0 0 0 0 1 1 0 0 1 1 0 1 0 1 1 0 1 0 0
 0 1 0 1 1 1 0 0 0 1 1 1 1 1 1 1 0 1 1 1 1 0 1]
Actual labels: [1 0 0 1 1 0 0 0 1 1 1 0 1 0 1 0 1 1 1 0 0 1 0 1 1 1 1 1 1 0 1 1 1 1 1 1 0
 1 0 1 1 0 1 1 1 1 1 1 1 1 0 0 1 1 1 1 1 0 0 1 1 0 0 1 1 1 0 0 1 1 0 0 1 0
 1 1 1 0 1 1 0 1 0 0 0 0 0 0 1 1 1 1 1 1 1 1 0 0 1 0 0 1 0 0 1 1 1 0 1 1 0
 1 1 0 1 0 1 1 1 0 1 1 1 0 1 0 0 1 1 0 0 0 1 1 1 0 1 1 1 0 1 0 1 1 0 1 0 0
 0 1 0 1 1 1 1 0 0 1 1 1 1 1 1 1 0 1 1 1 1 0 1]


In [32]:
# 6. Implement Random Forest classifier using python programming.

from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

# Load the Breast Cancer dataset
cancer = datasets.load_breast_cancer()
X = cancer.data
y = cancer.target

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Initialize the Random Forest classifier
clf = RandomForestClassifier(n_estimators=100, random_state=42)

# Train the classifier
clf.fit(X_train, y_train)

# Make predictions
pred = clf.predict(X_test)

# Evaluate the classifier
accuracy = accuracy_score(y_test, pred)
report = classification_report(y_test, pred)

print(f"Accuracy: {accuracy:.2f}")
print("Classification Report:")
print(report)



Accuracy: 0.97
Classification Report:
              precision    recall  f1-score   support

           0       0.98      0.94      0.96        63
           1       0.96      0.99      0.98       108

    accuracy                           0.97       171
   macro avg       0.97      0.96      0.97       171
weighted avg       0.97      0.97      0.97       171



In [22]:
# 7. Demonstrate the text classifier using Naive Bayes classifier algorithm.

import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report

# Sample dataset
data = {
    'text': [
        'I love programming in Python',
        'Python is a great language',
        'I hate bugs in my code',
        'Debugging is fun sometimes',
        'I enjoy learning new things',
        'Sometimes I get frustrated with errors',
        'Coding is a valuable skill',
        'Syntax errors are annoying',
        'I love solving problems with code',
        'Errors can be very frustrating'
    ],
    'label': [
        'positive',
        'positive',
        'negative',
        'positive',
        'positive',
        'negative',
        'positive',
        'negative',
        'positive',
        'negative'
    ]
}

# Create DataFrame
df = pd.DataFrame(data)

# Preprocess the text data
vectorizer = CountVectorizer()
X = vectorizer.fit_transform(df['text'])
y = df['label']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Train the Naive Bayes classifier
clf = MultinomialNB()
clf.fit(X_train, y_train)

# Make predictions
y_pred = clf.predict(X_test)

# Evaluate the classifier
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print(f'Accuracy: {accuracy}')
print('Classification Report:')
print(report)

# Predict for a new test sentence
test_sentence = ["I am excited about learning machine learning"]
test_sentence_transformed = vectorizer.transform(test_sentence)
prediction = clf.predict(test_sentence_transformed)

print(f'Test Sentence: "{test_sentence[0]}"')
print(f'Predicted Label: {prediction[0]}')


Accuracy: 1.0
Classification Report:
              precision    recall  f1-score   support

    negative       1.00      1.00      1.00         1
    positive       1.00      1.00      1.00         2

    accuracy                           1.00         3
   macro avg       1.00      1.00      1.00         3
weighted avg       1.00      1.00      1.00         3

Test Sentence: "I am excited about learning machine learning"
Predicted Label: positive


In [24]:
# 8. Implement the Naive Bayesian classifier for a sample training data set stored as a .CSV file. 


import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import make_pipeline
from sklearn.metrics import accuracy_score, classification_report

# Load the dataset
df = pd.read_csv('dataset8.csv')

# Split the dataset into input features and target variable
X = df['text']
y = df['label']

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create a pipeline that vectorizes the text, applies TF-IDF transformation, and then fits a Naive Bayes classifier
model = make_pipeline(CountVectorizer(), TfidfTransformer(), MultinomialNB())

# Train the classifier
model.fit(X_train, y_train)

# Make predictions on the testing set
y_pred = model.predict(X_test)

# Calculate the accuracy
accuracy = accuracy_score(y_test, y_pred)

print(f"Accuracy: {accuracy:.2f}")

# Display a classification report
print("Classification Report:")
print(classification_report(y_test, y_pred))


Accuracy: 0.50
Classification Report:
              precision    recall  f1-score   support

    negative       0.50      1.00      0.67         1
    positive       0.00      0.00      0.00         1

    accuracy                           0.50         2
   macro avg       0.25      0.50      0.33         2
weighted avg       0.25      0.50      0.33         2



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [27]:
# 9. Construct a Bayesian network to analyze the diagnosis of heart patients using heart diseases dataset.

# pip install pgmpy

import pandas as pd
from pgmpy.models import BayesianNetwork
from pgmpy.estimators import MaximumLikelihoodEstimator
from pgmpy.inference import VariableElimination
from pgmpy.inference import BeliefPropagation

# Sample dataset
data = {
    'age': [50, 60, 45, 55, 65, 70],
    'blood_pressure': [120, 140, 110, 130, 150, 160],
    'cholesterol': [200, 240, 190, 220, 250, 270],
    'heart_disease': [0, 1, 0, 1, 1, 1]
}
df = pd.DataFrame(data)

# Define the structure of the Bayesian Network
model = BayesianNetwork([('age', 'heart_disease'), 
                         ('blood_pressure', 'heart_disease'),
                         ('cholesterol', 'heart_disease')])

# Fit the model using Maximum Likelihood Estimator
model.fit(df, estimator=MaximumLikelihoodEstimator)

# Inference
inference = VariableElimination(model)
query_result = inference.map_query(variables=['heart_disease'], evidence={'age': 60, 'blood_pressure': 140, 'cholesterol': 240})

print("Diagnosis Prediction:", query_result)

# Display CPDs
for cpd in model.get_cpds():
    print(cpd)


0it [00:00, ?it/s]

0it [00:00, ?it/s]

Diagnosis Prediction: {'heart_disease': 1}
+---------+----------+
| age(45) | 0.166667 |
+---------+----------+
| age(50) | 0.166667 |
+---------+----------+
| age(55) | 0.166667 |
+---------+----------+
| age(60) | 0.166667 |
+---------+----------+
| age(65) | 0.166667 |
+---------+----------+
| age(70) | 0.166667 |
+---------+----------+
+------------------+-----+---------------------+
| age              | ... | age(70)             |
+------------------+-----+---------------------+
| blood_pressure   | ... | blood_pressure(160) |
+------------------+-----+---------------------+
| cholesterol      | ... | cholesterol(270)    |
+------------------+-----+---------------------+
| heart_disease(0) | ... | 0.0                 |
+------------------+-----+---------------------+
| heart_disease(1) | ... | 1.0                 |
+------------------+-----+---------------------+
+---------------------+----------+
| blood_pressure(110) | 0.166667 |
+---------------------+----------+
| blood_pressu

In [26]:
pip install pgmpy



[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.0[0m[39;49m -> [0m[32;49m24.1.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [28]:
# 10. Implement KNN classification algorithm with an appropriate dataset and analyze the results.

import pandas as pd
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, classification_report

# Load the Breast Cancer dataset
cancer = load_breast_cancer()
X = cancer.data  # Features
y = cancer.target  # Target labels

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Initialize the KNN classifier
# Use k=5 for this example; you can adjust k based on your needs
knn = KNeighborsClassifier(n_neighbors=5)

# Train the classifier
knn.fit(X_train, y_train)

# Make predictions on the testing set
y_pred = knn.predict(X_test)

# Calculate the accuracy
accuracy = accuracy_score(y_test, y_pred)

# Display the results
print(f"Accuracy: {accuracy:.2f}")

# Display a classification report
print("Classification Report:")
print(classification_report(y_test, y_pred, target_names=cancer.target_names))


Accuracy: 0.96
Classification Report:
              precision    recall  f1-score   support

   malignant       0.98      0.90      0.94        63
      benign       0.95      0.99      0.97       108

    accuracy                           0.96       171
   macro avg       0.96      0.95      0.96       171
weighted avg       0.96      0.96      0.96       171

