In [90]:
# Attach Google Drive to the Colab runtime so files under it can be read below.
from google.colab import drive

MOUNT_POINT = '/content/mydrive'
drive.mount(MOUNT_POINT)

Drive already mounted at /content/mydrive; to attempt to forcibly remount, call drive.mount("/content/mydrive", force_remount=True).


In [91]:
import os
!pip install openpyxl



In [92]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# Read the workbook from the mounted Drive; the first column is used as the
# row index of the resulting frame.
file = "/content/mydrive/My Drive/Python/Clz/AI/LABWORKS/nb1.xlsx"
df = pd.read_excel(io=file, index_col=0)
print(df)

     Symptoms  Severity    Duration  Location Age Group Treated?
SN                                                              
1       fever      high  short-term    clinic     adult      yes
2    headache      mild   long-term  hospital     child       no
3       fever      high  short-term      home     adult       no
4       cough  moderate  short-term    clinic     child      yes
5       fever  moderate   long-term  hospital     adult       no
6    headache  moderate   long-term      home     adult      yes
7       cough      high  short-term    clinic     adult      yes
8       fever      mild   long-term  hospital     child       no
9    headache      high  short-term      home     adult      yes
10      cough  moderate   long-term      home     child       no
11     nausea      high  short-term    clinic     adult       no
12  dizziness  moderate   long-term  hospital     child      yes
13      fever       low   long-term      home     adult      yes
14     nausea      mild  

In [93]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder

# Replace every categorical column of `df` (in place) with integer codes and
# keep the fitted encoder of each column in `label_encoders`.
categorical_columns = ['Symptoms', 'Severity','Duration','Location','Age Group','Treated?']
label_encoders = {}
for column in categorical_columns:
    label_encoders[column] = LabelEncoder()
    df[column] = label_encoders[column].fit_transform(df[column])
print(df)


    Symptoms  Severity  Duration  Location  Age Group  Treated?
SN                                                             
1          2         0         1         0          0         1
2          3         2         0         2          1         0
3          2         0         1         1          0         0
4          0         3         1         0          1         1
5          2         3         0         2          0         0
6          3         3         0         1          0         1
7          0         0         1         0          0         1
8          2         2         0         2          1         0
9          3         0         1         1          0         1
10         0         3         0         1          1         0
11         4         0         1         0          0         0
12         1         3         0         2          1         1
13         2         1         0         1          0         1
14         4         2         1        

In [94]:
# Separate the target column ("Treated?") from the remaining feature columns.
y = df["Treated?"]
X = df.drop(columns=["Treated?"])
print(X)
print(y)

    Symptoms  Severity  Duration  Location  Age Group
SN                                                   
1          2         0         1         0          0
2          3         2         0         2          1
3          2         0         1         1          0
4          0         3         1         0          1
5          2         3         0         2          0
6          3         3         0         1          0
7          0         0         1         0          0
8          2         2         0         2          1
9          3         0         1         1          0
10         0         3         0         1          1
11         4         0         1         0          0
12         1         3         0         2          1
13         2         1         0         1          0
14         4         2         1         1          1
15         1         0         1         0          0
16         3         0         0         2          1
SN
1     1
2     0
3     0
4

In [95]:
import numpy as np
import pandas as pd

class NaiveBayes:
    """Naive Bayes classifier for categorical features with add-one (Laplace) smoothing.

    ``fit`` needs ``X`` to expose ``.columns``, ``.shape`` and column lookup
    (``X[name]``), e.g. a pandas DataFrame. ``y`` is read positionally through
    ``np.asarray``, so its index (if any) does not have to match ``X``'s.

    Attributes populated by ``fit``:
        features: list of the column names of ``X``.
        likelihoods: ``{feature: {"<value>_<class>": P(value | class)}}``.
        class_priors: ``{class: P(class)}``.
        pred_priors: ``{feature: {value: P(value)}}``.
        X_train, y_train: the objects passed to ``fit``.
        train_size, num_feats: ``X.shape[0]`` and ``X.shape[1]``.
    """

    def __init__(self):
        self.features = None
        self.likelihoods = {}
        self.class_priors = {}
        self.pred_priors = {}
        self.X_train = None
        self.y_train = None
        self.train_size = 0
        self.num_feats = 0

    def fit(self, X, y):
        """Estimate class priors, per-feature likelihoods and predictor priors.

        Args:
            X: training features, one categorical column per feature.
            y: class label of each row of ``X`` (same length, same order).
        """
        self.features = list(X.columns)
        self.X_train = X
        self.y_train = y
        self.train_size = X.shape[0]
        self.num_feats = X.shape[1]

        # Start from empty tables so that refitting on different data never
        # keeps features or classes learned by a previous call.
        self.likelihoods = {feature: {} for feature in self.features}
        self.pred_priors = {feature: {} for feature in self.features}
        self.class_priors = {}

        self._calc_class_prior()
        self._calc_likelihoods()
        self._calc_predictor_prior()

    def _calc_class_prior(self):
        """Fill ``class_priors`` with the relative frequency of every class."""
        outcomes, counts = np.unique(np.asarray(self.y_train), return_counts=True)
        for outcome, outcome_count in zip(outcomes, counts):
            self.class_priors[outcome] = outcome_count / self.train_size

    def _calc_likelihoods(self):
        """Fill ``likelihoods`` with smoothed ``P(value | class)`` estimates.

        Every (value, class) pair of a feature gets
        ``(count + 1) / (class_count + n_distinct_values_of_feature)``,
        including pairs that never occur together, so no likelihood is zero
        and the estimates of one feature sum to 1 within each class.
        """
        labels = np.asarray(self.y_train)
        outcomes = np.unique(labels)
        for feature in self.features:
            column = np.asarray(self.X_train[feature])
            feat_values = np.unique(column)
            n_values = len(feat_values)
            for outcome in outcomes:
                # Positional boolean mask: independent of the pandas index.
                in_class = column[labels == outcome]
                outcome_count = len(in_class)
                for feat_val in feat_values:
                    count = int(np.count_nonzero(in_class == feat_val))
                    self.likelihoods[feature][f'{feat_val}_{outcome}'] = (
                        (count + 1) / (outcome_count + n_values)  # Laplace smoothing
                    )

    def _calc_predictor_prior(self):
        """Fill ``pred_priors`` with the relative frequency of every feature value."""
        for feature in self.features:
            values, counts = np.unique(np.asarray(self.X_train[feature]), return_counts=True)
            for feat_val, count in zip(values, counts):
                self.pred_priors[feature][feat_val] = count / self.train_size

    def predict(self, X):
        """Return the most probable class for every row of ``X``.

        Args:
            X: 2-D array-like whose columns follow the order of ``features``.
               Values are looked up by their string form (``f'{value}_{class}'``),
               so they should be formatted like the training values.

        Returns:
            ``np.ndarray`` with one predicted class per row. On ties the class
            that sorts first wins.
        """
        results = []
        X = np.array(X)
        outcomes, outcome_counts = np.unique(np.asarray(self.y_train), return_counts=True)

        for query in X:
            log_posteriors = {}
            for outcome, outcome_count in zip(outcomes, outcome_counts):
                # Sum logs instead of multiplying probabilities so that many
                # features cannot underflow the posterior to 0.0.
                log_posterior = np.log(self.class_priors[outcome])

                for feat, feat_val in zip(self.features, query):
                    key = f'{feat_val}_{outcome}'
                    if key in self.likelihoods[feat]:
                        prob = self.likelihoods[feat][key]
                    else:
                        # Value never seen in training: same add-one estimate
                        # as a zero count for this class.
                        prob = 1 / (outcome_count + len(self.pred_priors[feat]))
                    log_posterior += np.log(prob)

                log_posteriors[outcome] = log_posterior

            result = max(log_posteriors, key=lambda x: log_posteriors[x])
            results.append(result)

        return np.array(results)

def accuracy_score(y_true, y_pred):
    """Return the percentage of matching labels, rounded to two decimals.

    Both inputs are converted with ``np.asarray`` and compared position by
    position, so lists, NumPy arrays and pandas Series (whatever their index)
    are all accepted.

    Args:
        y_true: expected labels.
        y_pred: predicted labels, same length and order as ``y_true``.

    Returns:
        float in ``[0, 100]``.

    Raises:
        ValueError: if the two inputs do not have the same shape.
        ZeroDivisionError: if the inputs are empty.
    """
    actual = np.asarray(y_true)
    predicted = np.asarray(y_pred)
    if actual.shape != predicted.shape:
        raise ValueError(
            "y_true and y_pred must have the same shape, got {} and {}".format(
                actual.shape, predicted.shape
            )
        )
    matches = int(np.count_nonzero(actual == predicted))
    return round(matches / len(actual) * 100, 2)



In [96]:

# Example usage: train on the full data set and report accuracy on that same data.
nb_clf = NaiveBayes()
nb_clf.fit(X, y)

train_predictions = nb_clf.predict(X)
train_accuracy = accuracy_score(y, train_predictions)
print("Train Accuracy: {}".format(train_accuracy))

# Query: one row of encoded feature values, in the column order of X.
query = np.array([[1, 0, 1,0,0]])
query_prediction = nb_clf.predict(query)
print("Query:- {} ---> {}".format(query, query_prediction))

Train Accuracy: 93.75
Query:- [[1 0 1 0 0]] ---> [1]
