In [1]:
# Mount Google Drive into the Colab runtime at /content/mydrive; the data-loading
# cell further down reads its workbook from a path under this mount point.
from google.colab import drive
drive.mount('/content/mydrive')

Mounted at /content/mydrive


In [2]:
import os
#!pip install openpyxl



In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# Absolute path of the workbook on the mounted Drive (only valid after the
# drive.mount cell has run in this Colab session).
file = "/content/mydrive/My Drive/Python/Clz/AI/LABWORKS/nb2.xlsx"
# index_col=0 makes the first spreadsheet column (printed as "SN") the row index.
df = pd.read_excel(file,index_col=0)
print(df)

    Open  High  Low  Close  Volume  Market Cap      Sector        Industry
SN                                                                        
1    100   105   95    102     150        5000  Technology        Software
2    102   107   98    104     170        5200  Technology        Software
3    104   110  100    108     160        5100  Technology        Hardware
4    108   115  105    112     180        5300  Technology        Hardware
5    112   120  110    115     190        5400  Healthcare   Biotechnology
6    115   122  112    118     200        5500  Healthcare   Biotechnology
7    118   125  115    120     210        5600  Healthcare  Pharmaceutical
8    120   128  118    123     220        5700  Healthcare  Pharmaceutical
9    123   130  120    125     230        5800     Finance         Banking
10   125   132  123    128     240        5900     Finance         Banking


In [8]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder

# Create one encoder per categorical column up front, then overwrite each
# column's text labels in `df` with the integer codes its encoder assigns.
# The fitted encoders stay in `label_encoders`, keyed by column name.
label_encoders = {column: LabelEncoder() for column in ['Sector', 'Industry']}
for column, le in label_encoders.items():
    df[column] = le.fit_transform(df[column])

print(df)


    Open  High  Low  Close  Volume  Market Cap  Sector  Industry
SN                                                              
1    100   105   95    102     150        5000       2         4
2    102   107   98    104     170        5200       2         4
3    104   110  100    108     160        5100       2         2
4    108   115  105    112     180        5300       2         2
5    112   120  110    115     190        5400       1         1
6    115   122  112    118     200        5500       1         1
7    118   125  115    120     210        5600       1         3
8    120   128  118    123     220        5700       1         3
9    123   130  120    125     230        5800       0         0
10   125   132  123    128     240        5900       0         0


In [5]:
# The "Close" column is the prediction target; every other column is a predictor.
y = df["Close"]
X = df.drop(columns=["Close"])
print(X)
print(y)

    Open  High  Low  Volume  Market Cap  Sector  Industry
SN                                                       
1    100   105   95     150        5000       2         4
2    102   107   98     170        5200       2         4
3    104   110  100     160        5100       2         2
4    108   115  105     180        5300       2         2
5    112   120  110     190        5400       1         1
6    115   122  112     200        5500       1         1
7    118   125  115     210        5600       1         3
8    120   128  118     220        5700       1         3
9    123   130  120     230        5800       0         0
10   125   132  123     240        5900       0         0
SN
1     102
2     104
3     108
4     112
5     115
6     118
7     120
8     123
9     125
10    128
Name: Close, dtype: int64


In [6]:
import numpy as np
import pandas as pd

class NaiveBayes:
    """Naive Bayes classifier for categorical (discrete-valued) features.

    Every distinct value of a feature is treated as its own category, and the
    per-class likelihoods are estimated with Laplace (add-one) smoothing.

    Attributes:
        features: Column names seen by ``fit``, in column order.
        likelihoods: ``{feature: {"<value>_<class>": P(value | class)}}``.
        class_priors: ``{class: P(class)}``.
        pred_priors: ``{feature: {value: P(value)}}`` (stored for inspection;
            ``predict`` does not use it).
        X_train: The feature frame passed to ``fit``.
        y_train: The labels passed to ``fit``.
        train_size: Number of training rows.
        num_feats: Number of feature columns.
    """

    def __init__(self):
        self.features = None
        self.likelihoods = {}
        self.class_priors = {}
        self.pred_priors = {}
        self.X_train = None
        self.y_train = None
        self.train_size = 0
        self.num_feats = 0

    def fit(self, X, y):
        """Estimate class priors, smoothed likelihoods and predictor priors.

        Args:
            X: pandas DataFrame with one column per feature.
            y: Class labels, one per row of ``X`` and in the same row order.
        """
        self.features = list(X.columns)
        self.X_train = X
        self.y_train = y
        self.train_size = X.shape[0]
        self.num_feats = X.shape[1]

        # Start from empty tables so a second fit() never keeps entries that
        # belong to a previous training set.
        self.likelihoods = {feature: {} for feature in self.features}
        self.pred_priors = {feature: {} for feature in self.features}
        self.class_priors = {}

        self._calc_class_prior()
        self._calc_likelihoods()
        self._calc_predictor_prior()

    def _calc_class_prior(self):
        """Store P(class) as the fraction of training rows carrying each label."""
        classes, counts = np.unique(np.asarray(self.y_train), return_counts=True)
        for outcome, outcome_count in zip(classes, counts):
            self.class_priors[outcome] = outcome_count / self.train_size

    def _calc_likelihoods(self):
        """Store the Laplace-smoothed P(value | class) for every pair.

        Each (value, class) combination of a feature gets
        ``(count + 1) / (class_count + n_distinct_values)``, including
        combinations that never occur together in the training data. Those get
        a small non-zero probability rather than 0, and the probabilities of
        one feature sum to 1 within each class.
        """
        labels = np.asarray(self.y_train)
        classes = np.unique(labels)
        for feature in self.features:
            column = self.X_train[feature]
            feat_values = np.unique(column)
            n_values = len(feat_values)
            feature_table = self.likelihoods.setdefault(feature, {})
            for outcome in classes:
                # Positional boolean mask: independent of the index labels of
                # X and y, so duplicate or mismatched labels cannot mis-select rows.
                in_class = labels == outcome
                outcome_count = int(in_class.sum())
                value_counts = column[in_class].value_counts().to_dict()
                for feat_val in feat_values:
                    count = value_counts.get(feat_val, 0)
                    feature_table[f'{feat_val}_{outcome}'] = (count + 1) / (outcome_count + n_values)  # Laplace smoothing

    def _calc_predictor_prior(self):
        """Store P(value) for every feature as its relative frequency."""
        for feature in self.features:
            feat_val_counts = self.X_train[feature].value_counts().to_dict()
            feature_table = self.pred_priors.setdefault(feature, {})
            for feat_val, count in feat_val_counts.items():
                feature_table[feat_val] = count / self.train_size

    def predict(self, X):
        """Return the most probable class for every row of ``X``.

        Args:
            X: 2-D array-like (DataFrame, ndarray, nested list) whose columns
                are in the same order as the training features.

        Returns:
            numpy array with one predicted label per row.

        Raises:
            RuntimeError: If called before ``fit``.
        """
        if self.features is None or self.y_train is None:
            raise RuntimeError("NaiveBayes.predict() called before fit()")

        results = []
        # NOTE: np.array() gives all columns one common dtype, and lookups use
        # the formatted value, so a query must format the same way as training.
        X = np.array(X)
        classes = np.unique(np.asarray(self.y_train))

        # A value never seen in training contributes the same constant factor
        # to every class, so it does not influence which class wins.
        unseen_log_likelihood = np.log(1 / (self.train_size + len(self.features)))

        for query in X:
            log_posteriors = {}
            for outcome in classes:
                # Sum logs instead of multiplying probabilities so that many
                # features cannot underflow the score to 0.0.
                log_posterior = np.log(self.class_priors[outcome])

                for feat, feat_val in zip(self.features, query):
                    feature_table = self.likelihoods[feat]
                    key = f'{feat_val}_{outcome}'
                    if key in feature_table:
                        log_posterior += np.log(feature_table[key])
                    else:
                        log_posterior += unseen_log_likelihood  # Adjust for unseen feature values

                log_posteriors[outcome] = log_posterior

            results.append(max(log_posteriors, key=log_posteriors.get))

        return np.array(results)

def accuracy_score(y_true, y_pred):
    """Return the percentage (0-100, rounded to 2 decimals) of matching labels.

    Labels are compared position by position, so any mix of lists, numpy
    arrays and pandas Series is accepted regardless of Series index labels.

    Args:
        y_true: Array-like of reference labels.
        y_pred: Array-like of predicted labels, same shape as ``y_true``.

    Returns:
        float: ``100 * (number of matches) / (number of labels)``.

    Raises:
        ValueError: If the inputs differ in shape or are empty.
    """
    true_labels = np.asarray(y_true)
    predicted_labels = np.asarray(y_pred)
    if true_labels.shape != predicted_labels.shape:
        raise ValueError(
            "y_true and y_pred must have the same shape, got {} and {}".format(
                true_labels.shape, predicted_labels.shape
            )
        )
    if true_labels.size == 0:
        raise ValueError("accuracy is undefined for empty input")
    matches = np.sum(predicted_labels == true_labels)
    return round(float(matches) / float(true_labels.size) * 100, 2)



In [10]:

# Example usage: fit on the full table, then score the model on that same data
nb_clf = NaiveBayes()
nb_clf.fit(X, y)

train_predictions = nb_clf.predict(X)
train_accuracy = accuracy_score(y, train_predictions)
print("Train Accuracy: {}".format(train_accuracy))

# Query: a single hand-written row, values given in the column order of X
query = np.array([[110, 115, 105, 180, 5300, 2, 2]])
query_prediction = nb_clf.predict(query)
print("Query:- {} ---> {}".format(query, query_prediction))

Train Accuracy: 100.0
Query:- [[ 110  115  105  180 5300    2    2]] ---> [112]
