In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, classification_report

# Path of the comma-separated input file.
file_path = "/content/datacum.txt"

# Read every line up front; entries keep their trailing newline.
with open(file_path, "r") as file:
    lines = file.readlines()

# status_groups is only initialised here; nothing in this section fills it.
status_groups = []

# Lines beginning with "#####" are skipped. From every other line keep the
# 2nd through 11th comma-separated fields (indices 1..10) as integers.
parsed_rows = (
    [int(value) for value in raw_line.strip().split(",")[1:11]]
    for raw_line in lines
    if not raw_line.startswith("#####")
)

# Blank (or single-field) lines parse to an empty list; drop those rows.
data_groups = [row for row in parsed_rows if row]

# Dump every parsed record.
print(data_groups)


# Each parsed record is [label, <nine feature values>]; name the columns so.
# NOTE(review): the printed records include a 0 under 'Bare Nuclei' while
# every other value shown is 1-10 - possibly a missing-value placeholder;
# confirm before trusting the fit.
feature_names = [
    'Clump Thickness',
    'Uniformity of Cell Size',
    'Uniformity of Cell Shape',
    'Marginal Adhesion',
    'Single Epithelial Cell Size',
    'Bare Nuclei',
    'Bland Chromatin',
    'Normal Nucleoli',
    'Mitoses',
]
column_names = ['label'] + feature_names
data = pd.DataFrame(data_groups, columns=column_names)

# Features are every column except the label; the label is the target.
X = data.drop(columns=['label'])
y = data['label']

# Hold out 20% of the rows for testing; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Build and train the Gaussian Naive Bayes classifier (fit returns the
# estimator itself, so construction and training can be chained).
model = GaussianNB().fit(X_train, y_train)

# Predict labels for the held-out rows.
y_pred = model.predict(X_test)

# Overall accuracy on the test split.
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

# Per-class precision / recall / F1 summary.
report = classification_report(y_test, y_pred)
print(report)


[[2, 5, 1, 1, 1, 2, 1, 3, 1, 1], [2, 5, 4, 4, 5, 7, 10, 3, 2, 1], [2, 3, 1, 1, 1, 2, 2, 3, 1, 1], [2, 6, 8, 8, 1, 3, 4, 3, 7, 1], [2, 4, 1, 1, 3, 2, 1, 3, 1, 1], [4, 8, 10, 10, 8, 7, 10, 9, 7, 1], [2, 1, 1, 1, 1, 2, 10, 3, 1, 1], [2, 2, 1, 2, 1, 2, 1, 3, 1, 1], [2, 2, 1, 1, 1, 2, 1, 1, 1, 5], [2, 4, 2, 1, 1, 2, 1, 2, 1, 1], [2, 1, 1, 1, 1, 1, 1, 3, 1, 1], [2, 2, 1, 1, 1, 2, 1, 2, 1, 1], [4, 5, 3, 3, 3, 2, 3, 4, 4, 1], [2, 1, 1, 1, 1, 2, 3, 3, 1, 1], [4, 8, 7, 5, 10, 7, 9, 5, 5, 4], [4, 7, 4, 6, 4, 6, 1, 4, 3, 1], [2, 4, 1, 1, 1, 2, 1, 2, 1, 1], [2, 4, 1, 1, 1, 2, 1, 3, 1, 1], [4, 10, 7, 7, 6, 4, 10, 4, 1, 2], [2, 6, 1, 1, 1, 2, 1, 3, 1, 1], [4, 7, 3, 2, 10, 5, 10, 5, 4, 4], [4, 10, 5, 5, 3, 6, 7, 7, 10, 1], [2, 3, 1, 1, 1, 2, 1, 2, 1, 1], [4, 8, 4, 5, 1, 2, 0, 7, 3, 1], [2, 1, 1, 1, 1, 2, 1, 3, 1, 1], [4, 5, 2, 3, 4, 2, 7, 3, 6, 1], [2, 3, 2, 1, 1, 1, 1, 2, 1, 1], [2, 5, 1, 1, 1, 2, 1, 2, 1, 1], [2, 2, 1, 1, 1, 2, 1, 2, 1, 1], [2, 1, 1, 3, 1, 2, 1, 1, 1, 1], [2, 3, 1, 1, 1, 1, 1, 2, 1,