In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error
warnings.filterwarnings('ignore')

In [None]:
# Load the raw dataset from the CSV file next to the notebook.
df = pd.read_csv('Breast_Cancer.csv')
print(df.head())
# DataFrame.info() writes its summary straight to stdout and returns None,
# so wrapping it in print() only appended a stray "None" line to the output.
df.info()

In [None]:
# Structural overview: dimensions, per-column summary statistics
# (transposed so each column is a row), and null counts per column.
n_rows_cols = df.shape
print('Dataset Shape:', n_rows_cols)

summary_stats = df.describe().T
print(summary_stats)

missing_per_column = df.isnull().sum()
print('Missing Values:', missing_per_column)

In [None]:
# Integer-encode every object-typed column except the target ('Status'),
# which is deliberately left untouched in this cell.
categorical_columns = df.select_dtypes(include=['object']).columns
feature_columns_to_encode = [name for name in categorical_columns if name != 'Status']

for name in feature_columns_to_encode:
    # A fresh encoder per column: each column gets its own label -> integer mapping.
    df[name] = LabelEncoder().fit_transform(df[name])

print(df.head())

In [None]:
# Pairwise correlations between all columns except 'Status',
# drawn as an annotated heatmap.
feature_corr = df.drop(columns=['Status']).corr()

fig, ax = plt.subplots(figsize=(12, 8))
sns.heatmap(feature_corr, annot=True, cmap='coolwarm', ax=ax)
ax.set_title('Correlation Heatmap')
plt.show()

In [None]:
# Keep only the modelling columns. The explicit .copy() makes the result an
# independent frame, so the column assignments below do not write to a slice
# of the original DataFrame (which raises SettingWithCopyWarning on pandas
# versions without copy-on-write; that warning is silenced globally here).
selected_columns = ['Age', 'Marital Status', 'Grade', 'Survival Months', 'Status']
df = df[selected_columns].copy()

# Encode each categorical column with its own LabelEncoder instead of
# refitting one shared instance. 'Status' is encoded last, so after the loop
# `le` still holds the target's class mapping (le.classes_).
for col in ['Marital Status', 'Grade', 'Status']:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])

# Feature matrix and target vector used by the cells that follow.
X = df[['Age', 'Marital Status', 'Grade', 'Survival Months']]
y = df['Status']


In [None]:
# Split BEFORE scaling. Fitting MinMaxScaler on the full feature matrix lets
# the test rows' min/max influence the training features (data leakage), so
# the scaler is now fit on the training rows only and then applied to both
# partitions. The split itself (test_size, random_state) is unchanged, so the
# same rows land in train and test as before.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train)  # learn min/max from training data only
X_test = scaler.transform(X_test)        # reuse the training min/max

# Kept for backward compatibility with any code that still reads `X_scaled`:
# the full feature matrix transformed with the training-fitted scaler.
X_scaled = scaler.transform(X)

In [None]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn import metrics

# Sweep the KNN neighbourhood size k = 1..10 and record the accuracy for each.
# NOTE(review): accuracy is measured on X_test, so picking k from this curve
# tunes the model on the test set; a validation split or cross-validation on
# the training data would give an unbiased choice.
X_axis = list(range(1, 11))
acc = []  # one accuracy value per candidate k, in the same order as X_axis

for k in X_axis:
    knn_model = KNeighborsClassifier(n_neighbors=k)
    knn_model.fit(X_train, y_train)
    prediction = knn_model.predict(X_test)
    # accuracy_score expects (y_true, y_pred)
    acc.append(metrics.accuracy_score(y_test, prediction))

acc = pd.Series(acc, index=X_axis)

# The swept hyperparameter is n_neighbors; the plot was previously
# mislabelled as "n_estimators".
plt.plot(X_axis, acc)
plt.xticks(X_axis)
plt.title("Finding best value for n_neighbors")
plt.xlabel("n_neighbors")
plt.ylabel("Accuracy")
plt.grid()
plt.show()
print('Highest value: ', acc.values.max())
print('Best n_neighbors:', acc.idxmax())

In [None]:
from sklearn.neighbors import KNeighborsClassifier

# Final KNN classifier: 7 neighbours, Minkowski metric with p=2.
knn_settings = {'n_neighbors': 7, 'metric': 'minkowski', 'p': 2}
knn = KNeighborsClassifier(**knn_settings)
knn.fit(X_train, y_train)

In [None]:
from sklearn.svm import SVC

# Support-vector classifier with a linear kernel and a fixed seed.
svc = SVC(
    kernel='linear',
    random_state=42,
)
svc.fit(X_train, y_train)

In [None]:
from sklearn.naive_bayes import GaussianNB

# Gaussian Naive Bayes classifier with default settings.
nb = GaussianNB()
nb.fit(X=X_train, y=y_train)

In [None]:
from sklearn.tree import DecisionTreeClassifier

# Decision tree that splits on entropy, with a fixed seed.
dt_settings = {'criterion': 'entropy', 'random_state': 42}
dt = DecisionTreeClassifier(**dt_settings)
dt.fit(X_train, y_train)

In [None]:
from sklearn.linear_model import LogisticRegression

# Logistic regression classifier with a fixed seed.
lr = LogisticRegression(
    random_state=42,
)
lr.fit(X_train, y_train)

In [None]:
# Predict the held-out test rows with every fitted model.
# The order of the models matches the order of the names on the left.
y_pred_lr, y_pred_knn, y_pred_svc, y_pred_nb, y_pred_dt = (
    fitted_model.predict(X_test) for fitted_model in (lr, knn, svc, nb, dt)
)

In [None]:
from sklearn.metrics import accuracy_score

# A notebook cell only renders its last expression, so the previous five bare
# accuracy_score(...) calls computed four values that were never shown.
# Collect all scores in one labelled Series and display that instead.
accuracy_by_model = pd.Series(
    {
        'Logistic Regression': accuracy_score(y_test, y_pred_lr),
        'SVM': accuracy_score(y_test, y_pred_svc),
        'KNN': accuracy_score(y_test, y_pred_knn),
        'Naive Bayes': accuracy_score(y_test, y_pred_nb),
        'Decision Tree': accuracy_score(y_test, y_pred_dt),
    },
    name='accuracy',
)
accuracy_by_model

In [None]:
# Print the test-set accuracy of each model, one line per model.
# Naive Bayes and Decision Trees are classifiers here; their labels
# previously (and wrongly) described them as "Regression".
predictions_by_model = {
    "Logistic Regression": y_pred_lr,
    "SVM": y_pred_svc,
    "KNN": y_pred_knn,
    "Naive Bayes": y_pred_nb,
    "Decision Trees": y_pred_dt,
}
for model_name, predicted in predictions_by_model.items():
    print(f"{model_name} Accuracy:{accuracy_score(y_test, predicted)}")

In [None]:
from sklearn.metrics import confusion_matrix

# Confusion matrix of the KNN predictions against the test labels;
# the bare name on the last line renders the matrix as the cell output.
cm = confusion_matrix(y_true=y_test, y_pred=y_pred_knn)
cm

In [None]:
# Draw the KNN confusion matrix. fmt='d' prints the cell counts as plain
# integers; seaborn's default '.2g' format shows counts of 100 or more in
# scientific notation. Axis labels follow the (true, predicted) argument
# order used when the matrix was built.
ax = sns.heatmap(pd.DataFrame(cm), annot=True, fmt='d')
ax.set(xlabel='Predicted label', ylabel='True label', title='Confusion Matrix (KNN)')
plt.show()

In [None]:
from sklearn.metrics import classification_report

# Text report of per-class metrics for the KNN predictions on the test set.
knn_report = classification_report(y_true=y_test, y_pred=y_pred_knn)
print(knn_report)