In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Load the CSV, discard the "Unnamed: 32" column when the file has one, and
# recode the diagnosis labels as integers ("M" -> 1, "B" -> 0). Any label
# outside that mapping becomes NaN.
df = (
    pd.read_csv("breast cancer.csv")
    .drop(columns=["Unnamed: 32"], errors="ignore")
    .assign(diagnosis=lambda frame: frame["diagnosis"].map({"M": 1, "B": 0}))
)
# Preview the first rows as the cell output.
df.head()

In [None]:
# Number of missing values in each column.
df.isna().sum()

In [None]:
# Descriptive statistics for the columns of df, shown as the cell output.
df.describe()

In [None]:
# Class balance of the target. The diagnosis column was recoded when the data
# was loaded ("B" -> 0, "M" -> 1), so the axis label spells the encoding out.
fig, ax = plt.subplots()
sns.countplot(x=df["diagnosis"], ax=ax)
ax.set(
    title="Diagnosis class counts",
    xlabel="diagnosis (0 = B, 1 = M)",
    ylabel="count",
)
plt.show()

In [None]:
# Pairwise correlation heatmap.
# - An "id" column, if the CSV has one, is left out: it is assumed to be a
#   record identifier rather than a measurement (NOTE(review): confirm against
#   the data file).
# - select_dtypes keeps corr() from failing should a non-numeric column appear.
corr_matrix = (
    df.drop(columns=["id"], errors="ignore")
    .select_dtypes(include="number")
    .corr()
)

fig, ax = plt.subplots(figsize=(12, 8))
# vmin/vmax pin the diverging colormap to the full [-1, 1] correlation range,
# so zero correlation sits at the neutral midpoint of "coolwarm".
sns.heatmap(corr_matrix, cmap="coolwarm", vmin=-1, vmax=1, ax=ax)
ax.set_title("Correlation matrix")
plt.show()

In [None]:
# One histogram per numeric column of df (20 bins each) on a single 14x12
# figure; tight_layout keeps neighbouring subplots from overlapping.
df.hist(bins=20, edgecolor="black", figsize=(14, 12))
plt.gcf().tight_layout()
plt.show()

In [None]:
# Features / target.
# Besides the target, an "id" column is removed when the CSV has one: it is
# assumed to be a record identifier with no predictive meaning, and would
# otherwise be fed to the model as a feature (NOTE(review): confirm against
# the data file).
X = df.drop(columns="diagnosis").drop(columns="id", errors="ignore")
y = df["diagnosis"]

# Hold out 20% of the rows for testing. stratify=y keeps the class proportions
# the same in both splits; random_state makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Fit the scaler on the training split only, then apply that same transform to
# the test split, so no test-set statistics leak into training.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Logistic regression on the standardised features.
model = LogisticRegression(max_iter=1000)
model.fit(X_train_scaled, y_train)
y_pred = model.predict(X_test_scaled)

# Test-set accuracy, displayed as the cell output.
accuracy_score(y_test, y_pred)

In [None]:
# Confusion matrix for the test-set predictions.
# labels=[0, 1] pins the row/column order (0 = "B", 1 = "M" per the diagnosis
# recoding) and guarantees a 2x2 matrix. confusion_matrix puts true labels on
# the rows and predicted labels on the columns, which the axis labels reflect.
class_names = ["B (0)", "M (1)"]
cm = confusion_matrix(y_test, y_pred, labels=[0, 1])

fig, ax = plt.subplots()
sns.heatmap(
    cm,
    annot=True,
    fmt="d",
    cmap="Blues",
    xticklabels=class_names,
    yticklabels=class_names,
    ax=ax,
)
ax.set(xlabel="Predicted label", ylabel="True label", title="Confusion matrix (test set)")
plt.show()

In [None]:
# Per-class metrics report for the test-set predictions. The report is a
# pre-formatted string, so it is printed rather than left as a bare expression.
print(classification_report(y_true=y_test, y_pred=y_pred))