In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

In [2]:
data = pd.read_csv('data.csv')

In [3]:
X = data.drop('diagnosis', axis=1)  # Features
y = data['diagnosis']  # Labels

In [4]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [5]:
# Step 4: Feature Scaling
# Apply feature scaling to the independent variables
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [6]:
model = LogisticRegression()
model.fit(X_train, y_train)

In [7]:
y_pred = model.predict(X_test)

In [8]:
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy:.2f}')

print(classification_report(y_test, y_pred))

conf_matrix = confusion_matrix(y_test, y_pred)
print('Confusion Matrix:')
print(conf_matrix)

Accuracy: 0.97
              precision    recall  f1-score   support

           0       0.97      0.99      0.98        71
           1       0.98      0.95      0.96        43

    accuracy                           0.97       114
   macro avg       0.97      0.97      0.97       114
weighted avg       0.97      0.97      0.97       114

Confusion Matrix:
[[70  1]
 [ 2 41]]


In [10]:
import matplotlib.pyplot as plt

plt.figure(figsize=(8, 6))

# Create a scatter plot for positive diagnosis (class 1)
plt.scatter(X_test[y_test == 1]['id'], X_test[y_test == 1]['diagnosis'], color='blue', label='diagnosis = 1')

# Create a scatter plot for negative diagnosis (class 0)
plt.scatter(X_test[y_test == 0]['id'], X_test[y_test == 0]['diagnosis'], color='red', label='diagnosis = 0')

# Create a decision boundary plot
x_min, x_max = X_test['id'].min(), X_test['id'].max()
y_min, y_max = X_test['diagnosis'].min(), X_test['diagnosis'].max()

xx, yy = np.meshgrid(np.linspace(x_min, x_max, 100), np.linspace(y_min, y_max, 100))
Z = model.predict(np.c_[xx.ravel(), yy.ravel()])
Z = Z.reshape(xx.shape)

plt.contourf(xx, yy, Z, cmap=plt.cm.coolwarm, alpha=0.4)

plt.xlabel('Feature 1')
plt.ylabel('Feature 2')
plt.title('Logistic Regression Decision Boundary')
plt.legend()
plt.show()


IndexError: only integers, slices (`:`), ellipsis (`...`), numpy.newaxis (`None`) and integer or boolean arrays are valid indices

<Figure size 800x600 with 0 Axes>

In [12]:
plt.figure(figsize=(8, 6))

# Create a scatter plot for positive diagnosis (class 1)
plt.scatter(X_test[y_test == 1]['radius'], X_test[y_test == 1]['texture'], color='blue', label='Diagnosis = 1')

# Create a scatter plot for negative diagnosis (class 0)
plt.scatter(X_test[y_test == 0]['radius'], X_test[y_test == 0]['texture'], color='red', label='Diagnosis = 0')

# Create a decision boundary plot
x_min, x_max = X_test['radius'].min(), X_test['radius'].max()
y_min, y_max = X_test['texture'].min(), X_test['texture'].max()

xx, yy = np.meshgrid(np.linspace(x_min, x_max, 100), np.linspace(y_min, y_max, 100))
Z = model.predict(np.c_[xx.ravel(), yy.ravel()])
Z = Z.reshape(xx.shape)

plt.contourf(xx, yy, Z, cmap=plt.cm.coolwarm, alpha=0.4)

plt.xlabel('Radius')
plt.ylabel('Texture')
plt.title('Logistic Regression Decision Boundary')
plt.legend()
plt.show()


IndexError: only integers, slices (`:`), ellipsis (`...`), numpy.newaxis (`None`) and integer or boolean arrays are valid indices

<Figure size 800x600 with 0 Axes>

In [13]:
plt.figure(figsize=(8, 6))

# Create a scatter plot for positive diagnosis (class 1)
plt.scatter(X_test[y_test == 1]['radius_mean'], X_test[y_test == 1]['texture_mean'], color='blue', label='Diagnosis = 1')

# Create a scatter plot for negative diagnosis (class 0)
plt.scatter(X_test[y_test == 0]['radius_mean'], X_test[y_test == 0]['texture_mean'], color='red', label='Diagnosis = 0')

# Create a decision boundary plot
x_min, x_max = X_test['radius_mean'].min(), X_test['radius_mean'].max()
y_min, y_max = X_test['texture_mean'].min(), X_test['texture_mean'].max()

xx, yy = np.meshgrid(np.linspace(x_min, x_max, 100), np.linspace(y_min, y_max, 100))
Z = model.predict(np.c_[xx.ravel(), yy.ravel()])
Z = Z.reshape(xx.shape)

plt.contourf(xx, yy, Z, cmap=plt.cm.coolwarm, alpha=0.4)

plt.xlabel('Feature 1')
plt.ylabel('Feature 2')
plt.title('Logistic Regression Decision Boundary')
plt.legend()
plt.show()


IndexError: only integers, slices (`:`), ellipsis (`...`), numpy.newaxis (`None`) and integer or boolean arrays are valid indices

<Figure size 800x600 with 0 Axes>