In [1]:
import pandas as pd
from sklearn.linear_model import LogisticRegression     # Import the logistic regression model
from sklearn.datasets import load_breast_cancer         # Import the breast cancer dataset
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix         # Import all the evaluation metrics


# load_breast_cancer() returns an object similar to a dictionary. Each of the values are numpy arrays
cancerData = load_breast_cancer()     

# keys: 'data', 'target', 'target_names', 'DESCR', 'feature_names', 'filename'

#print("The shape of the \"data\" numpy array is:", cancerData["data"].shape)
#print("The shape of the \"target\" numpy array is:", cancerData["target"].shape)

# load the features/input data and target/output data into a pandas dataframe
dataframe = pd.DataFrame(cancerData["data"], columns=cancerData["feature_names"])
dataframe["target"] = cancerData["target"]

# Seperate the data into the features (x) and target (y). Both x and y are numpy arrays
x = dataframe[cancerData["feature_names"]].values
y = dataframe["target"].values

# Build a linear regression model
model = LogisticRegression()   # our model is an instance of the LogisticRegression class
model.fit(x, y)

# See what the model predicts for the first datapoint in our dataset. Remember: model.predict() takes a 2d numpy array as input
print(model.score(x, y))

# Print out the evaluation metrics for this model
y_pred = model.predict(x)
print("accuracy:", accuracy_score(y, y_pred))
print("precision:", precision_score(y, y_pred))
print("recall:", recall_score(y, y_pred))
print("f1 score:", f1_score(y, y_pred))




0.9472759226713533
accuracy: 0.9472759226713533
precision: 0.947945205479452
recall: 0.969187675070028
f1 score: 0.9584487534626039


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


In [None]:
'''
# create a numpy array
x = dataframe[["Pclass", "Male", "Age", "Siblings/Spouses", "Parents/Children", "Fare"]].values
y = dataframe["Survived"].values

# All sklearn models are built as python classes. We first instantiate the class
# So model is an instance of the LogisticRegression class
model = LogisticRegression()

# The fit method is used to build the model
# Fitting the model means using the data to choose a line of best fit
model.fit(x, y)

# Calculate the accuracy score of the model
yPred = model.predict(x)
modelAccuracyScore = (yPred == y).sum() / len(y)
print(modelAccuracyScore)

# The score() attribute will also give us the accuracy score of the model
print(model.score(x, y))
'''