In [None]:
import pandas as pd
import numpy
import matplotlib.pyplot as plt
import seaborn as sns
import sklearn
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler

%inline matplotlib

In [None]:
# Location of the raw CSV, relative to the notebook's working directory.
dataset = "../data/data.csv"

# Read the file into the working frame and preview its first rows.
df = pd.read_csv(filepath_or_buffer=dataset)
df.head(n=5)

In [None]:
# Drop the 'id' and 'Unnamed: 32' columns before modelling.
# A single non-inplace drop with errors='ignore' keeps this cell idempotent:
# re-running it after the columns are already gone no longer raises KeyError.
df = df.drop(columns=['id', 'Unnamed: 32'], errors='ignore')
df.head()

In [None]:
# Show how many exact zeros each column holds. The value has to be printed
# explicitly because it is not the last expression of the cell.
print(df.isin([0]).sum())

# Columns whose zero entries are replaced by the column mean.
zero_replace_cols = [
    'concavity_mean',
    'concave points_mean',
    'concavity_se',
    'concave points_se',
    'concavity_worst',
    'concave points_worst',
]

for col in zero_replace_cols:
    # Assign the result back instead of `df[col].replace(..., inplace=True)`:
    # the chained in-place form operates on a temporary Series, which emits a
    # FutureWarning on recent pandas and leaves `df` unchanged under
    # Copy-on-Write.
    # The mean is taken over the column as it currently is (zeros included).
    df[col] = df[col].replace(0, df[col].mean())

In [None]:
# Preview the first five rows of the working frame.
df.head(n=5)

In [None]:
print(df['diagnosis'].value_counts())

# Encode the target: 'B' -> 0, 'M' -> 1.
label_map = {'B': 0, 'M': 1}

# Guard against re-running the cell: mapping an already-encoded column would
# silently turn every label into NaN.
already_encoded = df['diagnosis'].isin(list(label_map.values())).all()
if not already_encoded:
    encoded = df['diagnosis'].map(label_map)
    # Labels outside the mapping come back as NaN; fail loudly instead of
    # carrying NaN targets into model fitting.
    unknown = df.loc[encoded.isna(), 'diagnosis'].unique()
    if len(unknown) > 0:
        raise ValueError(f"Unexpected diagnosis labels: {list(unknown)}")
    df['diagnosis'] = encoded.astype(int)

df.head()

In [None]:
# Partition the column names by dtype: object-typed columns are treated as
# categorical, everything else as numeric.
cat_cols = []
num_cols = []
for col in df.columns:
    if df[col].dtype == 'object':
        cat_cols.append(col)
    else:
        num_cols.append(col)

In [None]:
# One histogram per numeric column, laid out on a grid five plots wide.
# The row count is derived from the number of columns, so no column is
# silently dropped (the previous fixed cap stopped drawing after 31 plots).
n_grid_cols = 5
n_grid_rows = max(1, int(np.ceil(len(num_cols) / n_grid_cols)))

# Scale the height with the row count; 7 rows gives the original 20x40 figure.
plt.figure(figsize=(20, 40 * n_grid_rows / 7))
for plotnumber, column in enumerate(num_cols, start=1):
    ax = plt.subplot(n_grid_rows, n_grid_cols, plotnumber)
    sns.histplot(df[column], color='red', ax=ax)
    ax.set_xlabel(column)

plt.tight_layout()
plt.show()

In [None]:
# Hold out 40% of the rows as the test set.
train, test = train_test_split(df, test_size=0.4, random_state=28)

# Carve the validation set out of the remaining training rows.
# Splitting `df` again here (as before) re-drew train/validation from the
# full data, so both overlapped the test set and leaked test rows into
# training.
train, validation = train_test_split(train, test_size=0.1, random_state=28)

# The three partitions must account for every row exactly once.
assert len(train) + len(validation) + len(test) == len(df), "splits do not partition df"

print(f"Train set has {train.shape[0]} rows and {train.shape[1]} columns")
print(f"Test set has {test.shape[0]} rows and {test.shape[1]} columns")
print(f"Validation set has {validation.shape[0]} rows and {validation.shape[1]} columns")
test.head()

In [None]:
le = LabelEncoder()

# Integer-encode each categorical column. The encoder is refit on the
# training values of every column and then applied to the other two splits.
for col in cat_cols:
    le.fit(train[col])
    train[col] = le.transform(train[col])
    validation[col] = le.transform(validation[col])
    test[col] = le.transform(test[col])

# Separate the features from the 'diagnosis' target for each split.
X_train, y_train = train.drop(columns='diagnosis'), train['diagnosis']
X_test, y_test = test.drop(columns='diagnosis'), test['diagnosis']
X_validation, y_validation = validation.drop(columns='diagnosis'), validation['diagnosis']

# Standardise the features: statistics are learned from the training split
# only and reused unchanged for the test and validation splits.
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)
X_validation = scaler.transform(X_validation)

In [None]:
def run_model(model, X_train, y_train, X_test, y_test, X_validation, y_validation):
    """Fit ``model`` on the training split and report how it performs.

    Prints accuracy, precision, recall and F1 computed from the predictions
    on the test split, prints the estimator's own ``score`` on the validation
    split, and shows the test-set confusion matrix as a heatmap.

    Parameters
    ----------
    model : object
        Unfitted estimator instance providing ``fit``, ``predict`` and
        ``score``.
    X_train, y_train
        Features and targets used to fit the model.
    X_test, y_test
        Features and targets used for the printed metrics and the confusion
        matrix.
    X_validation, y_validation
        Features and targets passed to ``model.score``.

    Returns
    -------
    None
        All results are printed or plotted. ``model`` is fitted in place.
    """
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    print(f"Model = {model}")
    print(f"Accuracy score: {accuracy_score(y_test, y_pred)}")
    print(f"Precision score: {sklearn.metrics.precision_score(y_test, y_pred)}")
    print(f"Recall score: {sklearn.metrics.recall_score(y_test, y_pred)}")
    print(f"F1 score: {sklearn.metrics.f1_score(y_test, y_pred)}")
    print(f"Validation score: {model.score(X_validation, y_validation)}")

    cm = confusion_matrix(y_test, y_pred)
    plt.figure(figsize=(10, 7))
    # fmt='d' prints the cell counts as plain integers; the default annotation
    # format switches to scientific notation once a count reaches three digits.
    sns.heatmap(cm, annot=True, fmt='d', cmap='magma')
    plt.xlabel('Predicted')
    plt.ylabel('Truth')
    plt.show()
    print()

In [None]:
from sklearn.utils import all_estimators

# Gather every classifier class that scikit-learn registers. all_estimators
# yields (name, class) pairs; only the class is kept.
models = [estimator_cls for _, estimator_cls in all_estimators(type_filter='classifier')]

models

In [None]:
# Try every collected classifier with its default constructor arguments.
# Estimators that cannot be built or fitted this way are skipped, but the
# reason is reported instead of being silently discarded.
for model in models:
    try:
        run_model(model(), X_train, y_train, X_test, y_test, X_validation, y_validation)
    except Exception as exc:
        # `except Exception` (not a bare except) keeps KeyboardInterrupt
        # working, so the loop can still be stopped from the notebook.
        model_name = getattr(model, '__name__', repr(model))
        print(f"Skipping {model_name}: {type(exc).__name__}: {exc}")
        print()

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam

# Small fully connected binary classifier: two ReLU hidden layers (30 and 15
# units), each followed by 50% dropout, and a single sigmoid output unit.
model = Sequential([
    Dense(30, activation='relu'),
    Dropout(0.5),
    Dense(15, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid'),
])

model.compile(
    loss='binary_crossentropy',
    optimizer=Adam(learning_rate=0.0001),
    metrics=['accuracy'],
)

# Train for 600 epochs, evaluating on the validation split after each epoch.
history = model.fit(
    x=X_train,
    y=y_train,
    epochs=600,
    validation_data=(X_validation, y_validation),
    verbose=1,
)

# Plot the per-epoch metrics recorded during training.
losses = pd.DataFrame(history.history)
losses.plot()
plt.show()