# TFLearn using Breast Cancer dataset 

## Loading required packages 

In [1]:
import pandas as pd
import numpy as np
import tflearn
from sklearn.datasets import load_breast_cancer

Instructions for updating:
non-resource variables are not supported in the long term
curses is not supported on this machine (please install/reinstall curses for an optimal experience)


## Loading the data-set 

In [2]:
cancer = load_breast_cancer(as_frame=True)
cancer_df = cancer.frame
cancer_features = cancer_df.drop(columns='target')

In [3]:
cancer

{'data':      mean radius  mean texture  mean perimeter  mean area  mean smoothness  \
 0          17.99         10.38          122.80     1001.0          0.11840   
 1          20.57         17.77          132.90     1326.0          0.08474   
 2          19.69         21.25          130.00     1203.0          0.10960   
 3          11.42         20.38           77.58      386.1          0.14250   
 4          20.29         14.34          135.10     1297.0          0.10030   
 ..           ...           ...             ...        ...              ...   
 564        21.56         22.39          142.00     1479.0          0.11100   
 565        20.13         28.25          131.20     1261.0          0.09780   
 566        16.60         28.08          108.30      858.1          0.08455   
 567        20.60         29.33          140.10     1265.0          0.11780   
 568         7.76         24.54           47.92      181.0          0.05263   
 
      mean compactness  mean concavity  me

In [3]:
X = cancer_features
X.shape

(569, 30)

In [5]:
cancer_df.columns

Index(['mean radius', 'mean texture', 'mean perimeter', 'mean area',
       'mean smoothness', 'mean compactness', 'mean concavity',
       'mean concave points', 'mean symmetry', 'mean fractal dimension',
       'radius error', 'texture error', 'perimeter error', 'area error',
       'smoothness error', 'compactness error', 'concavity error',
       'concave points error', 'symmetry error', 'fractal dimension error',
       'worst radius', 'worst texture', 'worst perimeter', 'worst area',
       'worst smoothness', 'worst compactness', 'worst concavity',
       'worst concave points', 'worst symmetry', 'worst fractal dimension',
       'target'],
      dtype='object')

In [4]:
y = cancer_df[["target"]]
y.shape

(569, 1)

## Viewing list of features 

In [7]:
cancer = load_breast_cancer()
list(cancer.feature_names)

['mean radius',
 'mean texture',
 'mean perimeter',
 'mean area',
 'mean smoothness',
 'mean compactness',
 'mean concavity',
 'mean concave points',
 'mean symmetry',
 'mean fractal dimension',
 'radius error',
 'texture error',
 'perimeter error',
 'area error',
 'smoothness error',
 'compactness error',
 'concavity error',
 'concave points error',
 'symmetry error',
 'fractal dimension error',
 'worst radius',
 'worst texture',
 'worst perimeter',
 'worst area',
 'worst smoothness',
 'worst compactness',
 'worst concavity',
 'worst concave points',
 'worst symmetry',
 'worst fractal dimension']

## Viewing list of targets 

In [8]:
list(cancer.target_names)

['malignant', 'benign']

## Checking feature and target variable info 

In [5]:
n_samples, n_features = X.shape
n_positive = sum(y.target)
n_negative = n_samples - n_positive

print("Total number of samples: {}".format(n_samples))
print("Total number of features: {}".format(n_features))
print("Number of positive classes: {}".format(n_positive))
print("Number of negative classes: {}".format(n_negative))

Total number of samples: 569
Total number of features: 30
Number of positive classes: 357
Number of negative classes: 212


## Splitting the dataset into train and test sets 

In [6]:
from sklearn.model_selection import train_test_split

In [7]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

In [8]:
X_train.shape

(381, 30)

## One-Hot-Encoding target 

In [9]:
from tflearn.data_utils import to_categorical

y_train = to_categorical(y_train, nb_classes=2)
y_test = to_categorical(y_test, nb_classes=2)
y_train[0]

array([1., 0.])

## Building Network using TFLearn 

In [11]:
network = tflearn.input_data(shape=[None,30])
network = tflearn.fully_connected(network, n_units=32, activation="relu")
network = tflearn.fully_connected(network, n_units=32, activation="relu")
network = tflearn.fully_connected(network, n_units=2, activation="softmax")

Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor


In [12]:
network = tflearn.regression(network, optimizer="sgd", loss="categorical_crossentropy")

## Building DNN Model of Network

In [13]:
model = tflearn.DNN(network)

In [14]:
X_train.shape

(381, 30)

In [15]:
y_train.shape

(381, 2)

## Training the Model 

In [16]:
model.fit(X_train, y_train, n_epoch=400)

Training Step: 2399  | total loss: [1m[32m0.66522[0m[0m | time: 0.013s
| SGD | epoch: 400 | loss: 0.66522 -- iter: 320/381
Training Step: 2400  | total loss: [1m[32m0.66461[0m[0m | time: 0.017s
| SGD | epoch: 400 | loss: 0.66461 -- iter: 381/381
--


## Predicting on test-data and checking Accuracy

In [17]:
train_score = model.evaluate(X_train, y_train)
test_score = model.evaluate(X_test, y_test)
print("Training score: {:.2f}".format(train_score[0]))
print("Test score: {:.2f}".format(test_score[0]))

Training score: 0.62
Test score: 0.64
