<a href="https://colab.research.google.com/github/bltsezer/DataSet_Uuygulama/blob/main/_03_BreastCancer.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

class DataPreprocessor:
    """Column naming, feature/label splitting, scaling and hold-out splitting.

    All methods are stateless; the instance only serves as a namespace.
    """

    def preprocess(self, train, test):
        """Keep the first 22 columns of each frame and give them fixed names.

        Args:
            train: DataFrame with at least 22 columns (training rows).
            test: DataFrame with at least 22 columns (test rows).

        Returns:
            A ``(train, test)`` tuple of new DataFrames whose columns are the
            21 feature names followed by ``'Class'``.
        """
        column_list = [
            'age', 'sex', 'on_thyroxine', 'query_on_thyroxine',
            'on_antithyroid_medication', 'sick', 'pregnant',
            'thyroid_surgery', 'I131_treatment', 'query_hypothyroid',
            'query_hyperthyroid', 'lithium', 'goitre', 'tumor',
            'hypopituitary', 'psych', 'TSH', 'T3', 'TT4', 'T4U', 'FTI',
            'Class',
        ]
        # Rebuilding from ``.values`` drops the original index and any extra
        # trailing columns beyond the 22 that are named above.
        train = pd.DataFrame(train.iloc[:, 0:22].values, columns=column_list)
        test = pd.DataFrame(test.iloc[:, 0:22].values, columns=column_list)

        return train, test

    def split_predictors(self, data):
        """Separate the ``'Class'`` label column from the feature columns.

        Args:
            data: DataFrame containing a ``'Class'`` column.

        Returns:
            ``(data_X, data_y)`` where ``data_X`` is ``data`` without
            ``'Class'`` and ``data_y`` is the ``'Class'`` Series.
        """
        data_X = data.drop(['Class'], axis=1)
        data_y = data['Class']

        return data_X, data_y

    def scale_data(self, train_X, test_X):
        """Standardize features using statistics learned from ``train_X`` only.

        The scaler is fitted on ``train_X`` and then applied unchanged to
        ``test_X``, so no information from ``test_X`` enters the fit.

        Returns:
            ``(train_X, test_X)`` as transformed by ``StandardScaler``.
        """
        sc = StandardScaler()
        train_X = sc.fit_transform(train_X)
        test_X = sc.transform(test_X)

        return train_X, test_X

    def validation_split(self, train_X, train_y, test_size=0.2, random_state=1):
        """Split the training data into a training part and a validation part.

        Args:
            train_X: Feature matrix.
            train_y: Labels aligned with ``train_X``.
            test_size: Forwarded to ``train_test_split`` (validation share).
            random_state: Forwarded to ``train_test_split`` (shuffle seed).

        Returns:
            ``(X_train, X_validation, y_train, y_validation)``.
        """
        # Forward the caller's arguments; they were previously ignored in
        # favour of hard-coded 0.2 / 1 (which remain the defaults).
        X_train, X_validation, y_train, y_validation = train_test_split(
            train_X, train_y, test_size=test_size, random_state=random_state
        )

        return X_train, X_validation, y_train, y_validation

In [None]:
import pandas

class DatasetLoader:
    """Load a header-less, single-space-separated text file with pandas."""

    # Class-level fallback; every instance overrides it in __init__.
    path = ""

    def __init__(self, path):
        """Remember the location that ``load`` will read from."""
        self.path = path

    def load(self):
        """Return the file at ``self.path`` as a DataFrame.

        The file is parsed with ``sep=' '`` and ``header=None``, so the
        resulting columns are labelled by integer position.
        """
        return pandas.read_csv(self.path, sep=' ', header=None)

    def print_shape(self, data):
        """Print the ``shape`` attribute of ``data`` to stdout."""
        shape = data.shape
        print(shape)

In [None]:
import pickle
import numpy
import pandas

from sklearn.metrics import confusion_matrix
from sklearn.model_selection import cross_val_score
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.wrappers.scikit_learn import KerasClassifier

class ModelBuilder:
    """Build, validate, evaluate and persist the 3-class Keras classifier.

    NOTE(review): this class relies on ``keras.wrappers.scikit_learn``, which
    is presumably unavailable in recent Keras releases -- confirm against the
    installed version.
    """

    def classifier_model(self):
        """Return a compiled feed-forward network.

        Architecture: 21 inputs -> three blocks of (Dense 48, relu +
        Dropout 0.25) -> Dense 3 with softmax. Compiled with
        categorical cross-entropy, the Adam optimizer and accuracy metric.
        """
        model = Sequential()

        # Only the first layer declares the input width.
        model.add(Dense(48, kernel_initializer='uniform', input_dim=21, activation='relu'))
        model.add(Dropout(0.25))

        # Two further identical hidden blocks.
        for _ in range(2):
            model.add(Dense(48, kernel_initializer='uniform', activation='relu'))
            model.add(Dropout(0.25))

        model.add(Dense(3, kernel_initializer='uniform', activation='softmax'))

        model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

        return model

    def get_classifier(self):
        """Wrap ``classifier_model`` in a scikit-learn compatible estimator.

        Returns:
            A ``KerasClassifier`` configured with batch size 10 and 100 epochs.
        """
        classifier = KerasClassifier(build_fn=self.classifier_model, batch_size=10, epochs=100)

        return classifier

    def finalize_and_save(self, model, train_X, train_y, filename='../model/final_model'):
        """Fit ``model`` on the given data, then pickle it to ``filename``."""
        model.fit(train_X, train_y)

        self.save_model(model, filename)

    def save_model(self, model, filename='../model/saved_model'):
        """Pickle ``model`` to ``filename`` and print a confirmation.

        Raises:
            OSError: if ``filename`` cannot be opened for writing.
        """
        # Context manager guarantees the handle is flushed and closed even if
        # pickling fails part-way.
        with open(filename, 'wb') as handle:
            pickle.dump(model, handle)
        print("\nModel is saved..\n")

    def load_model(self, model_filename):
        """Unpickle and return the object stored in ``model_filename``.

        SECURITY: ``pickle.load`` can execute arbitrary code embedded in the
        file. Only load files that this project wrote itself.
        """
        with open(model_filename, 'rb') as handle:
            loaded_model = pickle.load(handle)

        return loaded_model

    def validate(self, model, train_X, train_y):
        """Run 10-fold cross-validation (3 parallel jobs) and print mean/std."""
        results = cross_val_score(estimator=model, X=train_X, y=train_y, cv=10, n_jobs=3)

        print("\nCross Validation - Accuracy : %.2f%% (%.2f%%)\n" % (results.mean()*100.0, results.std()*100.0))

    @staticmethod
    def _accuracy_percent(cm, n_samples):
        """Return accuracy in percent from confusion matrix ``cm``.

        Correct predictions are the diagonal of the matrix, so the trace is
        used instead of indexing three fixed cells; this works for any number
        of classes present in the evaluated data.
        """
        return numpy.trace(cm) * 100 / n_samples

    def evaluate(self, model, train_X, train_y, test_X, test_y):
        """Fit ``model`` on the training data and print accuracy on the test data.

        The model is (re)fitted here with batch size 10 for 100 epochs before
        predicting on ``test_X``.
        """
        model.fit(train_X, train_y, batch_size=10, epochs=100)

        y_test_pred = model.predict(test_X)

        cm = confusion_matrix(test_y, y_test_pred)

        print("\nModel Evaluation - Accuracy is %.3f%% \n" % self._accuracy_percent(cm, test_y.size))

    def check_prediction(self, model, test_X, test_y):
        """Print the mapped predictions of an already-fitted ``model`` and its accuracy."""
        y_test_pred = model.predict(test_X)

        # The confusion matrix must be built from the raw class values,
        # before they are replaced by their display names.
        cm = confusion_matrix(test_y, y_test_pred)

        y_test_pred = self.map_pred_class(y_test_pred)

        print("\n............Predictions............\n")
        print(y_test_pred.reshape(-1, 1))
        print("\nTest Data - Accuracy: %.3f%% \n" % self._accuracy_percent(cm, test_y.size))

    def save_predictions(self, model, test_X, filename='../prediction/predictions.csv'):
        """Predict on ``test_X``, map classes to names and write them as CSV.

        Args:
            model: Fitted estimator exposing ``predict``.
            test_X: Feature matrix to predict on.
            filename: Destination CSV path (written without the index column).
        """
        predictions = model.predict(test_X)

        predictions = self.map_pred_class(predictions)

        pandas.DataFrame(predictions).to_csv(filename, index=False)
        print("\nSAVE..\n")

    def map_pred_class(self, preditions):
        """Translate predicted class values into display names.

        ``3`` maps to ``'Normal'``, ``2`` to ``'Subnormal'`` and every other
        value to ``'HyperThyroid'``.

        Returns:
            A numpy array of the mapped names, in input order.
        """
        pred_map = [
            'Normal' if x == 3 else 'Subnormal' if x == 2 else 'HyperThyroid'
            for x in preditions
        ]

        return numpy.array(pred_map)

In [None]:
!wget "http://cimalab.intec.co/applications/thyroid/maligns.zip"

!wget "http://cimalab.intec.co/applications/thyroid/benigns.zip"


In [None]:
!unzip maligns.zip
!unzip benigns.zip

In [None]:
!dir

In [None]:
!rm maligns.zip
!rm benigns.zip

In [None]:
!pwd
# %cd /content

In [None]:
!dir

In [None]:
   
# End-to-end run: load -> name columns -> split -> scale -> cross-validate ->
# hold-out evaluation -> retrain on everything -> test-set report -> persist.

# NOTE(review): the download cells fetch "benigns.zip" and "maligns.zip";
# neither 'bening' nor the absolute path '/maligns' obviously matches what
# those archives extract to -- confirm both paths. DatasetLoader.load parses
# its input as a space-separated text file, so each path must point to such
# a file, not a directory.
data_loader = DatasetLoader('bening')
train = data_loader.load()

data_loader = DatasetLoader('/maligns')
test = data_loader.load()

# Keep the first 22 columns and assign the feature names plus 'Class'.
dp = DataPreprocessor()
train, test = dp.preprocess(train, test)

# Separate the 'Class' label from the 21 feature columns.
train_X, train_y = dp.split_predictors(train)
test_X, test_y = dp.split_predictors(test)



# Carve a validation hold-out out of the training data (defaults: 20 %, seed 1).
X_train, X_val, y_train, y_val = dp.validation_split(train_X, train_y)


# Scaler is fitted on X_train only and then applied to X_val.
X_train, X_val = dp.scale_data(X_train, X_val)

mb = ModelBuilder()
classifier = mb.get_classifier()

# 10-fold cross-validation on the reduced training split.
mb.validate(classifier, X_train, y_train)

# Refit on the reduced training split and report accuracy on the hold-out.
mb.evaluate(classifier, X_train, y_train, X_val, y_val)



# From here on use the full training set: refit the scaler on all of train_X
# and apply it to the real test features.
train_X, test_X = dp.scale_data(train_X, test_X)

mb.validate(classifier, train_X, train_y)

# Final fit on the complete training data.
classifier.fit(train_X, train_y, batch_size = 10, epochs = 100)


# Print mapped predictions and accuracy for the test set.
mb.check_prediction(classifier, test_X, test_y)



# NOTE(review): no cell visible here creates '../model' or '../prediction';
# presumably they must exist beforehand -- confirm.
mb.save_model(classifier, '../model/final_model1')

mb.save_predictions(classifier, test_X)