In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import train_test_split, KFold, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import VotingClassifier,ExtraTreesClassifier,BaggingClassifier,RandomForestClassifier, AdaBoostClassifier, GradientBoostingClassifier
from sklearn.metrics import confusion_matrix, accuracy_score, f1_score
from seaborn import heatmap
from sklearn.preprocessing import MinMaxScaler
from xgboost import XGBClassifier
from warnings import filterwarnings
filterwarnings("ignore")
np.random.seed(0)

In [7]:
class Classification(object):
    def __init__(self, filename, filename1, filename2):
        try:
            self.dataset = pd.read_csv(filename)
            self.dataset1 = pd.read_csv(filename1)
            self.dataset2 = pd.read_csv(filename2)
            self.dataset = self.dataset.fillna(self.dataset.mean())
            self.dataset1 = self.dataset1.fillna(self.dataset1.mean())
            #self.dataset = self.dataset.dropna()
            #self.dataset1 = self.dataset1.dropna()
            self.dataset = pd.get_dummies(self.dataset, columns = ["Sex"], drop_first = True)
            self.dataset1 = pd.get_dummies(self.dataset1, columns = ["Sex"], drop_first = True)
            self.column_names = ["Pclass", "Sex_male", "Age"]
            self._x = self.dataset[self.column_names]
            self.y = self.dataset["Survived"]
            scaler = MinMaxScaler()
            self.x = scaler.fit_transform(self._x)
            self.x_train, self.x_test, self.y_train, self.y_test = train_test_split(self.x, self.y, 
                                                                                    test_size = 0.2, random_state = 0)
            #self.x_train["Age"] = sc_x.fit_transform(self.x_tra)
            self.x1_train = self.dataset[self.column_names]
            self.y1_train = self.dataset["Survived"]
            self.x1_test = self.dataset1[self.column_names]
            self.y1_test = self.dataset2["Survived"]
        except Exception as e:
                print('In init: ',e)
                
    def fit_logistic(self):
        try:
            self.model = LogisticRegression()
            self.model.fit(self.x1_train, self.y1_train)
        except Exception as e:
            print('In logistic: ',e)
            return False
        return True
    
    def fit_knn(self):
        try:
            self.model = KNeighborsClassifier(n_neighbors= 5, metric = 'minkowski', p =  2)
            self.model.fit(self.x1_train, self.y1_train)
        except Exception as e:
            print('In knn: ',e)
            return False
        return True
#https://machinelearningmastery.com/ensemble-machine-learning-algorithms-python-scikit-learn/    
    def fit_boost(self):
        try:
            kfold = KFold(n_splits=10, random_state=0)
            ada_model = AdaBoostClassifier(n_estimators=10, random_state=0)
            grad_boost_model = GradientBoostingClassifier(n_estimators=10, random_state=0)
            
            results_ada = cross_val_score(ada_model, self.x, self.y, cv=kfold)
            result_gb = cross_val_score(grad_boost_model, self.x, self.y, cv=kfold)
            print(f'Adaboost=====\nResults: {results_ada}\nMean: {results_ada.mean()}\n\n')
            print(f'Gradient Boost========\nResults: {result_gb}\nMEan: {result_gb.mean()}\n\n')
        except Exception as e:
            print('In boost: ',e)
        
    def fit_SVM(self):
        try:
            self.model = SVC(kernel = 'linear', random_state = 0)
            self.model.fit(self.x1_train, self.y1_train)
        except Exception as e:
            print('In SVM: ',e)
            return False
        return True
            
    def fit_NB(self):
        try:
            self.model = GaussianNB()
            self.model.fit(self.x1_train, self.y1_train)
        except Exception as e:
            print('In NB: ',e)
            return False
        return True
    
    def fit_DecisionTree(self):
        try:
            self.model = DecisionTreeClassifier(criterion='entropy', random_state = 0)
            self.model.fit(self.x1_train, self.y1_train)
        except Exception as e:
            print('In decision tree: ',e)
            return False
        return True
    
    def fit_RandomTree(self):
        try:
            self.model = RandomForestClassifier(n_estimators = 10, criterion = 'entropy')
            self.model.fit(self.x1_train, self.y1_train)
        except Exception as e:
            print('In random tree: ',e)
            return False
        return True
    
    def transform(self):
        try:
            self.y_pred = self.model.predict(self.x1_test)
            self.y_pred.shape()
            cm = confusion_matrix(self.y1_test, self.y_pred)
            heatmap(cm, annot = True)
            print(f'Accuracy {accuracy_score(self.y_test, self.y_pred)*100} and f1 score {f1_score(self.y_test, self.y_pred)*100}')
        except Exception as e:
            print('In transform: ',e)
            return None
        return self.y_pred, self.y1_test
        
    def draw_graph(self):
        try:
            fig, ax = plt.subplots(2, 2, figsize=(15, 15))
            ax[0, 0].scatter(self._x['Sex_male'], self.y)
            ax[0,0].title.set_text('Sex_male to survived graph')
            ax[0,0].set_xlabel('Sex_male')
            ax[0,0].set_ylabel('Survived')
            ax[0, 1].scatter(self._x['Pclass'], self.y)
            ax[0,0].title.set_text('Pclass to survived graph')
            ax[0,1].set_xlabel('Pclass')
            ax[0,1].set_ylabel('Survived')
            ax[1, 0].scatter(self._x['Age'], self.y)
            ax[0,0].title.set_text('Age to survived graph')
            ax[1,0].set_xlabel('Age')
            ax[1,0].set_ylabel('Survived')
            plt.show()
        except Exception as e:
            print('In graph: ',e)
#https://machinelearningmastery.com/ensemble-machine-learning-algorithms-python-scikit-learn/    
    def bagging(self):
        try:
            kfold = KFold(n_splits = 10, random_state = 0)
            cart = DecisionTreeClassifier()
            bagging_model = BaggingClassifier(base_estimator = cart, n_estimators = 10, random_state = 0)
            RandomForest_model = RandomForestClassifier(n_estimators = 10)
            extraForest_model = ExtraTreesClassifier(n_estimators = 10)
            
            bagging_results = cross_val_score(bagging_model, self.x, self.y, cv= kfold)
            randomforest_results = cross_val_score(RandomForest_model, self.x, self.y, cv=kfold)
            extraforest_results = cross_val_score(extraForest_model, self.x, self.y, cv=kfold)
            print(f'Bagging Accuracy ===== {bagging_results.mean()}\n\n')
            print(f'Random Forest Accuracy ===== {randomforest_results.mean()}\n\n')
            print(f'Extra Forest Accuracy ===== {extraforest_results.mean()}\n\n')
        except Exception as e:
            print('In bagging: ',e)
            
    def voting(self):
        try:
            kfold = KFold(n_splits = 10, random_state = 0)
            estimators = []
            model1 = KNeighborsClassifier(n_neighbors= 10, metric = 'minkowski', p =  2)
            model2 = DecisionTreeClassifier(criterion='entropy', random_state = 123)
            model3 = RandomForestClassifier(n_estimators = 10, criterion = 'entropy', random_state = 0)
            model4 = LogisticRegression()
            estimators.append(('knn', model1))
            estimators.append(('dtc', model2))
            estimators.append(('rfc', model3))
            #estimators.append(('logreg', model4))
            ensemble = VotingClassifier(estimators)
            results = cross_val_score(ensemble, self.x, self.y, cv=kfold)
            print(f'Voting======\nMax: {results.max()}\nMean: {results.mean()}')
        except Exception as e:
            print(e)
            
        
if __name__ == "__main__":
    class_model = Classification("train.csv", "test.csv", "gender_submission.csv")
    class_model.fit_logistic()
    #class_model.fit_knn()
    #class_model.fit_SVM()
    #class_model.fit_NB()
    #class_model.fit_DecisionTree()
    #class_model.fit_RandomTree()
    #class_model.fit_boost()
    #class_model.bagging()
    #class_model.voting()
    #class_model.draw_graph()
    y_pred = class_model.transform()

In transform:  'tuple' object is not callable
