In [1]:
import warnings
warnings.filterwarnings('ignore')

import pandas as pd
import numpy as np

from Module.weight_im import *
from Module.weight_sk import *
from Module.train_clf import *

from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn import svm

from tensorflow.keras import optimizers
import tensorflow as tf
from tensorflow.keras.utils import to_categorical

# Read both resampled datasets in one pass and keep them in a list so that
# later cells can iterate over them.
# (Original note: the copy step was only used back when the data came from Excel.)
xlsxs = [
    pd.read_csv(csv_path)
    for csv_path in ('Datas/resample_rd_500.csv', 'Datas/resample_rd_10000.csv')
]
xlsx500, xlsx10000 = xlsxs


In [2]:
class adj_keras(tf.keras.models.Model):
    """Keras model exposing ``fit`` / ``predict_proba`` / ``predict`` with label handling.

    ``fit`` one-hot encodes the targets and stores the distinct labels in
    ``classes_``; ``predict`` maps the arg-max output column back to a label.
    The wrapped network therefore needs one output unit per class.
    """

    def fit(self, X_train, y_train, sample_weight = None):
        """Train for 300 silent epochs on one-hot encoded targets.

        Parameters
        ----------
        X_train : array-like
            Training features, forwarded unchanged to ``Model.fit``.
        y_train : array-like
            Class labels. They may be arbitrary values; they are mapped to
            positions ``0..K-1`` before one-hot encoding.
        sample_weight : array-like or None
            Optional per-sample weights, forwarded to ``Model.fit``.

        Returns
        -------
        self
        """
        # Encode labels by their position in the sorted unique labels so that
        # one-hot column k always corresponds to classes_[k]. Feeding the raw
        # labels to to_categorical only lines up when labels are exactly 0..K-1.
        classes, label_idx = np.unique(np.asarray(y_train).ravel(), return_inverse=True)
        c_y = to_categorical(label_idx, num_classes=len(classes))
        super().fit(X_train, c_y, sample_weight=sample_weight, epochs=300, verbose=0)
        self.classes_ = classes
        return self

    def predict_proba(self, X_train):
        """Return the raw network outputs, one column per class."""
        return super().predict(X_train)

    def predict(self, X):
        """Return the label whose output column is largest for each row of ``X``."""
        proba = self.predict_proba(X)
        pred = self.classes_.take(np.argmax(proba, axis = 1), axis = 0)
        return pred

In [3]:
class customMLPClassifer(MLPClassifier):
    """MLPClassifier that accepts ``sample_weight`` by weighted bootstrap resampling.

    When weights are passed to ``fit``, the training set is replaced by a
    same-sized sample drawn with replacement, where each row's selection
    probability is proportional to its weight.
    """

    def resample_with_replacement(self, X_train, y_train, sample_weight):
        """Draw ``len(X_train)`` rows with replacement according to ``sample_weight``.

        Parameters
        ----------
        X_train : array-like, 2-D
            Feature matrix.
        y_train : array-like, 1-D
            Labels aligned with the rows of ``X_train``.
        sample_weight : array-like, 1-D
            Non-negative weights; they are normalised to sum to 1 here.

        Returns
        -------
        (X_resampled, y_resampled) : tuple of ndarray
            Features as ``float32`` and labels as integers, both with the same
            number of rows as the input.
        """
        # Accept DataFrames/Series/lists: positional indexing below needs ndarrays.
        X_train = np.asarray(X_train)
        y_train = np.asarray(y_train)
        sample_weight = np.asarray(sample_weight, dtype=np.float64)

        # normalize sample_weights if not already
        sample_weight = sample_weight / sample_weight.sum(dtype=np.float64)

        # Draw every row index in a single call instead of once per row.
        n_samples = len(X_train)
        draws = np.random.choice(n_samples, size=n_samples, p=sample_weight)

        # `np.int` was removed in NumPy 1.24; the builtin `int` is the same dtype.
        X_train_resampled = X_train[draws].astype(np.float32)
        y_train_resampled = y_train[draws].astype(int)

        return X_train_resampled, y_train_resampled


    def fit(self, X, y, sample_weight=None):
        """Fit the network, resampling the data first when weights are given.

        Returns the fitted estimator (whatever ``MLPClassifier.fit`` returns).
        """
        if sample_weight is not None:
            X, y = self.resample_with_replacement(X, y, sample_weight)

        # Delegate to the public fit rather than the private `_fit`, so that
        # warm-start handling follows the installed scikit-learn version.
        return super().fit(X, y)
# Base learner shared by the boosting ensembles below: a single hidden layer of
# five logistic units trained with plain SGD. n_iter_no_change equals max_iter.
mlp = customMLPClassifer(
    hidden_layer_sizes=5,
    activation='logistic',
    solver='sgd',
    learning_rate_init=0.1,
    max_iter=500,
    n_iter_no_change=500,
)


In [4]:
# SGD with gradient-norm clipping. `learning_rate` is the supported argument
# name; the old `lr` alias is deprecated and rejected by recent Keras releases.
sgd=optimizers.SGD(learning_rate=0.1,clipnorm=1.)
X = tf.keras.layers.Input(shape=[7])
H = tf.keras.layers.Dense(5,activation='sigmoid')(X)
# adj_keras.fit trains on one-hot targets and adj_keras.predict takes the
# arg-max over output columns, so the output layer needs one unit per class.
# With a single unit the arg-max is always 0 and only one class is ever predicted.
# Two units assumes a binary target -- TODO confirm.
Y = tf.keras.layers.Dense(2, activation='sigmoid')(H)
estimator = adj_keras(X,Y)
estimator.compile(loss='mean_squared_error', metrics=['accuracy'], optimizer=sgd)

In [5]:
# Both ensembles share the same configuration: 20 rounds of discrete SAMME
# boosting over the MLP base learner.
boost_kwargs = dict(base_estimator=mlp, n_estimators=20, algorithm='SAMME')
rusboost = RUSBoostClassifier(**boost_kwargs)
clf = AdaBoostClassifier(**boost_kwargs)

# RUS

In [8]:
# 500-row dataset: the first seven columns are the features, the eighth is the target.
X = xlsx500.iloc[:, :7]
y = xlsx500.iloc[:, 7]
kfold_verify_RUS(rusboost, 10, X, y, "RUS", 20)

In [9]:
# 10000-row dataset: the first seven columns are the features, the eighth is the target.
X = xlsx10000.iloc[:, :7]
y = xlsx10000.iloc[:, 7]
kfold_verify_RUS(rusboost, 10, X, y, "RUS", 20)

# CUS

In [6]:
# Run the CUS evaluation on both datasets with the same AdaBoost classifier.
# NOTE(review): the second call previously passed `rusboost`, which made the
# two CUS runs use different ensembles. This looks like a copy-paste slip from
# the RUS cells -- confirm that `clf` is the intended model here.
X, y = xlsx500.iloc[:,:7], xlsx500.iloc[:,7]
kfold_verify_CUS(clf,10,X, y, "CUS", 20)
X, y = xlsx10000.iloc[:,:7], xlsx10000.iloc[:,7]
kfold_verify_CUS(clf,10, X, y, "CUS",20)

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
['fold', 'train/test', 'ratio1', 'ratio2', 'ratio3', 'ratio4', 'ratio5', 'ratio6', 'ratio7', 'target', 'predict', 'total_proba', 'estm 0 proba 0', 'estm 0 proba 1', 'estm 1 proba 0', 'estm 1 proba 1', 'estm 2 proba 0', 'estm 2 proba 1', 'estm 3 proba 0', 'estm 3 proba 1', 'estm 4 proba 0', 'estm 4 proba 1', 'estm 5 proba 0', 'estm 5 proba 1', 'estm 6 proba 0', 'estm 6 proba 1', 'estm 7 proba 0', 'estm 7 proba 1', 'estm 8 proba 0', 'estm 8 proba 1', 'estm 9 proba 0', 'estm 9 proba 1', 'estm 10 proba 0', 'estm 10 proba 1', 'estm 11 proba 0', 'estm 11 proba 1', 'estm 12 proba 0', 'estm 12 proba 1', 'estm 13 proba 0', 'estm 13 proba 1', 'estm 14 proba 0', 'estm 14 proba 1', 'estm 15 proba 0', 'estm 15 proba 1', 'estm 16 proba 0', 'estm 16 proba 1', 'estm 17 proba 0', 'estm 17 proba 1', 'estm 18 proba 0', 'estm 18 proba 1']
      0      1         2         3         4         5         6         7   \
0    1.0  train  1.841582 -0.112986  0.0299

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
['fold', 'train/test', 'ratio1', 'ratio2', 'ratio3', 'ratio4', 'ratio5', 'ratio6', 'ratio7', 'target', 'predict', 'total_proba', 'estm 0 proba 0', 'estm 0 proba 1', 'estm 1 proba 0', 'estm 1 proba 1', 'estm 2 proba 0', 'estm 2 proba 1', 'estm 3 proba 0', 'estm 3 proba 1', 'estm 4 proba 0', 'estm 4 proba 1', 'estm 5 proba 0', 'estm 5 proba 1', 'estm 6 proba 0', 'estm 6 proba 1', 'estm 7 proba 0', 'estm 7 proba 1', 'estm 8 proba 0', 'estm 8 proba 1', 'estm 9 proba 0', 'estm 9 proba 1', 'estm 10 proba 0', 'estm 10 proba 1', 'estm 11 proba 0', 'estm 11 proba 1', 'estm 12 proba 0', 'estm 12 proba 1', 'estm 13 proba 0', 'estm 13 proba 1', 'estm 14 proba 0', 'estm 14 proba 1', 'estm 15 proba 0', 'estm 15 proba 1', 'estm 16 proba 0', 'estm 16 proba 1', 'estm 17 proba 0', 'estm 17 proba 1', 'estm 18 proba 0', 'estm 18 proba 1', 'estm 19 proba 0', 'estm 19 proba 1']
      0      1         2         3         4         5         6         7   \
0 

0
1
2
3
4
5
6
7
8
9
10
11
12
['fold', 'train/test', 'ratio1', 'ratio2', 'ratio3', 'ratio4', 'ratio5', 'ratio6', 'ratio7', 'target', 'predict', 'total_proba', 'estm 0 proba 0', 'estm 0 proba 1', 'estm 1 proba 0', 'estm 1 proba 1', 'estm 2 proba 0', 'estm 2 proba 1', 'estm 3 proba 0', 'estm 3 proba 1', 'estm 4 proba 0', 'estm 4 proba 1', 'estm 5 proba 0', 'estm 5 proba 1', 'estm 6 proba 0', 'estm 6 proba 1', 'estm 7 proba 0', 'estm 7 proba 1', 'estm 8 proba 0', 'estm 8 proba 1', 'estm 9 proba 0', 'estm 9 proba 1', 'estm 10 proba 0', 'estm 10 proba 1', 'estm 11 proba 0', 'estm 11 proba 1', 'estm 12 proba 0', 'estm 12 proba 1']
      0      1         2         3         4         5         6         7   \
0    7.0  train -0.638571 -0.097144 -0.449753 -0.087178 -0.276824  0.690491   
1    7.0  train  0.443752 -0.115144 -0.196925 -0.814225 -0.308162  0.147209   
2    7.0  train -0.458183 -0.076098 -0.576167  1.003391  3.515141  1.306210   
3    7.0  train -0.518312  0.826304 -1.376789  1.609

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
['fold', 'train/test', 'ratio1', 'ratio2', 'ratio3', 'ratio4', 'ratio5', 'ratio6', 'ratio7', 'target', 'predict', 'total_proba', 'estm 0 proba 0', 'estm 0 proba 1', 'estm 1 proba 0', 'estm 1 proba 1', 'estm 2 proba 0', 'estm 2 proba 1', 'estm 3 proba 0', 'estm 3 proba 1', 'estm 4 proba 0', 'estm 4 proba 1', 'estm 5 proba 0', 'estm 5 proba 1', 'estm 6 proba 0', 'estm 6 proba 1', 'estm 7 proba 0', 'estm 7 proba 1', 'estm 8 proba 0', 'estm 8 proba 1', 'estm 9 proba 0', 'estm 9 proba 1', 'estm 10 proba 0', 'estm 10 proba 1', 'estm 11 proba 0', 'estm 11 proba 1', 'estm 12 proba 0', 'estm 12 proba 1', 'estm 13 proba 0', 'estm 13 proba 1', 'estm 14 proba 0', 'estm 14 proba 1', 'estm 15 proba 0', 'estm 15 proba 1', 'estm 16 proba 0', 'estm 16 proba 1', 'estm 17 proba 0', 'estm 17 proba 1', 'estm 18 proba 0', 'estm 18 proba 1', 'estm 19 proba 0', 'estm 19 proba 1']
       0      1         2         3         4         5         6         7   \
0

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
['fold', 'train/test', 'ratio1', 'ratio2', 'ratio3', 'ratio4', 'ratio5', 'ratio6', 'ratio7', 'target', 'predict', 'total_proba', 'estm 0 proba 0', 'estm 0 proba 1', 'estm 1 proba 0', 'estm 1 proba 1', 'estm 2 proba 0', 'estm 2 proba 1', 'estm 3 proba 0', 'estm 3 proba 1', 'estm 4 proba 0', 'estm 4 proba 1', 'estm 5 proba 0', 'estm 5 proba 1', 'estm 6 proba 0', 'estm 6 proba 1', 'estm 7 proba 0', 'estm 7 proba 1', 'estm 8 proba 0', 'estm 8 proba 1', 'estm 9 proba 0', 'estm 9 proba 1', 'estm 10 proba 0', 'estm 10 proba 1', 'estm 11 proba 0', 'estm 11 proba 1', 'estm 12 proba 0', 'estm 12 proba 1', 'estm 13 proba 0', 'estm 13 proba 1', 'estm 14 proba 0', 'estm 14 proba 1', 'estm 15 proba 0', 'estm 15 proba 1', 'estm 16 proba 0', 'estm 16 proba 1']
       0      1         2         3         4         5         6         7   \
0    13.0  train -0.627807 -0.101674 -0.429186 -0.091751 -0.276188  0.681812   
1    13.0  train  0.438839 -0.119509 -0.1805

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
['fold', 'train/test', 'ratio1', 'ratio2', 'ratio3', 'ratio4', 'ratio5', 'ratio6', 'ratio7', 'target', 'predict', 'total_proba', 'estm 0 proba 0', 'estm 0 proba 1', 'estm 1 proba 0', 'estm 1 proba 1', 'estm 2 proba 0', 'estm 2 proba 1', 'estm 3 proba 0', 'estm 3 proba 1', 'estm 4 proba 0', 'estm 4 proba 1', 'estm 5 proba 0', 'estm 5 proba 1', 'estm 6 proba 0', 'estm 6 proba 1', 'estm 7 proba 0', 'estm 7 proba 1', 'estm 8 proba 0', 'estm 8 proba 1', 'estm 9 proba 0', 'estm 9 proba 1', 'estm 10 proba 0', 'estm 10 proba 1', 'estm 11 proba 0', 'estm 11 proba 1', 'estm 12 proba 0', 'estm 12 proba 1', 'estm 13 proba 0', 'estm 13 proba 1', 'estm 14 proba 0', 'estm 14 proba 1', 'estm 15 proba 0', 'estm 15 proba 1', 'estm 16 proba 0', 'estm 16 proba 1', 'estm 17 proba 0', 'estm 17 proba 1', 'estm 18 proba 0', 'estm 18 proba 1', 'estm 19 proba 0', 'estm 19 proba 1']
       0      1         2         3         4         5         6         7   \
0

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
['fold', 'train/test', 'ratio1', 'ratio2', 'ratio3', 'ratio4', 'ratio5', 'ratio6', 'ratio7', 'target', 'predict', 'total_proba', 'estm 0 proba 0', 'estm 0 proba 1', 'estm 1 proba 0', 'estm 1 proba 1', 'estm 2 proba 0', 'estm 2 proba 1', 'estm 3 proba 0', 'estm 3 proba 1', 'estm 4 proba 0', 'estm 4 proba 1', 'estm 5 proba 0', 'estm 5 proba 1', 'estm 6 proba 0', 'estm 6 proba 1', 'estm 7 proba 0', 'estm 7 proba 1', 'estm 8 proba 0', 'estm 8 proba 1', 'estm 9 proba 0', 'estm 9 proba 1', 'estm 10 proba 0', 'estm 10 proba 1', 'estm 11 proba 0', 'estm 11 proba 1', 'estm 12 proba 0', 'estm 12 proba 1', 'estm 13 proba 0', 'estm 13 proba 1', 'estm 14 proba 0', 'estm 14 proba 1', 'estm 15 proba 0', 'estm 15 proba 1', 'estm 16 proba 0', 'estm 16 proba 1', 'estm 17 proba 0', 'estm 17 proba 1', 'estm 18 proba 0', 'estm 18 proba 1', 'estm 19 proba 0', 'estm 19 proba 1']
       0      1         2         3         4         5         6         7   \
0

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
['fold', 'train/test', 'ratio1', 'ratio2', 'ratio3', 'ratio4', 'ratio5', 'ratio6', 'ratio7', 'target', 'predict', 'total_proba', 'estm 0 proba 0', 'estm 0 proba 1', 'estm 1 proba 0', 'estm 1 proba 1', 'estm 2 proba 0', 'estm 2 proba 1', 'estm 3 proba 0', 'estm 3 proba 1', 'estm 4 proba 0', 'estm 4 proba 1', 'estm 5 proba 0', 'estm 5 proba 1', 'estm 6 proba 0', 'estm 6 proba 1', 'estm 7 proba 0', 'estm 7 proba 1', 'estm 8 proba 0', 'estm 8 proba 1', 'estm 9 proba 0', 'estm 9 proba 1', 'estm 10 proba 0', 'estm 10 proba 1', 'estm 11 proba 0', 'estm 11 proba 1', 'estm 12 proba 0', 'estm 12 proba 1', 'estm 13 proba 0', 'estm 13 proba 1', 'estm 14 proba 0', 'estm 14 proba 1', 'estm 15 proba 0', 'estm 15 proba 1', 'estm 16 proba 0', 'estm 16 proba 1', 'estm 17 proba 0', 'estm 17 proba 1', 'estm 18 proba 0', 'estm 18 proba 1', 'estm 19 proba 0', 'estm 19 proba 1']
       0      1         2         3         4         5         6         7   \
0

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
['fold', 'train/test', 'ratio1', 'ratio2', 'ratio3', 'ratio4', 'ratio5', 'ratio6', 'ratio7', 'target', 'predict', 'total_proba', 'estm 0 proba 0', 'estm 0 proba 1', 'estm 1 proba 0', 'estm 1 proba 1', 'estm 2 proba 0', 'estm 2 proba 1', 'estm 3 proba 0', 'estm 3 proba 1', 'estm 4 proba 0', 'estm 4 proba 1', 'estm 5 proba 0', 'estm 5 proba 1', 'estm 6 proba 0', 'estm 6 proba 1', 'estm 7 proba 0', 'estm 7 proba 1', 'estm 8 proba 0', 'estm 8 proba 1', 'estm 9 proba 0', 'estm 9 proba 1', 'estm 10 proba 0', 'estm 10 proba 1', 'estm 11 proba 0', 'estm 11 proba 1', 'estm 12 proba 0', 'estm 12 proba 1', 'estm 13 proba 0', 'estm 13 proba 1', 'estm 14 proba 0', 'estm 14 proba 1', 'estm 15 proba 0', 'estm 15 proba 1', 'estm 16 proba 0', 'estm 16 proba 1', 'estm 17 proba 0', 'estm 17 proba 1', 'estm 18 proba 0', 'estm 18 proba 1', 'estm 19 proba 0', 'estm 19 proba 1']
       0      1         2         3         4         5         6         7   \
0

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
['fold', 'train/test', 'ratio1', 'ratio2', 'ratio3', 'ratio4', 'ratio5', 'ratio6', 'ratio7', 'target', 'predict', 'total_proba', 'estm 0 proba 0', 'estm 0 proba 1', 'estm 1 proba 0', 'estm 1 proba 1', 'estm 2 proba 0', 'estm 2 proba 1', 'estm 3 proba 0', 'estm 3 proba 1', 'estm 4 proba 0', 'estm 4 proba 1', 'estm 5 proba 0', 'estm 5 proba 1', 'estm 6 proba 0', 'estm 6 proba 1', 'estm 7 proba 0', 'estm 7 proba 1', 'estm 8 proba 0', 'estm 8 proba 1', 'estm 9 proba 0', 'estm 9 proba 1', 'estm 10 proba 0', 'estm 10 proba 1', 'estm 11 proba 0', 'estm 11 proba 1', 'estm 12 proba 0', 'estm 12 proba 1', 'estm 13 proba 0', 'estm 13 proba 1', 'estm 14 proba 0', 'estm 14 proba 1', 'estm 15 proba 0', 'estm 15 proba 1', 'estm 16 proba 0', 'estm 16 proba 1', 'estm 17 proba 0', 'estm 17 proba 1', 'estm 18 proba 0', 'estm 18 proba 1', 'estm 19 proba 0', 'estm 19 proba 1']
       0      1         2         3         4         5         6         7   \
0

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
['fold', 'train/test', 'ratio1', 'ratio2', 'ratio3', 'ratio4', 'ratio5', 'ratio6', 'ratio7', 'target', 'predict', 'total_proba', 'estm 0 proba 0', 'estm 0 proba 1', 'estm 1 proba 0', 'estm 1 proba 1', 'estm 2 proba 0', 'estm 2 proba 1', 'estm 3 proba 0', 'estm 3 proba 1', 'estm 4 proba 0', 'estm 4 proba 1', 'estm 5 proba 0', 'estm 5 proba 1', 'estm 6 proba 0', 'estm 6 proba 1', 'estm 7 proba 0', 'estm 7 proba 1', 'estm 8 proba 0', 'estm 8 proba 1', 'estm 9 proba 0', 'estm 9 proba 1', 'estm 10 proba 0', 'estm 10 proba 1', 'estm 11 proba 0', 'estm 11 proba 1', 'estm 12 proba 0', 'estm 12 proba 1', 'estm 13 proba 0', 'estm 13 proba 1', 'estm 14 proba 0', 'estm 14 proba 1', 'estm 15 proba 0', 'estm 15 proba 1', 'estm 16 proba 0', 'estm 16 proba 1', 'estm 17 proba 0', 'estm 17 proba 1', 'estm 18 proba 0', 'estm 18 proba 1', 'estm 19 proba 0', 'estm 19 proba 1']
       0      1         2          3         4         5         6         7   \


KeyboardInterrupt: 

# ROS

In [7]:
# AdaBoost (SAMME) over the MLP base learner with a learning rate of 0.1,
# evaluated with kfold_verify_ROS on each dataset in turn.
clf = AdaBoostClassifier(
    base_estimator=mlp,
    n_estimators=20,
    algorithm='SAMME',
    learning_rate=0.1,
)
for i in xlsxs:
    # First seven columns are the features, the eighth is the target.
    X, y = i.iloc[:, :7], i.iloc[:, 7]
    kfold_verify_ROS(clf, 10, X, y, "ROS", 20)

KeyboardInterrupt: 

# SVMadaboost

In [3]:
# Declare the AdaBoost ensemble, using an SVC with probability estimates
# enabled as the base learner.
s = svm.SVC(probability=True)
clf = AdaBoostClassifier(base_estimator=s, n_estimators=20, learning_rate=0.1, algorithm="SAMME")

In [4]:
# Evaluate the SVM-based AdaBoost on each dataset; fold1 receives the full
# frame as well as the feature/target split.
for i in xlsxs:
    X, y = i.iloc[:, :7], i.iloc[:, 7]
    fold1(clf, i, X, y, "SVMada")

KeyboardInterrupt: 