In [2]:
import os
import numpy as np
import tensorflow as tf
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import scale 

  from ._conv import register_converters as _register_converters


In [137]:
# Load the raw transactions and drop the two columns that are not used as features.
df = pd.read_csv('creditcard.csv')
filter_df = df.drop(['Time','Amount'],axis=1)
normal_df = filter_df[filter_df['Class'] == 0]
fraud_df = filter_df[filter_df['Class'] == 1]

# Under-sample class 0 to three times the number of class-1 rows.
# The sample is seeded so that a fresh "Restart & Run All" reproduces the same
# subset (the later train/test split is seeded as well).
dataset = pd.concat([normal_df.sample(3*len(fraud_df), random_state=0),fraud_df])

# `.values` replaces DataFrame.as_matrix(), which was deprecated in pandas 0.23
# and removed in pandas 1.0.
X_mat = dataset.drop('Class',axis=1).values
# SVD of the (features x features) Gram matrix X^T X; U holds its singular vectors.
U,S,VT = np.linalg.svd(X_mat.T.dot(X_mat))

In [138]:
# Rank every feature column by the absolute value of its entry in the first
# column of U, largest first, then split the ranking into names and scores.
feature_columns = dataset.drop('Class', axis=1).columns
first_vector_abs = np.abs(U[:, 0])
pairs = sorted(
    zip(feature_columns, first_vector_abs),
    key=lambda pair: pair[1],
    reverse=True,
)
newx, newy = zip(*pairs)

In [158]:
# Keep the 14 top-ranked features and order them by the integer that follows
# the first character of the column name.
# NOTE(review): assumes column names look like one letter + integer ('V12') - confirm.
selected_var = sorted(list(newx[:14]),key=lambda x:int(x[1:]))
#selected_var.append('Class')
down_sample = dataset[selected_var]

# Standardise the selected features; reuse the frame selected above instead of
# indexing the dataset a second time.
feature_x = scale(down_sample)
feature_y = dataset['Class']

# The label is appended as the LAST column of the design matrix.
# `.values` replaces Series.as_matrix(), which was removed in pandas 1.0.
design_matrix = np.column_stack([feature_x, feature_y.values])
train_data,test_data = train_test_split(design_matrix,test_size=0.2, random_state=0)

In [249]:
# Start from an empty default graph so that re-running this cell does not
# clash with variables created by an earlier run of it.
tf.reset_default_graph()

def get_pipe(dataset,batch_size=10):
    """Build an endlessly repeating, shuffled input pipeline from an in-memory matrix.

    Args:
        dataset: 2-D array-like; it is cast to float32 and sliced row by row.
            The last column is returned separately as the label.
        batch_size: number of rows per batch.

    Returns:
        A pair ``(features, labels)`` of tensors: every column but the last,
        and the last column cast to int32.
    """
    rows = tf.cast(dataset, dtype=tf.float32)
    pipeline = tf.data.Dataset.from_tensor_slices(rows)
    pipeline = pipeline.shuffle(3000)
    pipeline = pipeline.repeat()
    pipeline = pipeline.batch(batch_size)

    batch = pipeline.make_one_shot_iterator().get_next()
    features = batch[:, :-1]
    labels = tf.cast(batch[:, -1], tf.int32)
    return features, labels

def get_model(input):
    """Build a 14 -> 20 -> 10 -> 2 fully connected network and return its logits.

    Args:
        input: float tensor whose second dimension is 14 (it is multiplied by
            a [14, 20] weight matrix).

    Returns:
        Tensor with 2 values per row; no activation is applied to the last layer.
    """
    def _dense(scope, w_name, b_name, n_in, n_out, x, activation=None):
        # One affine layer; variables and ops are created inside `scope`.
        with tf.variable_scope(scope):
            weight = tf.get_variable(w_name, shape=[n_in, n_out],
                                     initializer=tf.random_normal_initializer)
            bias = tf.get_variable(b_name, shape=[n_out],
                                   initializer=tf.random_normal_initializer)
            pre_activation = tf.matmul(x, weight) + bias
            if activation is None:
                return pre_activation
            return activation(pre_activation)

    hidden1 = _dense('Layer1', 'W1', 'B1', 14, 20, input, tf.nn.relu)
    hidden2 = _dense('Layer2', 'W2', 'B2', 20, 10, hidden1, tf.nn.relu)
    return _dense('Layer3', 'W3', 'B3', 10, 2, hidden2)


def get_loss(logits,labels):
    """Mean sparse softmax cross-entropy plus L2 regularisation of the weights.

    Args:
        logits: float tensor of unnormalised class scores, one row per example.
        labels: integer tensor of class indices, one per example.

    Returns:
        Scalar tensor: ``cross_entropy_mean + 0.001 * sum(l2_loss(W))`` over the
        trainable variables that are not biases. If there are no such
        variables, the cross-entropy mean alone is returned.
    """
    # Cost function: Cross Entropy
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=labels, logits=logits, name='cross_entropy')
    cross_entropy_mean = tf.reduce_mean(cross_entropy, name='cross_entropy_mean')

    # Record the (unregularised) loss value for TensorBoard.
    tf.summary.scalar('cross_entropy_mean', cross_entropy_mean)

    # Variable names are scope-qualified ('Layer1/B1:0'), so the bias check has
    # to look at the last path component. Testing v.name[0] only ever sees the
    # first character of the scope name and therefore never excludes a bias.
    weights = [v for v in tf.trainable_variables()
               if not v.name.split('/')[-1].startswith('B')]
    if not weights:
        # tf.add_n rejects an empty list; nothing to regularise.
        return cross_entropy_mean

    reg_loss = tf.add_n([tf.nn.l2_loss(v) for v in weights]) * 0.001
    return cross_entropy_mean + reg_loss


def train(loss,learning_rate=0.001):
    """Return an op that applies one Adam update minimising ``loss``.

    Args:
        loss: scalar tensor to minimise.
        learning_rate: step size passed to the Adam optimiser.
    """
    optimizer = tf.train.AdamOptimizer(learning_rate)
    return optimizer.minimize(loss)

    
def accuracy(logits, labels):
    """Count the rows whose arg-max prediction equals the label.

    Note that despite the name this returns a COUNT of correct predictions
    (an int32 scalar tensor), not a ratio; divide by the number of rows to
    obtain an accuracy.

    Args:
        logits: tensor of class scores, one row per example.
        labels: int32 tensor of class indices, one per example.
    """
    predicted = tf.argmax(logits, axis=1, output_type=tf.int32)
    is_correct = tf.equal(predicted, labels)
    return tf.reduce_sum(tf.cast(is_correct, tf.int32))


In [1]:
import tensorflow as tf
import numpy as np
import pandas as pd

  from ._conv import register_converters as _register_converters


### TensorFlow multi-layer regression

In [216]:
# Start from an empty default graph so that re-running this cell does not
# clash with variables created by an earlier run of it.
tf.reset_default_graph()

def get_model(batch):
    """Build a 1 -> 20 -> 20 -> 1 sigmoid network for scalar regression.

    Args:
        batch: float tensor with a single feature column (it is multiplied by
            a [1, 20] weight matrix).

    Returns:
        Tensor with one predicted value per row; the output layer is linear.
    """
    def _dense(scope, w_name, b_name, n_in, n_out, x, activation=None):
        # Weights start from a random normal, biases from zero.
        with tf.variable_scope(scope):
            weight = tf.get_variable(w_name, shape=[n_in, n_out],
                                     initializer=tf.random_normal_initializer)
            bias = tf.get_variable(b_name, shape=[n_out],
                                   initializer=tf.zeros_initializer)
            pre_activation = tf.matmul(x, weight) + bias
            if activation is None:
                return pre_activation
            return activation(pre_activation)

    hidden1 = _dense('layer1', 'W1', 'B1', 1, 20, batch, tf.nn.sigmoid)
    hidden2 = _dense('layer2', 'W2', 'B2', 20, 20, hidden1, tf.nn.sigmoid)
    return _dense('layer3', 'W3', 'B3', 20, 1, hidden2)
        
def get_loss(pred,label):
    """Mean squared error between ``pred`` and ``label`` (scalar tensor)."""
    residual = pred - label
    return tf.reduce_mean(tf.square(residual))


def parse_csv(line):
    """Split one comma-separated text line into a 1-D tensor of string tokens."""
    return tf.string_split([line], delimiter=',').values

# Even with the same model, this pipeline performs poorly and the reason is unknown.
# The suspicion is that something goes wrong in the conversion done through interleave.
def get_train(files):
    """Build the repeating training dataset from a list of CSV file paths.

    Lines of the files are interleaved (cycle length 2), repeated forever,
    split on commas, converted to numbers and grouped into batches of 102.
    Shuffling is currently disabled.

    Args:
        files: list of CSV file paths.

    Returns:
        A ``tf.data.Dataset`` of numeric batches.
    """
    def _lines_of(path):
        return tf.data.TextLineDataset(path)

    def _to_number(tokens):
        return tf.string_to_number(tokens)

    lines = tf.data.Dataset.from_tensor_slices(files)
    lines = lines.interleave(_lines_of, cycle_length=2)
    # A shuffle(200) before repeat() was tried here and is turned off.
    records = lines.repeat().map(parse_csv).map(_to_number)
    return records.batch(102)

# Even with the same model, this pipeline performs poorly and the reason is unknown.
# The suspicion is that something goes wrong in the conversion done through interleave.
def get_valid(files):
    """Build the single-pass validation dataset from a list of CSV file paths.

    Lines of the files are interleaved (cycle length 2), split on commas,
    converted to numbers and grouped into batches of 10. The dataset is not
    repeated.

    Args:
        files: list of CSV file paths.

    Returns:
        A ``tf.data.Dataset`` of numeric batches.
    """
    def _lines_of(path):
        return tf.data.TextLineDataset(path)

    def _to_number(tokens):
        return tf.string_to_number(tokens)

    lines = tf.data.Dataset.from_tensor_slices(files)
    lines = lines.interleave(_lines_of, cycle_length=2)
    records = lines.map(parse_csv).map(_to_number)
    return records.batch(10)


def get_text(file):
    """Build a repeating dataset that streams one CSV file line by line.

    Each line is split on commas and converted to numbers; the stream repeats
    forever and is delivered in batches of 30 with one batch prefetched.

    Args:
        file: path of the CSV file.

    Returns:
        A ``tf.data.Dataset``; the caller creates the iterator.
    """
    def _to_number(tokens):
        return tf.string_to_number(tokens)

    records = tf.data.TextLineDataset(file).map(parse_csv).map(_to_number)
    batched = records.repeat().batch(30)
    return batched.prefetch(1)
    
    
def get_memory(file, batch_size=102):
    """Load a CSV fully into memory and expose it as a repeating batched dataset.

    The file is read with ``pd.read_csv`` (pandas' default header handling),
    converted to a float32 array, sliced row by row, batched and then repeated
    forever.

    Args:
        file: path of the CSV file.
        batch_size: rows per batch; defaults to 102, the value that used to be
            hard-coded.

    Returns:
        A ``tf.data.Dataset``; the caller creates the iterator.
    """
    frame = pd.read_csv(file)
    # `.values` replaces DataFrame.as_matrix() (removed in pandas 1.0), and a
    # plain ndarray replaces the discouraged np.matrix; the resulting 2-D
    # float32 data is the same.
    data = np.asarray(frame.values, dtype=np.float32)
    data = tf.data.Dataset.from_tensor_slices(data)
    data = data.batch(batch_size).repeat()
    return data
    

    

#xor : cos(x)
# NOTE(review): presumably xor.csv holds samples of y = cos(x) - confirm.
# NOTE(review): absolute, machine-specific paths; the cell only runs where
# these files exist.
flist1 = ['/home/roadking/Downloads/xor.csv']
flist2 = ['/home/roadking/Downloads/test.csv']


# Alternative input pipeline (interleaved text files), currently disabled.
#train_set = get_train(flist1)
#valid_set = get_valid(flist2)


# Active input pipeline: both CSV files are loaded fully into memory.
train_set = get_memory(flist1[0])
valid_set = get_memory(flist2[0])

# Alternative input pipeline (single streamed text file), currently disabled.
#train_set = get_text(flist1[0])
#valid_set = get_text(flist2[0])


# One re-initialisable iterator is shared by both datasets; running
# train_iter or valid_iter selects which dataset `batch` is drawn from.
iterator = tf.data.Iterator.from_structure(train_set.output_types,train_set.output_shapes)
batch = iterator.get_next()

train_iter = iterator.make_initializer(train_set)
valid_iter = iterator.make_initializer(valid_set)


# The first column of each batch is the model input, the remaining
# column(s) are the regression target.
pred = get_model(batch[:,:1])
loss = get_loss(pred,batch[:,1:])
train_op = tf.train.AdamOptimizer(0.05).minimize(loss)
init = tf.global_variables_initializer()

#with tf.train.MonitoredTrainingSession() as sess:
with tf.Session() as sess:
    # Initialise the variables and point the iterator at the training data.
    sess.run([init,train_iter])
    # 400 optimisation steps; the training loss is printed every 100th step.
    for i in range(400):
        
        _,error = sess.run([train_op,loss])
        
        if(i+1)%100 == 0:
            print(i,error)   

    # Switch the iterator to the validation data and evaluate the loss once,
    # i.e. on a single batch drawn from valid_set.
    sess.run(valid_iter)
    print(sess.run(loss))


99 0.42700353
199 0.38728642
299 0.065513745
399 0.005765436
0.005436239


### scikit-learn neural network

In [208]:
# Drop the TensorFlow graph built above; the scikit-learn section does not use it.
tf.reset_default_graph()
# df is the training table, dt the held-out table used for the error below.
# NOTE(review): absolute, machine-specific paths.
df = pd.read_csv('/home/roadking/Downloads/xor.csv')
dt = pd.read_csv('/home/roadking/Downloads/test.csv')

In [209]:
# `.values` replaces DataFrame.as_matrix(), which was removed in pandas 1.0.
data_set = df.values

In [210]:
from sklearn.neural_network import MLPRegressor

In [211]:
# Fit a three-hidden-layer MLP on column 0 -> column 1 of the training data,
# then report the mean squared error on the held-out table.
clf = MLPRegressor(
    hidden_layer_sizes=(20, 20, 20),
    random_state=1,
    max_iter=800,
)
train_inputs = data_set[:, :1]
train_targets = data_set[:, 1]
clf.fit(train_inputs, train_targets)
#clf.loss_curve_
test_predictions = clf.predict(dt.iloc[:, :1])
error = np.mean(np.square(dt.iloc[:, 1] - test_predictions))
print(error)