In [1]:
import pandas as pd 
import tensorflow as tf 
import numpy as np 

import requests
import json
import sys
import os
from types import *
from collections import Counter

In [2]:
#read data => class - COM or FP input // output - CAT 100 or 4! 
class fpDataModel:
    def __init__(self, path, norm, batch_size, dType, labelCol, dataCol = 4, nC=100, nRange=1 , toList = True):
        self.path           = path
        self.norm           = norm
        self.batch_size     = batch_size
        self.dType          = dType
        self.labelCol       = labelCol
        self.dataCol        = dataCol
        self.nC             = nC
        self.nRange         = nRange
        self.toList         = toList 

    def next_batch(self, num, data, labels):
        idx = np.arange(0 , len(data))
        np.random.shuffle(idx)
        idx = idx[:num]
        data_shuffle = [data[i] for i in idx]
        labels_shuffle = [labels[i] for i in idx]

        return np.asarray(data_shuffle), np.asarray(labels_shuffle)
    
    def classif(self, df):
        if( df < 40 ): return [0,0,0,1] 
        elif( df >= 40 and df < 60 ): return [0,0,1,0]
        elif( df >= 60 and df < 90 ): return [0,1,0,0] 
        elif( df >= 90 ): return [1,0,0,0] 
    def declassif(self, df): 
        if  ( df == [0,0,0,1] ):   return 1 
        elif( df == [0,0,1,0] ):   return 2
        elif( df == [0,1,0,0] ):   return 3  
        elif( df == [1,0,0,0] ):   return 4      
    def regress(self, df): #
        return [df]
    
    def classifN(self, df):
        listofzeros = [0] * self.nC
        dfIndex = df//self.nRange
        # print('{} and {}', (df,dfIndex))
        if dfIndex < self.nC:
            listofzeros[dfIndex] = 1 
        return listofzeros
    
    def deClassifN(self, df, val = 1 ):
        return df.index(val)
    
    def split_lab_dat(self, dst):
        cat  = dst.loc[:, self.labelCol]
        dat  = dst.iloc[:, self.dataCol:]
        if (self.toList): 
            cat = cat.as_matrix().tolist()
            dat = dat.as_matrix().tolist()
        return {'label' : cat, 'data' : dat}
    
    #Get Data
    def get_data(self, typeSep = True, pathA = "", filter = ""):
        if pathA != "":
            dst =  pd.read_csv( tf.gfile.Open(pathA), sep=None, skipinitialspace=True,  engine="python")
        else: 
            dst =  pd.read_csv( tf.gfile.Open(self.path), sep=None, skipinitialspace=True,  engine="python")
        
        dst = dst.fillna(0)
        
        if filter == '>23':
            dst = dst[dst["FP"]>23]
        elif filter == '>60':
            dst = dst[dst["FP"]>60]


        if self.norm != "":
            cat_n  = dst.loc[:,'FP'] 
            dst['FP'] = self.normalization( cat_n )

        if   self.dType == 'class':       # Classification in 4 categories
            dst.insert(2, 'FP_C', dst['FP'].map(lambda x: self.classif(x)))
        elif self.dType == 'reg':       # Regression
            dst.insert(2, 'FP_R', dst['FP'].map(lambda x: self.regress(x)))
        elif self.dType == 'classN':    # Classification in N categories  
            dst.insert(2, 'FP_C', dst['FP'].map(lambda x: self.classifN(x))) 
        
        self.dst = dst

        # 3 if no type and 4 if type
        if typeSep == True:
            dst_tmp = [rows for _, rows in dst.groupby('Type')]
            data_e  = self.split_lab_dat(dst_tmp[0])
            data_t  = self.split_lab_dat(dst_tmp[1])
            return data_t, data_e
        else :   return  self.split_lab_dat(dst_tmp[0])
    
    
    
    def get_data2(self, colu="", datu=""):
        pass
    
    def set_columns(self, url ):        # set the main data frame from the class: 
        columns_path = url
        self.col_df = pd.read_csv(columns_path, index_col=0, sep=',', usecols=[0,1,2,3])
        return(len(self.col_df))
        
    def feed_data(self, url , type="", d_st = False):
        json_df = pd.DataFrame(columns=self.col_df.index) 
        df_entry = pd.Series(index=self.col_df.index)

        df_entry = df_entry.fillna(0) 
        comp_out_count = Counter()
        
        if(isinstance(url, list)):json_data = url
        else:   
            json_str=open(url).read()
            json_data = json.loads(json_str)

        for i in range(len(json_data)):
            df_entry *= 0
            m = str(json_data[i]["m"])
            df_entry.name = m
            for key in json_data[i]:
                if key == "m":  
                    pass            
                else: 
                    #key_wz = str(int(key)
                    key_wz = key
                    try:
                        ds_col = self.col_df.loc[key_wz]
                        #df_entry.loc[key_wz]
                        df_entry[key_wz] =  np.float32(json_data[i][key])
                    except: 
                        if d_st == True: 
                            print("column: {} not included in the input of: {}" .format(key_wz, m))
                        # comp_out_count[key_wz] +=1
            json_df = json_df.append(df_entry,ignore_index=False)
        # print("Counter of comp. not included :")
        # print(len(comp_out_count))
        # return json_df  
        return json_df.as_matrix().tolist()  


    def check_perf(self, lA, lB):
        assert(len(lA) == len(lB))
        gt3  = 0
        gtM = 0
        num = 0
        for i in range(len(lA)):
            num = abs(lA[i]-lB[i])
            if num > 3: gt3+=1
            if num > 10: gtM+=1
        return gt3, gtM
    

In [6]:
#unit tests: 
ALL_DS     = "../../_zfp/data/FRFLO/datac.csv"
LAB_DS     = "../../_zfp/data/FRFLO/datal.csv"
COL_DS     = "../../_zfp/data/FRFLO/colcom.csv"

col_df = pd.read_csv(COL_DS, index_col=0, sep=',', usecols=[0,1,2,3])
# print(col_df)

# read files
n_classes   = 100    

xtp1        = []  
ytp1        = []

ALL_DS     = "../../_zfp/data/FRFLO/TFFRFLO_ALSN.csv"
LOGDIR      = "../../_zfp/data/my_graph/0F2CRB/"
model_path  = LOGDIR + "model.ckpt"
dataClass = fpDataModel( path= ALL_DS, norm = '', batch_size = 128, dType="classN", labelCol = 'FP_C', 
                         dataCol = 4,   nC=n_classes, nRange=1, toList = True )



dataTrain,  dataEv =  dataClass.get_data( ) 
print("data read - lenTrain={} - lenTests={}" .format(len(dataTrain["label"]),len(dataEv["label"]), ))


data read - lenTrain=4804 - lenTests=4806


In [8]:
print("NETWORK")

# Parameters
learning_rate = 0.001
n_hidden_1  = 256   
n_hidden_2  = 256   
n_input     = 1814 #1221

# cust - network 
x = tf.placeholder(tf.float32,   shape=[None, n_input],   name="x")
y = tf.placeholder(tf.int16,     shape=[None, n_classes], name="cat")

weights = {
    'h1': tf.Variable(tf.random_normal([n_input, n_hidden_1]),    name="Weights_1"),
    'h2': tf.Variable(tf.random_normal([n_hidden_1, n_hidden_2]), name="Weights_2"),
    'out': tf.Variable(tf.random_normal([n_hidden_2, n_classes]), name="Weights_out"),
}
biases = {
    'b1': tf.Variable(tf.random_normal([n_hidden_1]), name="Bias_1"),
    'b2': tf.Variable(tf.random_normal([n_hidden_2]), name="Bias_2"),
    'out': tf.Variable(tf.random_normal([n_classes]), name="Bias_out"),
}

# Hidden layer with RELU activation
with tf.name_scope("fc_1"):
    layer_1 = tf.add(tf.matmul(x, weights['h1']), biases['b1'])
    layer_1 = tf.nn.relu(layer_1)
# Hidden layer with RELU activation
with tf.name_scope("fc_2"):
    layer_2 = tf.add(tf.matmul(layer_1, weights['h2']), biases['b2'])
    layer_2 = tf.nn.relu(layer_2)
# Output layer with linear activation
with tf.name_scope("fc_output"):
    pred = tf.matmul(layer_2, weights['out']) + biases['out']

# - declaration of model and global attributes 
# pred = multilayer_perceptron(x, weights, biases)
# prediction=tf.reduce_max(y,1)
with tf.name_scope("accuracy"):
    correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

with tf.name_scope("xent"):
    cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y))
    tf.summary.scalar("xent", cost)
    
with tf.name_scope("train"):
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

init = tf.global_variables_initializer()
saver = tf.train.Saver()
print("done")

NETWORK
done


In [13]:
# train
print("TRAINING...")
batch_size = 128
training_iters = 5000 #200000
display_step = training_iters*0.1 #10%
record_step  = training_iters*0.05 

with tf.Session() as sess:
    # Initialize variables
    sess.run(init)
#         writer = tf.summary.FileWriter(LOGDIR + hparam)
#         writer.add_graph(sess.graph)

    for i in range(training_iters): 

        xtb, ytb = dataClass.next_batch(batch_size, dataTrain['data'], dataTrain['label']) 

        if i % record_step == 0:
            [train_accuracy] = sess.run([accuracy], feed_dict={x: xtb, y: ytb }) 
#             writer.add_summary(s, i)

        if i % display_step == 0:
            print("step %d, training accracy %g " %(i, train_accuracy))

        sess.run(optimizer, feed_dict={x: xtb, y: ytb})

    print("Optimization Finished!")
    save_path = saver.save(sess, model_path)
    print("Model saved in file: %s" % save_path)
    print("Testing Accuracy:", sess.run(accuracy, feed_dict={x: dataEv['data'], y: dataEv['label']}))


TRAINING...
step 0, training accracy 0.03125 
step 500, training accracy 0.132812 
step 1000, training accracy 0.1875 
step 1500, training accracy 0.234375 
step 2000, training accracy 0.382812 
step 2500, training accracy 0.507812 
step 3000, training accracy 0.484375 
step 3500, training accracy 0.640625 
step 4000, training accracy 0.6875 
step 4500, training accracy 0.671875 
Optimization Finished!
Model saved in file: ../../_zfp/data/my_graph/0F2CRB/model.ckpt
Testing Accuracy: 0.082397


In [4]:
def restore_model(sess):
    print(model_path)
    saver.restore(sess, model_path)
#     saver.restore(sess, "../../_zfp/data/my_graph/0F2CR2/model.ckpt")

In [12]:
print("EVALUATION...")
with tf.Session() as sess:
    print("Model restored from file: %s" % model_path)

    sess.run(init)
    restore_model(sess)


    # test the model
    print("Training   Accuracy:", sess.run(accuracy, feed_dict={x: dataTrain['data'], y: dataTrain['label']}))
    print("Evaluation Accuracy:", sess.run(accuracy, feed_dict={x: dataEv['data'],    y: dataEv['label']}))

    # xtp1.append(dataTest['data'][i]);    ytp1.append(dataTest['label'][i])
    predv = sess.run([pred], feed_dict={x: dataEv['data']}) 
    predvList = predv.tolist()

    for i in range(20):
        print("RealVal: {}  - PP value: {}".format( dataClass.deClassifN( dataEv['label'][i]), 
                                                   dataClass.deClassifN( predv.tolist()[i], np.max(predv[i]))  ))
        # maxa = sess.run([prediction], feed_dict={y: predv })
    pred_val = []
    data_val = []
    
    print("denormalization all Evaluation : {} = {}" .format(len(predv), len(dataEv["label"])))
    
    for i in range(len(predv)):
        if (i % 1000==0): print(i)
        pred_vali = 0; data_vali = 0;
        try:
            pred_vali = dataClass.deClassifN( predv.tolist()[i], np.max(predv[i]))
            data_vali = dataClass.deClassifN( dataEv['label'][i])
            # print("realVal: {} -- PP value: {}".format(data_vali,pred_vali))
            pred_val.append(pred_vali)
            data_val.append(data_vali)
        except:
            print("error: i={}, pred={}, data={} -- ".format(i, pred_vali, data_vali))
    l3, l15 = dataClass.check_perf(pred_val, data_val)  
    print("Total: {} GT3: {}  GTM: {}".format(len(pred_val), l3, l15))     


EVALUATION...
Model restored from file: ../../_zfp/data/my_graph/0F2CRB/model.ckpt
../../_zfp/data/my_graph/0F2CRB/model.ckpt
INFO:tensorflow:Restoring parameters from ../../_zfp/data/my_graph/0F2CRB/model.ckpt
Training   Accuracy: 0.00166528
Evaluation Accuracy: 0.00124844


AttributeError: 'list' object has no attribute 'tolist'