In [1]:
import pandas as pd
import numpy as np

# Extract prediction value

In [2]:
# Read only the six columns this notebook works with; any other column
# present in the CSV is skipped by `usecols`.
sigmoid_columns = [
    'dropout',
    'learning.rate',
    'L1.regularization',
    'training.steps',
    'accuracy',
    'execution.time',
]
df_sigmoid = pd.read_csv('Sigmoid_data.csv', usecols=sigmoid_columns)
df_sigmoid.head()

Unnamed: 0,dropout,learning.rate,L1.regularization,training.steps,accuracy,execution.time
0,0.01,0.0002,0.01,100,0.466667,8.079377
1,0.01,0.0002,0.01,500,0.466667,7.283534
2,0.01,0.0002,0.01,2500,0.266667,5.555197
3,0.01,0.0002,0.1,100,0.266667,7.278638
4,0.01,0.0002,0.1,500,0.466667,16.311707


In [3]:
#Predicted Quality
def pred_quality(x, low=0.5, high=0.8):
    """Bucket an accuracy score into a quality class.

    Parameters
    ----------
    x : float
        Accuracy value to classify.
    low : float, optional
        Scores strictly below this value are class 0. Defaults to 0.5.
    high : float, optional
        Scores in ``[low, high)`` are class 1; scores at or above this
        value are class 2. Defaults to 0.8.

    Returns
    -------
    int
        0 when ``x < low``, 1 when ``low <= x < high``, otherwise 2.

    Notes
    -----
    NaN compares False against both thresholds and therefore falls
    through to class 2; filter missing accuracies beforehand if that is
    not wanted.
    """
    if x < low:
        return 0
    # Reaching this point already implies x >= low (or x is NaN), so only
    # the upper bound needs checking.
    if x < high:
        return 1
    return 2

# Label every row with its quality class, then save the augmented table;
# the DNN section further down reloads this file from disk.
quality_labels = df_sigmoid['accuracy'].apply(pred_quality)
df_sigmoid['Prediction'] = quality_labels
df_sigmoid.to_csv('Sigmoid_data_prediction.csv')

In [4]:
# Preview the first rows again, now including the 'Prediction' column.
df_sigmoid.head()

Unnamed: 0,dropout,learning.rate,L1.regularization,training.steps,accuracy,execution.time,Prediction
0,0.01,0.0002,0.01,100,0.466667,8.079377,0
1,0.01,0.0002,0.01,500,0.466667,7.283534,0
2,0.01,0.0002,0.01,2500,0.266667,5.555197,0
3,0.01,0.0002,0.1,100,0.266667,7.278638,0
4,0.01,0.0002,0.1,500,0.466667,16.311707,0


In [5]:
# Column names as a plain list; the next cell slices it by position to
# pick the feature columns.
lst = df_sigmoid.columns.tolist()
lst

['dropout',
 'learning.rate',
 'L1.regularization',
 'training.steps',
 'accuracy',
 'execution.time',
 'Prediction']

In [8]:
# Features are the first four entries of `lst`; the target is the derived
# 'Prediction' class.
feature_names = lst[:4]
X = df_sigmoid[feature_names]
y = df_sigmoid['Prediction']
X.head()

Unnamed: 0,dropout,learning.rate,L1.regularization,training.steps
0,0.01,0.0002,0.01,100
1,0.01,0.0002,0.01,500
2,0.01,0.0002,0.01,2500
3,0.01,0.0002,0.1,100
4,0.01,0.0002,0.1,500


# Logistic Regression Model

In [9]:
from sklearn.linear_model import LogisticRegression
# Multinomial logistic regression fitted with the SAG solver; the tight
# tolerance is paired with a large iteration cap.
log_model = LogisticRegression(
    solver='sag',
    multi_class='multinomial',
    C=100,
    tol=1e-7,
    max_iter=10000,
)

# Train Test split

In [11]:
from sklearn.model_selection import train_test_split
# Hold out 33% of the rows for evaluation; the fixed seed makes the split
# repeatable across runs.
(X_train, X_test,
 y_train, y_test) = train_test_split(X, y, random_state=101, test_size=0.33)

In [12]:
# Inspect the training features; rows keep their original index labels.
X_train

Unnamed: 0,dropout,learning.rate,L1.regularization,training.steps
284,0.25,0.2048,1.00,2500
95,0.05,0.0008,10.00,2500
213,0.10,0.2048,10.00,100
35,0.01,0.0032,10.00,2500
231,0.25,0.0008,0.10,100
38,0.01,0.0128,0.01,2500
265,0.25,0.0512,0.01,500
199,0.10,0.0512,1.00,500
281,0.25,0.2048,0.10,2500
64,0.01,0.2048,0.10,500


In [13]:
# Inspect the training labels (the 'Prediction' class of each training row).
y_train

284    0
95     0
213    0
35     0
231    0
38     0
265    0
199    2
281    1
64     2
133    0
170    0
72     0
247    1
218    0
198    1
154    0
25     0
180    0
112    2
252    0
22     0
42     1
216    0
200    0
93     0
121    0
145    0
88     0
219    0
      ..
103    1
7      0
99     0
164    0
62     2
239    0
105    0
107    0
223    0
76     0
126    1
249    0
44     0
59     0
136    2
111    1
49     0
283    0
5      0
110    1
192    0
220    0
245    0
286    0
132    0
40     2
75     0
87     0
63     2
11     0
Name: Prediction, dtype: int64

# Scale the features

In [14]:
from sklearn import preprocessing
# Fit the min-max scaler on the training features only, then apply those
# same training-set minima/maxima to the test features. Re-fitting on the
# test set would scale the two splits with different ranges (so the model
# sees inconsistent inputs) and would leak test-set statistics.
scaler = preprocessing.MinMaxScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [15]:
# Inspect the scaled training features.
X_train_scaled

array([[1.        , 1.        , 0.0990991 , 1.        ],
       [0.16666667, 0.00293255, 1.        , 1.        ],
       [0.375     , 1.        , 1.        , 0.        ],
       [0.        , 0.01466276, 1.        , 1.        ],
       [1.        , 0.00293255, 0.00900901, 0.        ],
       [0.        , 0.06158358, 0.        , 1.        ],
       [1.        , 0.24926686, 0.        , 0.16666667],
       [0.375     , 0.24926686, 0.0990991 , 0.16666667],
       [1.        , 1.        , 0.00900901, 1.        ],
       [0.        , 1.        , 0.00900901, 0.16666667],
       [0.16666667, 1.        , 0.        , 0.16666667],
       [0.375     , 0.01466276, 0.        , 1.        ],
       [0.16666667, 0.        , 0.        , 0.        ],
       [1.        , 0.01466276, 0.0990991 , 0.16666667],
       [1.        , 0.        , 0.        , 1.        ],
       [0.375     , 0.24926686, 0.0990991 , 0.        ],
       [0.375     , 0.        , 1.        , 0.16666667],
       [0.        , 0.01466276,

In [16]:
# Train the logistic-regression model on the scaled training features.
log_model.fit(X_train_scaled, y_train)

LogisticRegression(C=100, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=10000, multi_class='multinomial',
          n_jobs=1, penalty='l2', random_state=None, solver='sag',
          tol=1e-07, verbose=0, warm_start=False)

In [17]:
# Predict a quality class for every scaled test row and display the result.
y_pred = log_model.predict(X_test_scaled)
y_pred

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0], dtype=int64)

In [19]:
from sklearn.metrics import confusion_matrix
# Confusion matrix with one row per true class and one column per predicted
# class. `labels` pins the class order (0 = Low, 1 = Medium, 2 = High) and
# keeps the matrix 3x3 even when a class occurs in neither y_test nor
# y_pred, so it always lines up with the row/column names below.
conf = pd.DataFrame(data=confusion_matrix(y_test, y_pred, labels=[0, 1, 2]),
                    columns=['Predicted Low', 'Predicted Medium', 'Predicted High'],
                    index=['True Low','True Medium','True High'])
conf

Unnamed: 0,Predicted Low,Predicted Medium,Predicted High
True Low,70,0,1
True Medium,16,0,0
True High,8,0,1


In [20]:
from sklearn.metrics import classification_report
# Per-class precision/recall/F1. `labels` is passed explicitly so that the
# three display names always map to classes 0, 1 and 2 in that order,
# regardless of which classes happen to appear in y_test / y_pred.
target_names = ['Low', 'Medium', 'High']
print(classification_report(y_test, y_pred, labels=[0, 1, 2], target_names=target_names))

             precision    recall  f1-score   support

        Low       0.74      0.99      0.85        71
     Medium       0.00      0.00      0.00        16
       High       0.50      0.11      0.18         9

avg / total       0.60      0.74      0.64        96



  'precision', 'predicted', average, warn_for)


# DNN Classifier

In [21]:
import tensorflow as tf
import os
# NOTE(review): these environment variables are set after `import tensorflow`
# has already run; presumably they are meant to quiet TensorFlow's native
# logging — confirm they still take effect here, or set them before the import.
os.environ['TF_CPP_MIN_VLOG_LEVEL'] = '2'
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
from tensorflow import logging
# Set TensorFlow's logging verbosity to ERROR.
logging.set_verbosity(logging.ERROR)

  from ._conv import register_converters as _register_converters


## Split dataset into train and test csv

In [23]:
def split_dataset(data, test_size):
    """Split a prediction CSV into train/test frames and write each to disk.

    Reads the columns 'dropout', 'learning.rate', 'L1.regularization',
    'training.steps' and 'Prediction' from ``data``, splits the rows with
    ``train_test_split`` (fixed ``random_state=101``) and writes the two
    parts to ``<stem>_train.csv`` and ``<stem>_test.csv`` without index or
    header row, where ``<stem>`` is ``data`` minus its final extension.

    Parameters
    ----------
    data : str
        Path of the CSV file to read.
    test_size : float or int
        Forwarded unchanged to ``train_test_split`` as its ``test_size``.

    Returns
    -------
    tuple
        ``(df_train, df_test)`` as pandas DataFrames.
    """
    columns = ['dropout', 'learning.rate', 'L1.regularization', 'training.steps', 'Prediction']
    df = pd.read_csv(data, usecols=columns)
    # Use the caller's test_size rather than a constant baked into the body.
    df_train, df_test = train_test_split(df, test_size=test_size, random_state=101)

    # Strip only the final extension so that dots elsewhere in the path
    # (e.g. './data/file.csv') do not truncate the output name.
    name = str(data).rsplit('.', 1)[0]
    name_train = name + '_train.csv'
    name_test = name + '_test.csv'
    df_train.to_csv(name_train, index=False, header=False)
    df_test.to_csv(name_test, index=False, header=False)
    return (df_train, df_test)

# Write the train/test CSVs that the DNN cells load next, and keep both frames.
sigmoid_train, sigmoid_test = split_dataset('Sigmoid_data_prediction.csv', test_size=0.33)

In [24]:
# Load the header-less train/test CSVs: the last column is read as the
# integer class label and the remaining columns as float64 features.
# The builtin `int` is used for the label dtype because the `np.int` alias
# (which referred to the same type) is deprecated and absent from newer
# NumPy releases.
training_set = tf.contrib.learn.datasets.base.load_csv_without_header(
    filename='Sigmoid_data_prediction_train.csv',
    target_dtype=int,
    features_dtype=np.float64,
    target_column=-1)
test_set = tf.contrib.learn.datasets.base.load_csv_without_header(
    filename='Sigmoid_data_prediction_test.csv',
    target_dtype=int,
    features_dtype=np.float64,
    target_column=-1)

In [26]:
# Inspect the loaded training Dataset.
training_set

Dataset(data=array([[2.500e-01, 2.048e-01, 1.000e+00, 2.500e+03],
       [5.000e-02, 8.000e-04, 1.000e+01, 2.500e+03],
       [1.000e-01, 2.048e-01, 1.000e+01, 1.000e+02],
       [1.000e-02, 3.200e-03, 1.000e+01, 2.500e+03],
       [2.500e-01, 8.000e-04, 1.000e-01, 1.000e+02],
       [1.000e-02, 1.280e-02, 1.000e-02, 2.500e+03],
       [2.500e-01, 5.120e-02, 1.000e-02, 5.000e+02],
       [1.000e-01, 5.120e-02, 1.000e+00, 5.000e+02],
       [2.500e-01, 2.048e-01, 1.000e-01, 2.500e+03],
       [1.000e-02, 2.048e-01, 1.000e-01, 5.000e+02],
       [5.000e-02, 2.048e-01, 1.000e-02, 5.000e+02],
       [1.000e-01, 3.200e-03, 1.000e-02, 2.500e+03],
       [5.000e-02, 2.000e-04, 1.000e-02, 1.000e+02],
       [2.500e-01, 3.200e-03, 1.000e+00, 5.000e+02],
       [2.500e-01, 2.000e-04, 1.000e-02, 2.500e+03],
       [1.000e-01, 5.120e-02, 1.000e+00, 1.000e+02],
       [1.000e-01, 2.000e-04, 1.000e+01, 5.000e+02],
       [1.000e-02, 3.200e-03, 1.000e-02, 5.000e+02],
       [1.000e-01, 1.280e-02, 1.0

In [27]:
#Specify that all features have real-value numeric data
# The inputs are exposed to the estimator as one numeric feature named 'x'
# with four values per example.
feature_columns = [tf.feature_column.numeric_column('x', shape=[4])]

# Training input: shuffled, up to 1000 passes over the training arrays.
train_features = {'x': np.array(training_set.data)}
train_labels = np.array(training_set.target)
train_input_fn = tf.estimator.inputs.numpy_input_fn(
    x=train_features,
    y=train_labels,
    num_epochs=1000,
    shuffle=True)

# Evaluation input: a single, unshuffled pass over the test arrays.
test_features = {'x': np.array(test_set.data)}
test_labels = np.array(test_set.target)
test_input_fn = tf.estimator.inputs.numpy_input_fn(
    x=test_features,
    y=test_labels,
    num_epochs=1,
    shuffle=False)

In [29]:
# Optimizer is built separately so its settings are easy to spot and tweak.
proximal_adagrad = tf.train.ProximalAdagradOptimizer(
    learning_rate=0.001,
    l1_regularization_strength=10.0)

# Three-class DNN with three hidden layers of 20 ReLU units each.
classifier = tf.estimator.DNNClassifier(
    feature_columns=feature_columns,
    hidden_units=[20, 20, 20],
    n_classes=3,
    dropout=0.01,
    activation_fn=tf.nn.relu,
    optimizer=proximal_adagrad)

In [None]:
# Train with a cap of 1000 steps, then evaluate on the test input and show
# the resulting accuracy.
classifier.train(input_fn=train_input_fn, steps=1000)
eval_metrics = classifier.evaluate(input_fn=test_input_fn)
eval_metrics["accuracy"]