In [1]:
import os 
import tensorflow as tf
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from tensorflow import keras
import matplotlib.pyplot as plt
from tensorflow import feature_column
from sklearn.model_selection import train_test_split
from tensorflow.keras import layers
print(tf.__version__)

1.14.0


In [2]:
# A utility method to create a tf.data dataset from a Pandas Dataframe
def df_to_dataset(dataframe, shuffle=True, batch_size=32):
  dataframe = dataframe.copy()
  labels = dataframe.pop('target')
  ds = tf.data.Dataset.from_tensor_slices((dict(dataframe), labels))
  if shuffle:
    ds = ds.shuffle(buffer_size=len(dataframe))
  ds = ds.batch(batch_size)
  return ds

def unique(list1): 
  
    # intilize a null list 
    unique_list = [] 
      
    # traverse for all elements 
    for x in list1: 
        # check if exists in unique_list or not 
        if x not in unique_list: 
            unique_list.append(x) 
    # print list 
    for x in unique_list: 
        print(x)

In [3]:
plastML = pd.read_csv("/home/ahmad/scRNA-seqAnalysis/plastMLscopenames.csv")
plastML = plastML.drop("Unnamed: 0", axis=1)
plastML.rename(columns={'ct': 'target'}, inplace = True)
plastML.head()

Unnamed: 0,target,C,D,E,F,G,H,I,J,K,...,ASD,ASE,ASF,ASG,ASH,ASI,ASJ,ASK,ASL,ASM
0,Stele,-0.258602,-0.330743,-0.36992,-0.3183,4.276812,1.027151,0.517398,-0.736007,-0.632255,...,0.645462,-0.252019,-0.146278,-0.07506,-0.034928,-0.183443,-0.083272,-0.046208,-0.104284,3.003427
1,Stele,-0.191696,0.511507,-0.283618,-0.236304,-0.247256,-0.622487,-0.047162,-0.59298,1.226199,...,-0.467602,-0.191643,-0.110144,-0.054311,-0.024493,-0.136469,-0.062023,-0.033389,-0.077612,1.994303
2,Non Hair Cells,-0.224581,3.531181,-0.32619,-0.27671,2.466134,-0.700523,0.352002,-0.665129,-0.565071,...,-0.532635,-0.221292,-0.127831,-0.064473,-0.0296,-0.159485,-0.07243,-0.039657,-0.090676,-0.567264
3,Stele,-0.183703,-1.167774,-0.273234,-0.226459,-0.237216,0.830474,-0.869091,-0.574942,-0.480348,...,1.478974,-0.184443,-0.105859,-0.051844,-0.023252,6.277999,-0.059496,-0.031867,-0.074442,3.029316
4,Endodermis,-0.156269,-0.316606,-0.237501,-0.192605,-0.202682,-0.533415,-0.783387,1.46565,-0.42133,...,-0.395155,-0.159747,-0.091178,-0.043366,-0.01897,-0.111792,-0.050813,-0.026631,-0.063555,0.046629


In [4]:
plastML.shape
unique(plastML.target)

Stele
Non Hair Cells
Endodermis
Phloem
Hair Cells
Meristem
Root Cap Cells
Xylem
Cortex


In [5]:
plastML.dtypes
plastML['target'] = pd.Categorical(plastML['target'])
plastML['target'] = plastML.target.cat.codes
plastML.head()

# for target: 
# 1 is endodermis
# 2 is hair cells
# 3 is meristem
# 4 is non hair cells
# 5 is phloem
# 6 is root cap cells
# 7 is stele
# 8 is xylem
# 9 is cortex

Unnamed: 0,target,C,D,E,F,G,H,I,J,K,...,ASD,ASE,ASF,ASG,ASH,ASI,ASJ,ASK,ASL,ASM
0,7,-0.258602,-0.330743,-0.36992,-0.3183,4.276812,1.027151,0.517398,-0.736007,-0.632255,...,0.645462,-0.252019,-0.146278,-0.07506,-0.034928,-0.183443,-0.083272,-0.046208,-0.104284,3.003427
1,7,-0.191696,0.511507,-0.283618,-0.236304,-0.247256,-0.622487,-0.047162,-0.59298,1.226199,...,-0.467602,-0.191643,-0.110144,-0.054311,-0.024493,-0.136469,-0.062023,-0.033389,-0.077612,1.994303
2,4,-0.224581,3.531181,-0.32619,-0.27671,2.466134,-0.700523,0.352002,-0.665129,-0.565071,...,-0.532635,-0.221292,-0.127831,-0.064473,-0.0296,-0.159485,-0.07243,-0.039657,-0.090676,-0.567264
3,7,-0.183703,-1.167774,-0.273234,-0.226459,-0.237216,0.830474,-0.869091,-0.574942,-0.480348,...,1.478974,-0.184443,-0.105859,-0.051844,-0.023252,6.277999,-0.059496,-0.031867,-0.074442,3.029316
4,1,-0.156269,-0.316606,-0.237501,-0.192605,-0.202682,-0.533415,-0.783387,1.46565,-0.42133,...,-0.395155,-0.159747,-0.091178,-0.043366,-0.01897,-0.111792,-0.050813,-0.026631,-0.063555,0.046629


In [6]:
train, test = train_test_split(plastML, test_size=0.2)
train, val = train_test_split(train, test_size=0.2)
print(len(train), 'train examples')
print(len(val), 'validation examples')
print(len(test), 'test examples')

4805 train examples
1202 validation examples
1502 test examples


In [7]:
train.head()
train.shape

(4805, 1182)

In [8]:
col = train.columns[1:1182,]
col

Index(['C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L',
       ...
       'ASD', 'ASE', 'ASF', 'ASG', 'ASH', 'ASI', 'ASJ', 'ASK', 'ASL', 'ASM'],
      dtype='object', length=1181)

In [9]:
feature_columns = []
for header in col:
  feature_columns.append(feature_column.numeric_column(header))
len(feature_columns)

1181

In [10]:
feature_layer = tf.keras.layers.DenseFeatures(feature_columns)

In [11]:
batch_size = 500
train_ds = df_to_dataset(train, batch_size=batch_size)
val_ds = df_to_dataset(val, shuffle=False, batch_size=batch_size)
test_ds = df_to_dataset(test, shuffle=False, batch_size=batch_size)

In [12]:
model = tf.keras.Sequential([
    feature_layer,
    layers.Dense(300, activation= 'relu'),
    layers.Dense(100, activation='relu'),
    layers.Dense(50, activation='relu'),
    layers.Dense(5, activation='softmax')
])

Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor


In [13]:
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

In [14]:
model.fit(train_ds,
          epochs=5)

Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x7efe8e8e0e80>

In [16]:
loss, accuracy = model.evaluate(test_ds)
print("Accuracy", accuracy)

Accuracy 0.07523302
