Author: Andres Felipe Alba Hernandez
Applied AI Engineer <br>
Created: March 2020 <br>
Last Modified: April, 2020
email: v-analba@microsoft.com <br>
https://www.linkedin.com/in/ahandresf/


The goal of this jupyter notebook is to function as a data parser from csv file that contain a time series of data from the process that may be controlled later by the BRAIN. This data will be feed to different ML models that may learn the dynamic of the system and generalize in order to be used as simulator of the process later. This will allow us to train the BRAIN (RL algorithm) using a simulator (data-driven model). <br>

For this case:
- With $p$ as the number of variables in the action space.
- With $m$ as the number of variables in the state space. 
- Data input is a vector that concatanate the vetor $A$ and $S$  with  $A=a_{1_{n}},..a_{p_{n}}$ and  $S=s_{1_{n}}, ..., s_{m_{n}}$.
- Data output correspond to state $S_{n+1}$ with $S=s_{1_{n+1}}, ..., s_{m_{n+1}}$.

In [1]:
#Libraries
import numpy as np
import pandas as pd 
import csv
import pprint
import pickle
from datetime import datetime
import time
import json
import os
from copy import deepcopy

# Data Parsing 2020 and create metadata


## Reading data set

In [2]:
#provide the data directory
data_dir='C:/Users/aalbaher/dataset_pttgc'

In [3]:
#!dir
#initializing
headers_dic={}
short_list={}
'''
make sure your file is in the same folder of this jupyter notebook or provide 
the complete path to the file.
'''
file1='2020_Jan_March.csv'
file2='2019_Dec.csv'
file3='2019_Jan_Sep.csv'
file4='2018_2019.csv'
file5='2017_2018.csv'
file6='2016_2017.csv'
file_name1=('%s/%s')%(data_dir,file1)
file_name2=('%s/%s')%(data_dir,file2)
file_name3=('%s/%s')%(data_dir,file3)
file_name4=('%s/%s')%(data_dir,file4)
file_name5=('%s/%s')%(data_dir,file5)
file_name6=('%s/%s')%(data_dir,file6)

## Output directory

In [4]:
#provide the data directory
time_stamp=time.strftime('%B_%d_%H%M%S')
data_out_dir=r'C:/Users/aalbaher/dataset_pttgc/data_preprocess/'+time_stamp+'/'
os.makedirs(data_out_dir)
print(data_out_dir)

C:/Users/aalbaher/dataset_pttgc/data_preprocess/May_13_165417/


## Create metadata
Organize the metadata into a dictionary. This disctionary have some information that describe the each variable in the dataset. This variables will correspond later to most of the columns in a pandas data frame (except the first one that is time).

In [5]:
#open the file object
f=open(file_name1, newline='')
reader = csv.reader(f)
#getting metadata from the csv file, we skip first column. 
point_name = next(reader)[1::]
description = next(reader)[1::]
var_type = next(reader)[1::]
priority = next(reader)[1::]
units = next(reader)[1::]
#print(point_name)
#print(len(point_name))

In [6]:
#creating dictionary of headers
for i, var_name in enumerate(point_name):
    headers_dic[var_name]={
                     'description':description[i],
                     'var_type':var_type[i],
                     'priority':priority[i],
                     'units':units[i],
                     }
    if priority[i]=='10':
        short_list[var_name]=headers_dic[var_name]      
#pprint.pprint(headers_dic)
f.close() #close the file object

In [7]:
#Storing Metadata as dictionary
with open('headers_dic.json', 'w') as fp:
    json.dump(headers_dic, fp, indent=4)

### Building DataFrame
Now, we put the data itself into a dataframe, the first column is the time and the others are the possible state variables. 

In [8]:
#read the remaining data and put it in a pandas dataframe. 
print(file_name1)
name_of_columns=['time']+point_name
df1=pd.read_csv(file_name1,skiprows=5,names=name_of_columns)

C:/Users/aalbaher/dataset_pttgc/2020_Jan_March.csv


In [9]:
df1

Unnamed: 0,time,14FIC015.MEAS,16FIC501.MEAS,14TI521.PNT,14PRCA502.MEAS,14FRC509.MEAS,14FRCA506.MEAS,14TI504.PNT,14TI532.PNT,14PRCA506.MEAS,...,14FRC514.MEAS,14FRC501.MEAS,14QI508.PNT,14TY513.RO01,14FIC503.MEAS,14TI502.PNT,16Q001.PNT,14QRA502.PNT,14Y559.RO01,14LRCA503.MEAS
0,1/1/2020 0:00,6452.264160,746.490967,293.762207,9.486253,491.542084,15.149858,104.698769,219.788117,0.581865,...,1423.786255,4644.759766,104.805008,74.106934,5917.788086,151.574768,0.245352,83.655220,4.234562,38.160904
1,1/1/2020 0:01,6435.390625,752.322998,293.757568,9.493867,491.384552,15.114125,104.686874,219.774994,0.579752,...,1431.386719,4652.797363,104.806519,74.132072,5919.329102,151.555176,0.245539,83.660782,4.387371,38.277836
2,1/1/2020 0:02,6435.275391,746.536194,293.771973,9.509203,489.817596,15.125778,104.680672,219.822266,0.575745,...,1434.629028,4647.533691,104.831047,74.102310,5926.901855,151.593780,0.245629,83.655670,4.284944,38.163483
3,1/1/2020 0:03,6486.708496,744.761047,293.793488,9.513070,488.737946,15.156744,104.508133,219.763351,0.575633,...,1438.131836,4649.957520,104.940735,74.064453,5932.580078,151.361084,0.245434,83.656097,4.321112,38.047146
4,1/1/2020 0:04,6438.978027,749.576538,293.801300,9.529663,492.406342,15.141134,104.603279,219.720276,0.577805,...,1429.786865,4646.831055,105.039764,74.064941,5930.948730,151.584259,0.245171,83.654572,4.384132,37.973122
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
98636,3/9/2020 11:56,6114.192383,675.065735,291.408081,9.645794,551.993408,11.042987,102.439766,218.786377,0.566735,...,1415.566162,4174.227051,104.912712,74.076981,5362.497559,149.020172,0.251332,83.771507,3.773265,37.616447
98637,3/9/2020 11:57,6111.750000,673.141907,291.323029,9.637948,548.302856,11.019686,102.205559,218.823013,0.565695,...,1427.341797,4148.922363,104.938042,74.089111,5354.990723,148.913574,0.251905,84.359352,3.808475,37.791866
98638,3/9/2020 11:58,6088.703613,678.479553,291.203247,9.653341,550.416992,10.999985,102.171616,218.841507,0.590596,...,1426.663818,4129.304199,104.648231,74.090919,5354.736816,148.873474,0.252462,84.366898,3.800796,37.853031
98639,3/9/2020 11:59,6085.645020,674.612854,291.110992,9.672322,553.572388,11.011888,102.007462,218.840713,0.609518,...,1441.762695,4116.078125,104.372345,74.097313,5365.450195,148.804611,0.253125,84.369316,3.778753,37.766796


Now we create a new column that is a timestamp created from the string of date time provided in the dataset. 

In [10]:
#remember it is month/day/year Hours:Minutes
df1['timestamp']=df1['time'].apply(lambda x: datetime.strptime(x,'%m/%d/%Y %H:%M' ).timestamp())

## Data Driven Model Input
Building the numpy array that is expected by the data driven model. <br>

The model expect to have X (input) and Y (output), where X is the actions vector concatenated with the input state vector, while Y is the output, state vector.

```x_set = np.empty(shape=(total_sample, int(action_space_dim+state_space_dim)))
y_set = np.empty(shape=(total_sample, int(state_space_dim)))
```

For our case the input would be one row, and the output will be the next row removing all the Manipulated variables (MV). These variables can be located as variable type == MV in the metadata dictionary. All the others variables are going to be consider state variables. 

In [11]:
action_names=[]
for key in headers_dic:
    if headers_dic[key]['var_type']=='MV':
        action_names.append(key)
print(action_names)

['14FICA508.MEAS', '14FICA508.SPT', '14TRC515.MEAS', '14TRC515.SPT', '14FRCA513.MEAS', '14FRCA513.SPT', '14FRCA511.MEAS', '14FRCA511.SPT']


In [12]:
#df.drop(action_names+['time'],axis=1).loc[0,:] #checking if it is the right data frame
'''
create a list with the columns that will NOT belong to the state variables
'''
#With time stamps
#drop_list=action_names+['time']

#without timestamps
drop_list=action_names+['time']+['timestamp']
print(drop_list)

['14FICA508.MEAS', '14FICA508.SPT', '14TRC515.MEAS', '14TRC515.SPT', '14FRCA513.MEAS', '14FRCA513.SPT', '14FRCA511.MEAS', '14FRCA511.SPT', 'time', 'timestamp']


In [13]:
#print(action_names)
state_names=np.array(df1.drop(drop_list,axis=1).columns)
print(state_names)
np.save('state_names',state_names)
np.save('action_names',action_names)

['14FIC015.MEAS' '16FIC501.MEAS' '14TI521.PNT' '14PRCA502.MEAS'
 '14FRC509.MEAS' '14FRCA506.MEAS' '14TI504.PNT' '14TI532.PNT'
 '14PRCA506.MEAS' '10FRCA505.MEAS' '14FICA508.MD' '14TRC515.MD'
 '14FRCA513.MD' '14FRCA511.MD' '14TIC527.MEAS' '14QI506.PNT'
 '14FRC514.OUT' '14FRC501.OUT' '14TI535.MEAS' '14TI528.PNT'
 '14FRC514.MEAS' '14FRC501.MEAS' '14QI508.PNT' '14TY513.RO01'
 '14FIC503.MEAS' '14TI502.PNT' '16Q001.PNT' '14QRA502.PNT' '14Y559.RO01'
 '14LRCA503.MEAS']


# 2019 Data

### December Data

In [14]:
df2=pd.read_csv(file_name2,skiprows=5,names=name_of_columns)
df2['timestamp']=df2['time'].apply(lambda x: datetime.strptime(x,'%m/%d/%Y %H:%M').timestamp())

### Jan-Sep Data

In [15]:
df3=pd.read_csv(file_name3,skiprows=5,names=name_of_columns)
df3['timestamp']=df3['time'].apply(lambda x: datetime.strptime(x,'%m/%d/%Y %H:%M').timestamp())

# 2018

In [16]:
df4=pd.read_csv(file_name4,skiprows=5,names=name_of_columns)
df4['timestamp']=df4['time'].apply(lambda x: datetime.strptime(x,'%m/%d/%Y %H:%M').timestamp())

# 2017

In [17]:
df5=pd.read_csv(file_name5,skiprows=5,names=name_of_columns)
df5['timestamp']=df5['time'].apply(lambda x: datetime.strptime(x,'%m/%d/%Y %H:%M').timestamp())

# 2016

In [18]:
df6=pd.read_csv(file_name6,skiprows=5,names=name_of_columns)
df6['timestamp']=df3['time'].apply(lambda x: datetime.strptime(x,'%m/%d/%Y %H:%M').timestamp())

### Store Pandas Data Frame in Disk

We would probably like to have the data organized:

In [19]:
#new dataframe with the [timestamp,actions,states]
new_col=['time']+['timestamp']+list(action_names)+list(state_names)
#print(new_col)

In [20]:
ndf1=df1[new_col]
ndf2=df2[new_col]
ndf3=df3[new_col]
ndf4=df4[new_col]
ndf5=df5[new_col]
ndf6=df6[new_col]

In [21]:
#understanding the order on the dataset
print(ndf1.iloc[0,:]['time'])
print(ndf1.iloc[-1,:]['time'])
print(ndf2.iloc[0,:]['time'])
print(ndf2.iloc[-1,:]['time'])
print(ndf3.iloc[0,:]['time'])
print(ndf3.iloc[-1,:]['time'])
print(ndf4.iloc[0,:]['time'])
print(ndf4.iloc[-1,:]['time'])
print(ndf5.iloc[0,:]['time'])
print(ndf5.iloc[-1,:]['time'])
print(ndf6.iloc[0,:]['time'])
print(ndf6.iloc[-1,:]['time'])

1/1/2020 0:00
3/9/2020 12:00
12/1/2019 0:00
1/1/2020 11:42
1/1/2019 0:00
9/1/2019 12:00
1/1/2018 0:00
1/1/2019 0:00
1/1/2017 0:00
1/1/2018 12:00
1/1/2016 0:00
1/1/2017 0:00


In [22]:
'''
The first row is the earliest date, the last row is the latest date.
ndf1=2020 data (newer)
ndf2=2019 data (older)
vertical_stack = pd.concat([older, newer], axis=0)
vertical_stack = pd.concat([top, bottom], axis=0)

'''
vertical_stack = pd.concat([ndf6,ndf5,ndf4,ndf3,ndf2,ndf1], axis=0)

In [23]:
vertical_stack.shape

(2073588, 40)

In [24]:
dataset_name=('%sdata.csv'%(data_out_dir))
print(dataset_name)

C:/Users/aalbaher/dataset_pttgc/data_preprocess/May_13_165417/data.csv


### Storing vertical stack

In [25]:
#storing all the data into a csv file, the data is all concatenated and organized in this way.
#'time','timestamp',actions,states
vertical_stack.to_csv(dataset_name)

# Create inputs with the whole dataset

In [26]:
action_input=np.array(vertical_stack[action_names].iloc[0:len(vertical_stack)-1,:]) #skip last row
state_input=np.array(vertical_stack[state_names].iloc[0:len(vertical_stack)-1,:]) #skip last row
state_output=np.array(vertical_stack[state_names].iloc[1::,:]) #skip fist row

In [27]:
print(action_input.shape)
print(state_input.shape)
print(state_output.shape)

(2073587, 8)
(2073587, 30)
(2073587, 30)


In [28]:
x_set_total = np.concatenate((action_input,state_input),axis=1)
y_set_total = state_output

In [29]:
#writing dataset into disk
file_x=('%sx_set_total.pickle'%(data_out_dir))
file_y=('%sy_set_total.pickle'%(data_out_dir))
with open('x_set_total.pickle', 'wb') as f:
    pickle.dump(x_set_total, f, pickle.HIGHEST_PROTOCOL)
with open('y_set_total.pickle', 'wb') as f:
    pickle.dump(y_set_total, f, pickle.HIGHEST_PROTOCOL)

# Create output as difference between states

Given that some of the models are not learning properly from the timeseries, we may need need to predict the difference instead of the the actual value. <br>

- the input is $\vec{X}=[ \vec{a_{n}},\vec{s_{n}} ]$ <br>
- the output is  $\vec{Y}=\vec{s_{n+1}}-\vec{s_{n}}$ <br>

In [30]:
x_set_diff=x_set_total
y_set_diff=state_output-state_input

In [31]:
#writing dataset into disk
file_x_diff=('%sx_set_total.pickle'%(data_out_dir))
file_y_diff=('%sy_set_total.pickle'%(data_out_dir))
with open(file_x_diff, 'wb') as f:
    pickle.dump(x_set_diff, f, pickle.HIGHEST_PROTOCOL)
with open(file_y_diff, 'wb') as f:
    pickle.dump(y_set_diff, f, pickle.HIGHEST_PROTOCOL)

# Testing code

Everything below this point can be ignore, it was use during the develop.

In [32]:
t='12/1/2019 12:46:00 AM'
datetime.strptime(t,'%m/%d/%Y %H:%M:%S %p')
#datetime.strptime(df2['time'][46],'%m/%d/%Y %H:%M %p')
#df2['time'][46]

datetime.datetime(2019, 12, 1, 12, 46)

# Getting priority var names

In [33]:
#short_list={}
#print(headers_dic)
print(len(short_list))

26


In [34]:
#Second Try
var_dic={
    'state_var':{
                'C5_LPG':'16Q001.PNT',
                'LN_RVP':'14QRA502.PNT',
                'LN_95':'14QI506.PNT',
                'HN_10':'14QI508.PNT',
                'LN_valve':'14FRC514.OUT',
                'HN_valve':'14FRC501.OUT',
                'LPG_dist_flow':'14FRC509.MEAS',
                'LN_dist_flow':'14FRC514.MEAS',
                },
         
    'action_var':{
                'C1451_reflux ':'14FICA508.MEAS',
                'Tray_5':'14TI528.PNT',
                'C1452_reflux':'14FRCA513.MEAS',
                'MPS_Steam':'14FRCA511.MEAS'
                },
         
     'predict_var':'14FRC501.MEAS'
    }

'''
#First try
var_dic={
    'state_var':{
                'C5_LPG':'16Q001.PNT',
                'LN_RVP':'14QRA502.PNT',
                'LN_95':'14QI506.PNT',
                'HN_10':'14QI508.PNT',
                'Reflux_dist_ratio':'14Y559.RO01',
                'LN_valve':'14FRC514.OUT',
                'HN_valve':'14FRC501.OUT',
                'LPG_dist_flow':'14FRC509.MEAS',
                'LN_dist_flow':'14FRC514.MEAS',
                },
         
    'action_var':{
                'C1451_reflux ':'14FICA508.MEAS',
                'Tray_5':'14TI528.PNT',
                'C1452_reflux':'14FRCA513.MEAS',
                'MPS_Steam':'14FRCA511.MEAS'
                },
         
     'predict_var':'14FRC501.MEAS'
    }
'''

"\n#First try\nvar_dic={\n    'state_var':{\n                'C5_LPG':'16Q001.PNT',\n                'LN_RVP':'14QRA502.PNT',\n                'LN_95':'14QI506.PNT',\n                'HN_10':'14QI508.PNT',\n                'Reflux_dist_ratio':'14Y559.RO01',\n                'LN_valve':'14FRC514.OUT',\n                'HN_valve':'14FRC501.OUT',\n                'LPG_dist_flow':'14FRC509.MEAS',\n                'LN_dist_flow':'14FRC514.MEAS',\n                },\n         \n    'action_var':{\n                'C1451_reflux ':'14FICA508.MEAS',\n                'Tray_5':'14TI528.PNT',\n                'C1452_reflux':'14FRCA513.MEAS',\n                'MPS_Steam':'14FRCA511.MEAS'\n                },\n         \n     'predict_var':'14FRC501.MEAS'\n    }\n"

In [35]:
var_dic

{'state_var': {'C5_LPG': '16Q001.PNT',
  'LN_RVP': '14QRA502.PNT',
  'LN_95': '14QI506.PNT',
  'HN_10': '14QI508.PNT',
  'LN_valve': '14FRC514.OUT',
  'HN_valve': '14FRC501.OUT',
  'LPG_dist_flow': '14FRC509.MEAS',
  'LN_dist_flow': '14FRC514.MEAS'},
 'action_var': {'C1451_reflux ': '14FICA508.MEAS',
  'Tray_5': '14TI528.PNT',
  'C1452_reflux': '14FRCA513.MEAS',
  'MPS_Steam': '14FRCA511.MEAS'},
 'predict_var': '14FRC501.MEAS'}

In [36]:
state_names

array(['14FIC015.MEAS', '16FIC501.MEAS', '14TI521.PNT', '14PRCA502.MEAS',
       '14FRC509.MEAS', '14FRCA506.MEAS', '14TI504.PNT', '14TI532.PNT',
       '14PRCA506.MEAS', '10FRCA505.MEAS', '14FICA508.MD', '14TRC515.MD',
       '14FRCA513.MD', '14FRCA511.MD', '14TIC527.MEAS', '14QI506.PNT',
       '14FRC514.OUT', '14FRC501.OUT', '14TI535.MEAS', '14TI528.PNT',
       '14FRC514.MEAS', '14FRC501.MEAS', '14QI508.PNT', '14TY513.RO01',
       '14FIC503.MEAS', '14TI502.PNT', '16Q001.PNT', '14QRA502.PNT',
       '14Y559.RO01', '14LRCA503.MEAS'], dtype=object)

In [37]:
column_selector=[]
short_actions=[]
short_states=[]
#first lets put all actions
for key in var_dic['action_var']:
    column_selector.append(var_dic['action_var'][key])
print(len(column_selector))
short_actions=deepcopy(column_selector) #safe all actions variables

#second all states
for key in var_dic['state_var']:
    column_selector.append(var_dic['state_var'][key])
    short_states.append(var_dic['state_var'][key]) #safe states
    
#third the optimization var
column_selector.append(var_dic['predict_var']) 
short_states.append(var_dic['predict_var'])

4


In [38]:
print(column_selector)
print('column_selector',len(column_selector))
print('short_actions',len(short_actions))
print('short_states',len(short_states))

['14FICA508.MEAS', '14TI528.PNT', '14FRCA513.MEAS', '14FRCA511.MEAS', '16Q001.PNT', '14QRA502.PNT', '14QI506.PNT', '14QI508.PNT', '14FRC514.OUT', '14FRC501.OUT', '14FRC509.MEAS', '14FRC514.MEAS', '14FRC501.MEAS']
column_selector 13
short_actions 4
short_states 9


### Create New Dataframe with smaler action-state-space

In [39]:
df_short=vertical_stack[column_selector]

In [40]:
df_short[short_actions]
#df_short[short_states]

Unnamed: 0,14FICA508.MEAS,14TI528.PNT,14FRCA513.MEAS,14FRCA511.MEAS
0,2169.806396,115.848541,2493.364014,16.972578
1,2165.187012,115.930305,2503.087891,16.983036
2,2134.303467,115.982369,2496.101807,16.995716
3,2144.191406,115.944038,2493.253662,17.020340
4,2152.655762,115.962944,2494.748047,17.034838
...,...,...,...,...
98636,2088.255127,114.624886,2259.743896,7.096138
98637,2091.254639,114.542984,2279.686279,7.112698
98638,2088.333008,114.477219,2278.392578,7.094784
98639,2089.855469,114.768044,2278.239990,7.069374


#### Storing Data Frame

In [41]:
csv_name='%sdata_short.csv'%(data_out_dir)
df_short.to_csv(csv_name)
print(df_short.shape)
print(csv_name)

(2073588, 13)
C:/Users/aalbaher/dataset_pttgc/data_preprocess/May_13_165417/data_short.csv


In [42]:
#get arrays
a_i=np.array(df_short[short_actions].iloc[0:len(df_short)-1,:]) #skip last row
s_i=np.array(df_short[short_states].iloc[0:len(df_short)-1,:]) #skip last row
s_o=np.array(df_short[short_states].iloc[1::,:]) #skip fist row
print(len(short_actions))
print(a_i.shape)
print(s_i.shape)
print(s_o.shape)

4
(2073587, 4)
(2073587, 9)
(2073587, 9)


In [43]:
#prepare output short list dataset
x_set_short = np.concatenate((a_i,s_i),axis=1)
y_set_short = s_o

In [44]:
#writing dataset into disk
x=('%sshort_x.pickle'%(data_out_dir))
y=('%sshort_y.pickle'%(data_out_dir))
with open(x, 'wb') as f:
    pickle.dump(x_set_short, f, pickle.HIGHEST_PROTOCOL)
with open(y, 'wb') as f:
    pickle.dump(y_set_short, f, pickle.HIGHEST_PROTOCOL)
print(x)
print(y)
print(x_set_short.shape)
print(y_set_short.shape)

C:/Users/aalbaher/dataset_pttgc/data_preprocess/May_13_165417/short_x.pickle
C:/Users/aalbaher/dataset_pttgc/data_preprocess/May_13_165417/short_y.pickle
(2073587, 13)
(2073587, 9)


In [45]:
def get_shift_cut(actions,states,shift):
    _,num_actions_columns=actions.shape
    a_s_in=np.concatenate((actions,states),axis=1)
    #print('a_s_shift\n',a_s_in)
    r,c=a_s_in.shape #rows, columns
    n=r//shift
    a_s_shift=np.zeros((n,c))
    #print(a_s_shift.shape)
    #print(n)
    for i in range(1,n):
        #print(i)
        a_s_shift[i,:]=a_s_in[i*shift,:]
    x_set=a_s_shift[0:-1,:]
    y_set=a_s_shift[1::,num_actions_columns::]
    return a_s_shift,x_set,y_set

In [46]:
_,x_short_cut,y_short_cut=get_shift_cut(a_i,s_i,5)
print('x',x_short_cut.shape)
print('y',y_short_cut.shape)

x (414716, 13)
y (414716, 9)


In [47]:
#safe cut data_set
x=('%sshort_x_cut.pickle'%(data_out_dir))
y=('%sshort_y_cut.pickle'%(data_out_dir))
with open(x, 'wb') as f:
    pickle.dump(x_short_cut, f, pickle.HIGHEST_PROTOCOL)
with open(y, 'wb') as f:
    pickle.dump(y_short_cut, f, pickle.HIGHEST_PROTOCOL)
print(x)
print(y)

C:/Users/aalbaher/dataset_pttgc/data_preprocess/May_13_165417/short_x_cut.pickle
C:/Users/aalbaher/dataset_pttgc/data_preprocess/May_13_165417/short_y_cut.pickle


In [48]:
print(x_short_cut.shape)
print(y_short_cut.shape)

(414716, 13)
(414716, 9)


In [49]:
#print(s_i[0:2,:])

In [50]:
#print(s_o[0:3,:])

In [51]:
#Saving actions and state names for the reduced dataframe.
f_ns='%sshort_states'%(data_out_dir)
f_na='%sshort_actions'%(data_out_dir)
print('short_states:\n',short_states)
print('short_actions\n',short_actions)
np.save(f_ns,short_states)
np.save(f_na,short_actions)
print(f_ns)
print(f_na)

short_states:
 ['16Q001.PNT', '14QRA502.PNT', '14QI506.PNT', '14QI508.PNT', '14FRC514.OUT', '14FRC501.OUT', '14FRC509.MEAS', '14FRC514.MEAS', '14FRC501.MEAS']
short_actions
 ['14FICA508.MEAS', '14TI528.PNT', '14FRCA513.MEAS', '14FRCA511.MEAS']
C:/Users/aalbaher/dataset_pttgc/data_preprocess/May_13_165417/short_states
C:/Users/aalbaher/dataset_pttgc/data_preprocess/May_13_165417/short_actions
