In [1]:
import numpy as np
import pandas as pd
import re

In [2]:
# The integer 'fix' column is a bit field: bit i being set means regimes[i] applies.
# The two helper functions in this cell decode a value either into the names of the
# set bits (as a string) or into a 0/1 vector with one entry per regime.
regimes = np.array([
    'is_rab', 'is_rez', 'is_rem', 'is_reg', 'is_ispr',
    'is_neispr', 'is_otkaz', 'is_zavisim', 'is_vkl', 'is_vkl2',
    'is_trig', 'is_trba', 'is_vedushciy', 'is_mu', 'is_blokirovka',
])
# One single-bit mask per regime name: 1, 2, 4, ..., 2**14.
powers = [1 << shift for shift in range(len(regimes))]
def int_to_regimes_str(n):
    """Return str() of the sub-array of `regimes` whose bits are set in n.

    Position i of `regimes` is selected when n has the bit `powers[i]` set.
    """
    selected = np.asarray([(n & bit) > 0 for bit in powers])
    names = regimes[selected]
    return str(names)
def int_to_regimes_bool(n):
    """Return an integer array with 1 where n has the matching bit of `powers` set, else 0."""
    flags = [int((n & bit) > 0) for bit in powers]
    return np.asarray(flags)

In [3]:
# Build the lookup table used to convert 'addr' (int) to the name of an element.
# Each non-blank line of addr64.txt must match "<18 characters parsed as hex> ... - <name>";
# the dictionary key is the decimal string form of that hex address.
addr64_dict = {}
# The context manager closes the file even if a line fails to parse.
with open("addr64.txt", "r") as f:
    for line_no, line in enumerate(f, start=1):
        if not line.strip():
            continue  # tolerate blank lines (e.g. a trailing newline at end of file)
        result = re.match(r'^(.{18}) .+- (.+)$', line)
        if result is None:
            # Fail with a message that points at the offending line instead of an
            # AttributeError on None.group().
            raise ValueError(
                "addr64.txt line %d does not match the expected format: %r" % (line_no, line)
            )
        addr64_dict[str(int(result.group(1), 16))] = result.group(2)
def dec_addr_to_name(n):
    """Return the element name stored in addr64_dict for address n, or "Noname".

    n may be an int or its decimal string form. It is normalised with str()
    for the lookup because the dictionary keys are decimal strings; checking
    membership with the raw value made every int argument resolve to "Noname".
    """
    return addr64_dict.get(str(n), "Noname")

In [4]:
# Convert a string surrounded by curly brackets, e.g. "{0,0,0}", to an array of its values
def s_to_arr(s):
    """Remove every '{' and '}' from s, split on ',' and return the pieces as a NumPy array of strings."""
    without_braces = s.translate(str.maketrans('', '', '{}'))
    pieces = without_braces.split(',')
    return np.asarray(pieces)

In [5]:
# Load the raw tab-separated export.
df = pd.read_csv("storage_techstate_mssa2.export", sep='\t')
# DataFrame.info() prints its summary itself and returns None, so it is called
# directly; wrapping it in print() only appended a stray "None" to the output.
df.info()
df.head(5)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 31195332 entries, 0 to 31195331
Data columns (total 11 columns):
 #   Column            Dtype  
---  ------            -----  
 0   uuid              object 
 1   reg_time          object 
 2   deltammmctime     int64  
 3   addr              int64  
 4   cmd               int64  
 5   fix               int64  
 6   state             object 
 7   data              object 
 8   fpo_work_mode     int64  
 9   hostname          object 
 10  fpo_version_hash  float64
dtypes: float64(1), int64(5), object(5)
memory usage: 2.6+ GB
None


Unnamed: 0,uuid,reg_time,deltammmctime,addr,cmd,fix,state,data,fpo_work_mode,hostname,fpo_version_hash
0,833f5cc4-a18d-4cff-a490-9bd08c8cf191,2020-10-20 12:42:29.226324,-11156,2306406028020813824,1,274,"{0,0,0,0,0,0}","{0,0,0,0,0,0}",0,C2KV1A15,
1,395d513d-2d6c-4ef5-b674-b9a71842154e,2020-10-20 12:42:29.226324,-11156,2306406028020826112,1,274,"{0,0,0,0,0,0}","{0,0,0,0,0,0}",0,C2KV1A15,
2,42722f1d-7359-4463-b2ab-7bf5abd5af2f,2020-10-20 12:42:29.226324,-11156,2306406028020862976,1,274,"{0,0,0,0,0,0}","{0,0,0,0,0,0}",0,C2KV1A15,
3,188eff9c-ae1a-461b-ad8e-770d6d31276a,2020-10-20 12:42:29.226324,-11156,2306406028020830208,1,274,"{0,0,0,0,0,0}","{0,0,0,0,0,0}",0,C2KV1A15,
4,97f752d0-4cea-4d3d-a85a-54e165ce90e3,2020-10-20 12:42:29.226335,-11156,2306406028004098048,1,274,"{0,0,0,0,0,0}","{0,0,0,0,0,0}",0,C2KV1A15,


In [6]:
# Expand the packed columns into one column per value, created in this order:
#   'state' -> state0..state5   (string pieces produced by s_to_arr)
#   'data'  -> data0..data5     (string pieces produced by s_to_arr)
#   'fix'   -> 'regimes'        (string listing the names of the set bits)
#   'fix'   -> one 0/1 column per entry of `regimes`
#   'addr'  -> 'element_name'
# Columns are assigned directly, so no zero-filled placeholders are needed. The
# placeholders also used np.int, which was removed in NumPy 1.24.
N_PARTS = 6  # the six target columns imply every 'state' / 'data' value has six entries

for source in ('state', 'data'):
    # reshape() both handles an empty frame and fails loudly if a row does not
    # split into exactly N_PARTS pieces.
    parts = np.asarray([s_to_arr(x) for x in df[source]]).reshape(len(df), N_PARTS)
    for i in range(N_PARTS):
        df[source + str(i)] = parts[:, i]

# A plain list avoids materialising a fixed-width unicode array for every row.
df['regimes'] = [int_to_regimes_str(x) for x in df['fix']]

flags = np.asarray([int_to_regimes_bool(x) for x in df['fix']]).reshape(len(df), len(regimes))
for j, regime_name in enumerate(regimes):
    df[regime_name] = flags[:, j]

df['element_name'] = [dec_addr_to_name(str(x)) for x in df['addr']]

# Release the large intermediates; kernel memory persists across cells.
del parts, flags

# The packed originals and unused metadata columns are no longer needed.
df = df.drop(columns=['data', 'state', 'hostname', 'fpo_version_hash', 'deltammmctime'])

df.head(50)

Unnamed: 0,uuid,reg_time,addr,cmd,fix,fpo_work_mode,state0,state1,state2,state3,...,is_otkaz,is_zavisim,is_vkl,is_vkl2,is_trig,is_trba,is_vedushciy,is_mu,is_blokirovka,element_name
0,833f5cc4-a18d-4cff-a490-9bd08c8cf191,2020-10-20 12:42:29.226324,2306406028020813824,1,274,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,Noname
1,395d513d-2d6c-4ef5-b674-b9a71842154e,2020-10-20 12:42:29.226324,2306406028020826112,1,274,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,Noname
2,42722f1d-7359-4463-b2ab-7bf5abd5af2f,2020-10-20 12:42:29.226324,2306406028020862976,1,274,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,Noname
3,188eff9c-ae1a-461b-ad8e-770d6d31276a,2020-10-20 12:42:29.226324,2306406028020830208,1,274,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,Noname
4,97f752d0-4cea-4d3d-a85a-54e165ce90e3,2020-10-20 12:42:29.226335,2306406028004098048,1,274,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,Noname
5,4a3c9eb8-3e5f-4019-b667-8d1c20cde8c4,2020-10-20 12:42:29.226335,2306406028004106240,1,274,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,Noname
6,fb1599bf-7ac7-48cb-a303-b7658cb2f5ef,2020-10-20 12:42:29.226335,2306406028004200448,1,274,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,Noname
7,246c4207-fb7d-44ce-82f1-5195979bf1e5,2020-10-20 12:42:29.226335,2306406028004204544,1,274,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,Noname
8,9458b996-743b-4fb3-badd-0949f8bb49ce,2020-10-20 12:42:29.226335,2306406028004093952,1,274,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,Noname
9,d358d267-223d-4980-bc85-8f6462b1412b,2020-10-20 12:42:30.380131,2306406028004093952,1045,274,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,Noname


In [8]:
# 'element_name' is already filled in by the feature-expansion cell above and
# 'addr' has not changed since, so the per-row lookup is not repeated here.
# The column drops and the CSV export happen in the cells that follow.
df.head(50)

Unnamed: 0,uuid,reg_time,addr,cmd,fix,fpo_work_mode,state0,state1,state2,state3,...,is_otkaz,is_zavisim,is_vkl,is_vkl2,is_trig,is_trba,is_vedushciy,is_mu,is_blokirovka,element_name
0,833f5cc4-a18d-4cff-a490-9bd08c8cf191,2020-10-20 12:42:29.226324,2306406028020813824,1,274,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,
1,395d513d-2d6c-4ef5-b674-b9a71842154e,2020-10-20 12:42:29.226324,2306406028020826112,1,274,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,
2,42722f1d-7359-4463-b2ab-7bf5abd5af2f,2020-10-20 12:42:29.226324,2306406028020862976,1,274,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,
3,188eff9c-ae1a-461b-ad8e-770d6d31276a,2020-10-20 12:42:29.226324,2306406028020830208,1,274,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,
4,97f752d0-4cea-4d3d-a85a-54e165ce90e3,2020-10-20 12:42:29.226335,2306406028004098048,1,274,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,
5,4a3c9eb8-3e5f-4019-b667-8d1c20cde8c4,2020-10-20 12:42:29.226335,2306406028004106240,1,274,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,
6,fb1599bf-7ac7-48cb-a303-b7658cb2f5ef,2020-10-20 12:42:29.226335,2306406028004200448,1,274,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,
7,246c4207-fb7d-44ce-82f1-5195979bf1e5,2020-10-20 12:42:29.226335,2306406028004204544,1,274,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,
8,9458b996-743b-4fb3-badd-0949f8bb49ce,2020-10-20 12:42:29.226335,2306406028004093952,1,274,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,
9,d358d267-223d-4980-bc85-8f6462b1412b,2020-10-20 12:42:30.380131,2306406028004093952,1045,274,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,


In [7]:
# 'fix' and 'addr' have been decoded into the regime / element_name columns, so the
# raw values are dropped. Reassignment is used instead of inplace=True.
df = df.drop(columns=['fix', 'addr'])
df.head(10)

Unnamed: 0,uuid,reg_time,cmd,fpo_work_mode,state0,state1,state2,state3,state4,state5,...,is_otkaz,is_zavisim,is_vkl,is_vkl2,is_trig,is_trba,is_vedushciy,is_mu,is_blokirovka,element_name
0,833f5cc4-a18d-4cff-a490-9bd08c8cf191,2020-10-20 12:42:29.226324,1,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,Noname
1,395d513d-2d6c-4ef5-b674-b9a71842154e,2020-10-20 12:42:29.226324,1,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,Noname
2,42722f1d-7359-4463-b2ab-7bf5abd5af2f,2020-10-20 12:42:29.226324,1,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,Noname
3,188eff9c-ae1a-461b-ad8e-770d6d31276a,2020-10-20 12:42:29.226324,1,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,Noname
4,97f752d0-4cea-4d3d-a85a-54e165ce90e3,2020-10-20 12:42:29.226335,1,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,Noname
5,4a3c9eb8-3e5f-4019-b667-8d1c20cde8c4,2020-10-20 12:42:29.226335,1,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,Noname
6,fb1599bf-7ac7-48cb-a303-b7658cb2f5ef,2020-10-20 12:42:29.226335,1,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,Noname
7,246c4207-fb7d-44ce-82f1-5195979bf1e5,2020-10-20 12:42:29.226335,1,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,Noname
8,9458b996-743b-4fb3-badd-0949f8bb49ce,2020-10-20 12:42:29.226335,1,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,Noname
9,d358d267-223d-4980-bc85-8f6462b1412b,2020-10-20 12:42:30.380131,1045,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,Noname


In [8]:
# Drop the per-row identifier and write the expanded table to disk.
# Reassignment is used instead of inplace=True; the CSV call is unchanged, so the
# DataFrame index is still written as the first (unnamed) column.
df = df.drop(columns=['uuid'])
df.to_csv("techstate_mssa2_new.csv")