In [4]:
from os import listdir
from os.path import isfile, join
import pandas as pd
import numpy as np

In [5]:
DATA_FOLDER = "data/"
COLUMN_NAMES = ["time", "i1", "v1", "i2", "v2", "i3", "v3", "i4", "v4"]

In [6]:
def read_all_files_in_folder_inl(folder_name: str):
  path = DATA_FOLDER + folder_name
  file_list = [join(path, f) for f in listdir(path) if isfile(join(path, f))]
  df_list = []
  for fname in file_list:
    if fname.endswith(".xlsx"):
      print(fname)
      df = pd.read_excel(fname, header=1)
      df = df.dropna(axis=1, how='all')
      df_list.append(df)
  return df_list


In [7]:
def read_all_files_in_folder_v1(folder_name: str):
  path = DATA_FOLDER + folder_name
  file_list = [join(path, f) for f in listdir(path) if isfile(join(path, f))]
  df_list = []
  for fname in file_list:
    if fname.endswith(".csv"):
      print(fname)
      df = pd.read_csv(fname, sep="\t", header=None)
      if len(df.columns) > len(COLUMN_NAMES):
        df = df.drop(df.columns[range(len(COLUMN_NAMES),len(df.columns))], axis=1)
      df.columns = COLUMN_NAMES
      df_list.append(df)
  return df_list

In [8]:
def read_all_files_in_folder_v2(folder_name: str):
  path = DATA_FOLDER + folder_name
  file_list = [join(path, f) for f in listdir(path) if isfile(join(path, f))]
  df_list = []
  for fname in file_list:
    if fname.endswith(".xlsx"):
      print(fname)
      df = pd.read_excel(fname, header=0, sheet_name="Sheet1")
      df = df.dropna(axis=1, how='all')
      df = df.drop(df.columns[1], axis=1)
      df.iloc[:,1] = df.iloc[:,1]/1000.0
      df_list.append(df)
  return df_list

In [63]:
def read_all_files_in_folder_v3(folder_name: str):
  COLUMN_NAMES = ["time", "i1", "v1", "t1", "i2", "v2", "t2", "i3", "v3", "t3", "i4", "v4", "t4"]
  path = DATA_FOLDER + folder_name
  file_list = [join(path, f) for f in listdir(path) if isfile(join(path, f))]
  df_list = []
  for fname in file_list:
    if fname.endswith(".csv"):
      print(fname)
      df = pd.read_csv(fname, sep="\t", header=2)
      print(df)
      if len(df.columns) > len(COLUMN_NAMES):
        df = df.drop(df.columns[range(len(COLUMN_NAMES),len(df.columns))], axis=1)
      df = df.loc[100:]
      #df.columns = COLUMN_NAMES
      df_list.append(df)
  return df_list

In [64]:
def create_input_output_data(df_list, NUMBER_OF_CHANNELS):
  input_data = []
  output_data = []
  for df in df_list:
    N = len(df.index)
    if N >= 3600:
      for i in range(NUMBER_OF_CHANNELS):
        print(i)
        current_column = i*3+2
        if current_column>=len(df.columns):
          break
        if df.iloc[:, [current_column]].gt(0).all().item() or df.iloc[:, [current_column]].isna().all().item():
          continue
        first_negative_current = df.iloc[:, [current_column]].lt(0).idxmax().item()
        print(first_negative_current)
        #print(df.iloc[:first_negative_current-1, current_column].mean())
        input = df.iloc[first_negative_current:, [0, current_column]]
        input_data.append(input)
        last_valid_index = df.iloc[:, current_column].notna()[::-1].idxmax()
        output = float(df.iloc[last_valid_index-300:last_valid_index, [current_column]].mean())
        output_data.append(output)
  return input_data, output_data

In [65]:
def preprocess(folder_name: str):
  if "v1" in folder_name:
    x = read_all_files_in_folder_v1(folder_name)
    i,o = create_input_output_data(x, 4)   

  if "v2" in folder_name:
    x = read_all_files_in_folder_v2(folder_name)
    i,o = create_input_output_data(x, 1)   

  if "v3" in folder_name:
    x = read_all_files_in_folder_v3(folder_name)
    i,o = create_input_output_data(x, 4)   

  if "inl" in folder_name:
    x = read_all_files_in_folder_inl(folder_name)
    i,o = create_input_output_data(x, 5)   
  
  return i,o

In [66]:
if __name__ == "__main__":
  i,o = preprocess("Data_v3")

/content/drive/MyDrive/Gdrive/startup/Novus Sentry/data/Data_v3/NS_DEV_Test3_Oct2322_1104PM.csv
      Time (S)  Current Ch1  Voltage Ch1  Temp Ch1 (C)  Current Ch2  \
0          0.0     4.102375     0.000600     22.176249     2.613719   
1          1.0     4.102053    -0.000366     22.169684     2.592144   
2          2.0     4.102053    -0.000366     22.209078     2.589890   
3          3.0     4.102053    -0.000688     22.189380     2.586026   
4          4.0     4.102053    -0.001010     22.189380     2.585382   
...        ...          ...          ...           ...          ...   
3757    3775.0     4.082733    -0.573867     22.570508     2.588280   
3758    3776.0     4.082089    -0.562275     22.596816     2.580230   
3759    3777.0     4.082733    -0.563885     22.550779     2.574112   
3760    3778.0     4.082411    -0.569359     22.590238     2.574434   
3761    3779.0     4.082089    -0.571291     22.583661     2.570569   

      Voltage Ch2  Temp Ch2 (C)  Unnamed: 7  Unname

In [67]:
i

[      Time (S)  Voltage Ch1
 200      222.0    -1.414638
 201      223.0    -1.409164
 202      224.0    -1.398859
 203      225.0    -1.402079
 204      226.0    -1.399825
 ...        ...          ...
 3757    3775.0    -0.573867
 3758    3776.0    -0.562275
 3759    3777.0    -0.563885
 3760    3778.0    -0.569359
 3761    3779.0    -0.571291
 
 [3562 rows x 2 columns],       Time (S)  Voltage Ch2
 200      222.0    -1.013412
 201      223.0    -1.005362
 202      224.0    -1.002464
 203      225.0    -1.000210
 204      226.0    -0.998600
 ...        ...          ...
 3757    3775.0    -0.455045
 3758    3776.0    -0.448283
 3759    3777.0    -0.452147
 3760    3778.0    -0.455367
 3761    3779.0    -0.457299
 
 [3562 rows x 2 columns]]