In [10]:
from typing import Iterable, List, Dict
import os
import gc

In [2]:
import pandas as pd
import numpy as np

In [3]:
# Directory that holds the tab-separated sensor files and the targets file.
# The default is the original hard-coded location; set the HYDRAULICS_DATA_DIR
# environment variable to run the notebook against another folder without
# editing this cell. os.path.join(..., "") guarantees a trailing separator,
# which matters because the loaders build file paths by string concatenation.
PATH_TO_DATA = os.path.join(
    os.environ.get("HYDRAULICS_DATA_DIR", "c:/Hydraulics/data/"), ""
)

# Number of profiles; load_targets generates profile ids 1..NUMBER_OF_PROFILES.
NUMBER_OF_PROFILES = 2205

# Number of rows each profile occupies in the assembled frame (every target
# row and every profile id is repeated this many times).
# NOTE(review): presumably the sample count of the fastest sensor per profile - confirm.
PROFILE_MAX_SAMPLE_RATE = 6000

# Column names assigned, in order, to the columns of the targets file.
TARGET_NAMES = ["cooler", "valve", "leakage", "accumulator", "stable"]

In [4]:
# One entry per sensor file. "name" is the file stem (the loader appends
# ".txt") and doubles as the column name; "resample_coeff" is how many times
# each sample of that file is repeated when the file is loaded.
read_files_config = [
    {"name": sensor_name, "resample_coeff": repeat_factor}
    for sensor_name, repeat_factor in (
        ("CE", 100),
        ("CP", 100),
        ("EPS1", 1),
        ("FS1", 10),
        ("FS2", 10),
        ("PS1", 1),
        ("PS2", 1),
        ("PS3", 1),
        ("PS4", 1),
        ("PS5", 1),
        ("PS6", 1),
        ("SE", 100),
        ("TS1", 100),
        ("TS2", 100),
        ("TS3", 100),
        ("TS4", 100),
        ("VS1", 100),
    )
]

In [5]:
def get_files(config: List[Dict]) -> Iterable[np.ndarray]:
    """Lazily load every configured sensor file as one flat array.

    For each entry the tab-separated file ``PATH_TO_DATA + name + ".txt"`` is
    parsed as floats, every value is repeated ``resample_coeff`` times along
    the column axis, and the result is flattened row by row.

    Args:
        config: Entries with a ``"name"`` (file stem) and a
            ``"resample_coeff"`` (repeat count per sample).

    Yields:
        One 1-D float array per entry, in the order of ``config``.
    """
    for entry in config:
        source_path = PATH_TO_DATA + entry["name"] + ".txt"
        samples = np.genfromtxt(source_path, dtype=float, delimiter='\t')
        # Repeat each value in place along the columns, then unroll the rows.
        stretched = np.repeat(samples, entry["resample_coeff"], axis=1)
        yield stretched.flatten()

In [6]:
def load_feature_dataframe(config: List[Dict]) -> pd.DataFrame:
    """Load all configured sensor files into a single feature DataFrame.

    Args:
        config: Entries with a ``"name"`` key (used both to locate the file
            and as the column name) plus whatever ``get_files`` requires.

    Returns:
        A DataFrame with one column per config entry, in config order, where
        each column is the flat array produced by ``get_files`` for that entry.

    Raises:
        ValueError: Propagated from ``np.stack`` when ``config`` is empty or
            the loaded arrays do not all share the same shape.
    """
    columns = [file["name"] for file in config]
    # np.stack needs a real sequence: handing it a generator was deprecated in
    # NumPy 1.16 and raises TypeError on current releases, so materialize first.
    arrays = list(get_files(config))
    data = np.stack(arrays, axis=-1)
    return pd.DataFrame(data, columns=columns)

In [7]:
def load_targets(filename: str) -> pd.DataFrame:
    """Load the target table and expand it to one row per sample.

    The tab-separated integer file ``PATH_TO_DATA + filename`` is read, each of
    its rows is repeated ``PROFILE_MAX_SAMPLE_RATE`` times, and a leading
    ``profile_id`` column (1..``NUMBER_OF_PROFILES``, repeated the same number
    of times) is placed in front of the ``TARGET_NAMES`` columns.

    Args:
        filename: Name of the targets file inside ``PATH_TO_DATA``.

    Returns:
        A DataFrame with columns ``profile_id`` followed by ``TARGET_NAMES``.
    """
    raw_conditions = np.genfromtxt(PATH_TO_DATA + filename, dtype=int, delimiter='\t')
    conditions_df = pd.DataFrame(
        np.repeat(raw_conditions, PROFILE_MAX_SAMPLE_RATE, axis=0),
        columns=TARGET_NAMES,
    )

    # Profile ids are generated from the configured count, not read from the file.
    profile_ids = np.repeat(range(1, NUMBER_OF_PROFILES + 1), PROFILE_MAX_SAMPLE_RATE)
    profile_ids_df = pd.DataFrame(profile_ids, columns=["profile_id"])

    frames = [profile_ids_df, conditions_df]
    return pd.concat(frames, axis=1, sort=False)

In [8]:
# Build both halves, then join them column-wise: targets first, sensors after.
feature_df = load_feature_dataframe(read_files_config)
target_df = load_targets("profile.txt")
df = pd.concat((target_df, feature_df), axis=1, sort=False)

# Drop the intermediate frames and ask the collector to reclaim their memory;
# only the combined `df` is kept from here on.
del feature_df, target_df
gc.collect()