In [13]:
import re
from typing import List, Any

import numpy as np
import pandas as pd

In [2]:
class InvalidFormatError(Exception):
    """Raised by ``format_states`` when the input frame cannot be parsed."""


def pipe(raw_input: Any, *functions: Any, **functions_with_args: Any) -> Any:
    """Thread ``raw_input`` through a chain of callables and return the result.

    Positional ``functions`` run first, in order, each receiving the previous
    output as its only argument. Keyword entries run afterwards, in keyword
    order: the keyword name is evaluated as an expression to obtain a callable,
    which is then invoked as ``callable(output, *value)``, so each keyword
    value must be an iterable of extra positional arguments.
    """
    output = raw_input

    # Looping over an empty tuple or dict does nothing, so no guards are needed.
    for function in functions:
        output = function(output)

    for function, args_list in functions_with_args.items():
        # NOTE(review): the keyword name is resolved with eval(), i.e. it is
        # executed as Python code in this function's scope. Only pass trusted,
        # hard-coded names here.
        output = eval(function)(output, *args_list)

    return output


def _clean_states(text: str) -> List[str]:
    return text.strip('][').strip().replace('\n', '')   


def _format_whitespaces(text: str) -> List[str]:
    return re.sub(' {2,}', ' ', text).split()


def _evaluate_states(states: List[str]) -> List[float]:
    return [eval(state) for state in states]


def _format_states(states: str) -> List[float]:
    """Parse one bracketed, whitespace-separated string into a list of numbers.

    The text is passed through ``_clean_states``, ``_format_whitespaces`` and
    ``_evaluate_states``, in that order.

    Args:
        states: A stringified vector such as ``"[0.1 0.2\\n 0.3]"``.

    Returns:
        The values produced by ``_evaluate_states`` for each token.
    """
    return pipe(
        states,
        _clean_states,
        _format_whitespaces,
        _evaluate_states,
    )


def format_states(df: pd.DataFrame) -> pd.DataFrame:
    """Expand stringified state vectors into one column per value.

    A single-row frame is read across its columns (each cell of the row
    labelled ``0`` is one state string); any other frame is read down the
    column labelled ``0``. Every string is parsed with ``_format_states`` and
    the resulting lists are spread into columns via ``pd.Series``.

    Args:
        df: Frame whose cells hold bracketed, whitespace-separated values.

    Returns:
        A frame with one row per parsed state string and one column per value.

    Raises:
        InvalidFormatError: If the expected row/column is missing or a cell
            cannot be parsed. The underlying exception is chained as the cause.
    """
    try:
        # df.T[0] is row 0 viewed as a Series; df[0] is the column labelled 0.
        raw_states = df.T[0] if df.shape[0] == 1 else df[0]
        return raw_states.apply(_format_states).apply(pd.Series)
    except Exception as error:
        # Both layouts report failures the same way, and `from error` keeps the
        # original traceback available for debugging.
        raise InvalidFormatError("Invalid DataFrame format") from error

In [3]:
# NOTE(review): absolute path inside one user's home directory — the notebook
# only runs on that machine; consider a path relative to a configurable data dir.
path = '/Users/d.e.magno/Temporary/playground/py/epi-rl/states.csv'

In [4]:
# header=None makes pandas treat the first line as data and label the
# columns 0, 1, ... — format_states() relies on the column labelled 0.
df = pd.read_csv(path, header=None)

In [7]:
# Parse the stringified state vectors into one numeric column per value.
df_states = format_states(df)

In [11]:
# Preview the parsed states.
df_states

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16
0,0.128028,0.268238,0.99995,0.15963,0.0,2.44133e-05,2.801172e-05,5.083611e-06,1.075038e-05,5.834197e-05,9e-06,1e-06,1e-06,2.6e-05,2e-05,7e-06,1.0
1,0.128011,0.2679,0.999748,0.159624,5.005097e-07,7.528829e-05,0.0001217185,1.636435e-05,3.781568e-06,9.03912e-05,6e-06,0.0,3.3e-05,0.000281,0.00014,3.2e-05,1.0
2,0.127936,0.266611,0.9986,0.159546,2.901063e-05,0.0003272127,0.0003010867,1.363446e-05,1.585567e-05,0.0004117051,0.000345,2.6e-05,8.7e-05,0.000997,0.00077,8.8e-05,1.0
3,0.127358,0.259969,0.99245,0.159187,0.0001314112,0.001621365,0.001411227,0.0001080535,0.0001319772,0.002031826,0.001948,8.9e-05,0.000446,0.004725,0.004206,0.000289,1.0
4,0.125588,0.25363,0.977786,0.158279,0.00031515,0.0009619491,0.002262955,0.0001528161,0.0004560462,0.001652298,0.004051,0.000187,0.001708,0.012102,0.015915,0.001055,0.5
5,0.122908,0.227698,0.947062,0.156421,0.00060601,0.006272547,0.0061822,0.0002659699,0.0007863169,0.007843621,0.009266,0.000676,0.003768,0.026532,0.037505,0.00231,0.5
6,0.115631,0.157828,0.859842,0.150377,0.00129733,0.01159764,0.01691418,0.0012112,0.00211511,0.01971497,0.025397,0.001714,0.009024,0.079206,0.097862,0.006371,0.5
7,0.104052,0.130206,0.751393,0.142748,0.00145769,0.0025489,0.01248247,0.00098078,0.00296953,0.0062837,0.026197,0.002066,0.019589,0.129308,0.209943,0.013878,0.0
8,0.097691,0.10531,0.683724,0.137068,0.00075879,0.00289227,0.00812752,0.00076763,0.00155333,0.00636735,0.01587,0.001394,0.028064,0.153777,0.292294,0.020444,0.0
9,0.093894,0.089305,0.640434,0.133173,0.0005370386,0.00170563,0.004537154,0.0004450802,0.0008449628,0.003816625,0.01082,0.001012,0.032792,0.17352,0.344225,0.025043,0.0


In [19]:
# Row labelled 0 of the parsed states as a 1-D array.
# NOTE(review): the same expression is assigned to `sample` in a later cell;
# `X` is not used by any other visible cell.
X = df_states.loc[0].values

In [25]:
from sklearn.preprocessing import MinMaxScaler

In [32]:
# Row labelled 0 of the parsed states as a 1-D array; used as the test input
# for the scaling experiments below.
sample = df_states.loc[0].values

In [28]:
# One shared scaler instance; every fit_transform() call below (and inside
# scale_states()) re-fits it in place.
scaler = MinMaxScaler()

In [30]:
# Fails with ValueError: scikit-learn expects a 2-D array and `sample` is 1-D.
scaler.fit_transform(sample)

ValueError: Expected 2D array, got 1D array instead:
array=[1.2802751e-01 2.6823780e-01 9.9995005e-01 1.5963008e-01 0.0000000e+00
 2.4413304e-05 2.8011716e-05 5.0836106e-06 1.0750377e-05 5.8341968e-05
 9.4452289e-06 1.1353458e-06 1.4163737e-06 2.6346688e-05 1.9905863e-05
 6.8979593e-06 1.0000000e+00].
Reshape your data either using array.reshape(-1, 1) if your data has a single feature or array.reshape(1, -1) if it contains a single sample.

In [36]:
# reshape(1, -1): a single sample (row) holding all the values as features.
sample.reshape(1, -1).shape

(1, 17)

In [42]:
# Fit on a single row: every feature has min == max, so every value scales to 0.
scaler.fit_transform(sample.reshape(1, -1))

array([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0.]])

In [43]:
# Fit on a single column: all values are treated as one feature. They already
# span 0.0 to 1.0, so the scaled output equals the input.
scaler.fit_transform(sample.reshape(-1, 1))

array([[1.2802751e-01],
       [2.6823780e-01],
       [9.9995005e-01],
       [1.5963008e-01],
       [0.0000000e+00],
       [2.4413304e-05],
       [2.8011716e-05],
       [5.0836106e-06],
       [1.0750377e-05],
       [5.8341968e-05],
       [9.4452289e-06],
       [1.1353458e-06],
       [1.4163737e-06],
       [2.6346688e-05],
       [1.9905863e-05],
       [6.8979593e-06],
       [1.0000000e+00]])

In [63]:
# Display the unscaled sample for comparison with the round-trip result.
sample

array([1.2802751e-01, 2.6823780e-01, 9.9995005e-01, 1.5963008e-01,
       0.0000000e+00, 2.4413304e-05, 2.8011716e-05, 5.0836106e-06,
       1.0750377e-05, 5.8341968e-05, 9.4452289e-06, 1.1353458e-06,
       1.4163737e-06, 2.6346688e-05, 1.9905863e-05, 6.8979593e-06,
       1.0000000e+00])

In [66]:
# Round trip: scale the sample, then invert the scaling — recovers the
# original values.
# NOTE(review): scale_states is defined in a later cell, so this cell fails
# on a fresh Restart & Run All.
scaler.inverse_transform(scale_states(sample))

array([[1.2802751e-01, 2.6823780e-01, 9.9995005e-01, 1.5963008e-01,
        0.0000000e+00, 2.4413304e-05, 2.8011716e-05, 5.0836106e-06,
        1.0750377e-05, 5.8341968e-05, 9.4452289e-06, 1.1353458e-06,
        1.4163737e-06, 2.6346688e-05, 1.9905863e-05, 6.8979593e-06,
        1.0000000e+00]])

In [48]:
def scale_states(states: np.ndarray) -> np.ndarray:
    """Min-max scale ``states`` as a single sample, re-fitting the shared ``scaler``.

    The array is reshaped to one row of shape ``(1, n)`` before
    ``scaler.fit_transform`` is applied, so the result is 2-D.
    """
    # NOTE(review): the module-level scaler is re-fit on this one row at every
    # call. With a single row each feature's min equals its max — the notebook's
    # own single-row experiment returned all zeros — so confirm this is the
    # intended scaling rather than fitting once on the full data set and calling
    # transform() here.
    single_row = states.reshape(1, -1)
    return scaler.fit_transform(single_row)

In [68]:
# The input sample is 1-D.
sample.shape

(17,)

In [67]:
# The round trip returns a 2-D array with a single row, not the 1-D input shape.
scaler.inverse_transform(scale_states(sample)).shape

(1, 17)

In [53]:
# NOTE(review): `array` is not used by any visible cell — leftover scratch value?
array = np.array([1, 234234, 2342434, 3423234])

In [62]:
# Fit on all parsed states at once, then invert the scaling (round-trip check
# over the whole frame).
scaler.inverse_transform(scaler.fit_transform(df_states.values))

array([[1.28027510e-01, 2.68237800e-01, 9.99950050e-01, 1.59630080e-01,
        0.00000000e+00, 2.44133040e-05, 2.80117160e-05, 5.08361060e-06,
        1.07503770e-05, 5.83419680e-05, 9.44522890e-06, 1.13534580e-06,
        1.41637370e-06, 2.63466880e-05, 1.99058630e-05, 6.89795930e-06,
        1.00000000e+00],
       [1.28010526e-01, 2.67900437e-01, 9.99747694e-01, 1.59623682e-01,
        5.00509657e-07, 7.52882916e-05, 1.21718535e-04, 1.63643508e-05,
        3.78156824e-06, 9.03912005e-05, 5.77381479e-06, 0.00000000e+00,
        3.31555057e-05, 2.80770182e-04, 1.40012184e-04, 3.20374056e-05,
        1.00000000e+00],
       [1.27936010e-01, 2.66611370e-01, 9.98599530e-01, 1.59545870e-01,
        2.90106250e-05, 3.27212680e-04, 3.01086750e-04, 1.36344620e-05,
        1.58556670e-05, 4.11705110e-04, 3.44956260e-04, 2.58228960e-05,
        8.69302970e-05, 9.96609450e-04, 7.69629200e-04, 8.76154080e-05,
        1.00000000e+00],
       [1.27358350e-01, 2.59968940e-01, 9.92449940e-01, 1.591