In [1]:
import pandas as pd 
import numpy as np 

In [2]:
def min_max_normalize(train, test):
    """Min-max normalize train and test data using the train statistics only.

    Every value is rescaled as ``(x - min) / (max - min)``, where ``min`` and
    ``max`` are computed from ``train`` along axis 0. The same train statistics
    are applied to ``test``, so normalized test values may fall outside [0, 1].

    Parameters:
    - train: array-like. Train dataset (pandas Series/DataFrame, numpy array,
      list or tuple)
    - test: array-like. Test dataset, same layout as ``train``

    Return:
    - train min-max normalization
    - test min-max normalization
    - min value for train. float or array-like
    - max value for train. float or array-like

    Notes:
    - The inputs are not modified; the arithmetic below allocates new objects.
    - If a feature has ``max == min`` in train, the division is by zero and the
      normalized values for that feature are NaN/inf.
    """
    # Plain Python sequences have neither .min(axis=...) nor element-wise
    # arithmetic, so promote them to numpy arrays first.
    if isinstance(train, (list, tuple)):
        train = np.asarray(train)
    if isinstance(test, (list, tuple)):
        test = np.asarray(test)

    min_x = train.min(axis=0)
    max_x = train.max(axis=0)
    value_range = max_x - min_x

    # No defensive .copy(deep=True): subtraction already returns a new object,
    # and ndarray.copy() does not accept `deep`, which made numpy input fail.
    z_train = (train - min_x) / value_range
    z_test = (test - min_x) / value_range
    return z_train, z_test, min_x, max_x

def unnormalize_mix_max(normalized_prediction, min_x, max_x):
    """Undo a min-max normalization.

    Maps a normalized number or array back to the original scale by computing
    ``normalized_prediction * (max_x - min_x) + min_x``.

    Parameters:
    - normalized_prediction: number or array-like of normalized values
    - min_x: minimum used for the normalization. float or array-like
    - max_x: maximum used for the normalization. float or array-like

    Return:
    - the values on the original scale
    """
    # TODO: add an assert that the shape (n, j) of normalized_prediction is
    # compatible with the shape of min_x / max_x (the original note writes
    # [n,]; presumably one entry per column - confirm).
    value_range = max_x - min_x
    rescaled = normalized_prediction * value_range
    return rescaled + min_x

In [6]:
## Example:
# Synthetic integer data with one row per observation and one column per station.
SEED = 42       # fixed seed so Restart & Run All reproduces the same data
N_TRAIN = 80    # the first N_TRAIN observations are train, the rest are test
stations = 10
observations = 100

# Local generator: reproducible without touching numpy's global random state.
rng = np.random.default_rng(SEED)
numpy_array = rng.integers(low=0, high=10000, size=(observations, stations))

# The same train/test split, once as numpy arrays and once as DataFrames.
np_train = numpy_array[:N_TRAIN, :]
np_test = numpy_array[N_TRAIN:, :]
df = pd.DataFrame(numpy_array)
df_train = df.iloc[:N_TRAIN, :]
df_test = df.iloc[N_TRAIN:, :]

## Test 

In [42]:
###############################
## Test for a single series ##
###############################
s_train = df_train[0]
s_test = df_test[0]

# Normalization Function
z_train_f, z_test_f, min_x_f, max_x_f = min_max_normalize(s_train, s_test)

# Manual reference, built only from s_train so that it checks the function
# independently (it must not reuse the function's own min/max outputs).
min_x_m, max_x_m = s_train.min(), s_train.max()
z_train_m = (s_train - min_x_m) / (max_x_m - min_x_m)
z_test_m = (s_test - min_x_m) / (max_x_m - min_x_m)

# Test
assert z_train_f.equals(z_train_m)
assert z_test_f.equals(z_test_m)
assert min_x_f == min_x_m
assert max_x_f == max_x_m

# Unnormalize functions
# Round before casting: astype(int) truncates, so a round-trip value such as
# 4999.999... would otherwise become 4999 instead of 5000.
train_u_f = unnormalize_mix_max(z_train_f, min_x_f, max_x_f).round().astype(int)
test_u_f = unnormalize_mix_max(z_test_f, min_x_f, max_x_f).round().astype(int)

# The round trip must give back the original values.
assert (train_u_f == s_train).all()
assert (test_u_f == s_test).all()


In [45]:
###############################
## Test for a full DataFrame ##
###############################

# Function
z_train_f, z_test_f, min_x_f, max_x_f = min_max_normalize(df_train, df_test)

# Manual reference, built only from df_train so that it checks the function
# independently (it must not reuse the function's own min/max outputs).
min_x_m, max_x_m = df_train.min(), df_train.max()
z_train_m = (df_train - min_x_m) / (max_x_m - min_x_m)
z_test_m = (df_test - min_x_m) / (max_x_m - min_x_m)

# Test
assert z_train_f.equals(z_train_m)
assert z_test_f.equals(z_test_m)
# min/max are per column: one entry for each column of df_train
assert min_x_f.equals(min_x_m)
assert min_x_f.shape[0] == df_train.shape[1]
assert max_x_f.equals(max_x_m)
assert max_x_f.shape[0] == df_train.shape[1]

In [47]:
# Round trip on the full DataFrame. This expects z_train_f, z_test_f, min_x_f
# and max_x_f to come from normalizing df_train / df_test in the cell above.
train_u_f = unnormalize_mix_max(z_train_f, min_x_f, max_x_f)
test_u_f = unnormalize_mix_max(z_test_f, min_x_f, max_x_f)

# Compare with a tolerance: the un-normalized values are floats and can differ
# from the original integers by floating-point round-off.
assert np.allclose(train_u_f, df_train)
assert np.allclose(test_u_f, df_test)