# Import Libraries

In [1]:
# OTHERS
import os
from datetime import datetime



# PANDAS LIBRARIES
from pandas import read_csv
from pandas import DataFrame
from pandas import concat



# SKLEARN LIBRARIES
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler



# Initialize dataset-directory and file-names

## &nbsp;&nbsp;- set dataset directory name

In [2]:
dataset_directory = "datasets/01 aep/"

## &nbsp;&nbsp;- set dataset train, train features, train targets, and test data filenames

In [4]:
dataset_original_file       = "{:}00 original.csv".format(dataset_directory)

dataset_train_file          = "{:}01 a train ts-1.csv".format(dataset_directory)
dataset_train_features_file = "{:}01 b train features ts-1.csv".format(dataset_directory)
dataset_train_targets_file  = "{:}01 c train targets ts-1.csv".format(dataset_directory)
dataset_test_file           = "{:}01 d test ts-1.csv".format(dataset_directory)

dataset_train_X_file = "{:}02 a train_X ts-1.csv".format(dataset_directory)
dataset_train_y_file = "{:}02 b train_y ts-1.csv".format(dataset_directory)
dataset_test_X_file  = "{:}02 c test_X ts-1.csv".format(dataset_directory)
dataset_test_y_file  = "{:}02 d test_y ts-1.csv".format(dataset_directory)

## &nbsp;&nbsp;- set dataset storage directory name

In [5]:
datenow = datetime.now().date()
folder_year = str(datenow.year)
folder_month = str(datenow.month).zfill(2)
folder_day = str(datenow.day).zfill(2)
folder_store_data = "{:}_{:}_{:}/".format(folder_year, folder_month, folder_day)
dataset_directory_store   = "{:}{:}".format(dataset_directory, folder_store_data)
print ("\n\"{:}\" folder will be used for storing the train, train features, train targets, test data.\n".format(dataset_directory_store))


"datasets/01 aep/2022_11_08/" folder will be used for storing the train, train features, train targets, test data.



## &nbsp;&nbsp;- create dataset scaled directory

In [6]:
os.makedirs(dataset_directory_store, exist_ok=True)
print ("\n\"{:}\" directory is available now. We will store the train, train features, train targets, test data in this directory.\n".format(dataset_directory_store))


"datasets/01 aep/2022_11_08/" directory is available now. We will store the train, train features, train targets, test data in this directory.



# 1. Load the Dataset, Train_X, Train_y, Test_X, and Test_y data

In [7]:
data_original = read_csv(dataset_original_file)

train        = read_csv(dataset_train_file)
test         = read_csv(dataset_test_file)
feature_vars = read_csv(dataset_train_features_file)
target_vars  = read_csv(dataset_train_targets_file)

train_X = read_csv(dataset_train_X_file)
train_y = read_csv(dataset_train_y_file)
test_X  = read_csv(dataset_test_X_file)
test_y  = read_csv(dataset_test_y_file)

In [8]:
train_X.head()

Unnamed: 0,T1,RH_1,T2,RH_2,T3,RH_3,T4,T5,RH_6,T7,RH_7,T8,RH_8,RH_9,T_out,Press_mm_hg,RH_out,Windspeed
0,19.89,47.596667,19.2,44.79,19.79,44.73,19.0,17.166667,84.256667,17.2,41.626667,18.2,48.9,45.53,6.6,733.5,92.0,7.0
1,19.89,46.693333,19.2,44.7225,19.79,44.79,19.0,17.166667,84.063333,17.2,41.56,18.2,48.863333,45.56,6.483333,733.6,92.0,6.666667
2,19.89,46.3,19.2,44.626667,19.79,44.933333,18.926667,17.166667,83.156667,17.2,41.433333,18.2,48.73,45.5,6.366667,733.7,92.0,6.333333
3,19.89,46.066667,19.2,44.59,19.79,45.0,18.89,17.166667,83.423333,17.133333,41.29,18.1,48.59,45.4,6.25,733.8,92.0,6.0
4,19.89,46.333333,19.2,44.53,19.79,45.0,18.89,17.2,84.893333,17.2,41.23,18.1,48.59,45.4,6.133333,733.9,92.0,5.666667


In [9]:
train_y.head()

Unnamed: 0,Appliances
0,60
1,60
2,50
3,50
4,60


# 2. Preprocessing (Scaling) data

## - function for converting data for supervised learning

In [10]:
def series_to_supervised(dataset, n_in=1, n_out=1, dropnan=True):
    num_vars = 1 if type(dataset) is list else dataset.shape[1]
    dataframe = DataFrame(dataset)
    cols, names = list(), list()

    # input sequence ( t-n, ...., t-1 )
    for i in range(n_in, 0, -1):
        cols.append(dataframe.shift(i))
        names += [('var%d(t-%d)' % (j+1, i)) for j in range(num_vars)]

    for i in range(0, n_out):
        cols.append(dataframe.shift(-i))
        if i == 0:
            names += [('var%d(t)' % (j+1)) for j in range(num_vars)]
        else:
            names += [('var%d(t+%d)' % (j+1, i)) for j in range(num_vars)]

    # put it all together
    agg = concat(cols, axis=1)
    agg.columns = names

    # drop rows with NaN values
    if dropnan:
        agg.dropna(inplace=True)

    return agg

## - preparing 'values' of train_X, train_y, test_X, and test_y for scaling

In [11]:
values_train_X = train_X.values
values_train_X_shape = values_train_X.shape
values_train_y = train_y.values
values_train_y_shape = values_train_y.shape
values_test_X = test_X.values
values_test_X_shape = values_test_X.shape
values_test_y = test_y.values
values_test_y_shape = values_test_y.shape

print ("- Shape (values_train_X_shape):", values_train_X_shape)
print ("- Shape (values_train_y_shape):", values_train_y_shape)
print ("- Shape (values_test_X_shape):", values_test_X_shape)
print ("- Shape (values_test_y_shape):", values_test_y_shape)

- Shape (values_train_X_shape): (6579, 18)
- Shape (values_train_y_shape): (6579, 1)
- Shape (values_test_X_shape): (6578, 18)
- Shape (values_test_y_shape): (6578, 1)


## - scaling by either (standard, minMax) one of the approaches

In [12]:
scaling_options = ["Standard", "MinMax"]
selected_scaling = scaling_options[0] # Standard
# selected_scaling = scaling_options[1] # MinMax
# selected_scaling = scaling_options[2] # None
scaling_minmax_min = -1
scaling_minmax_max = 1

In [13]:
sc_values_train_X_scaled_df  = DataFrame()
sc_values_train_y_scaled_df  = DataFrame()
sc_values_test_X_scaled_df   = DataFrame()
sc_values_test_y_scaled_df   = DataFrame()
mmc_values_train_X_scaled_df = DataFrame()
mmc_values_train_y_scaled_df = DataFrame()
mmc_values_test_X_scaled_df  = DataFrame()
mmc_values_test_y_scaled_df  = DataFrame()

In [14]:
if (selected_scaling == "Standard"):
    sc = StandardScaler()
    
    sc_values_train_X_scaled_df = DataFrame(columns=train_X.columns , index=train_X.index)
    sc_values_train_X_scaled_df[sc_values_train_X_scaled_df.columns] = sc.fit_transform(values_train_X)
    
    sc_values_train_y_scaled_df = DataFrame(columns=train_y.columns , index=train_y.index)
    sc_values_train_y_scaled_df[sc_values_train_y_scaled_df.columns] = sc.fit_transform(values_train_y)
    
    sc_values_test_X_scaled_df = DataFrame(columns=test_X.columns , index=test_X.index)
    sc_values_test_X_scaled_df[sc_values_test_X_scaled_df.columns] = sc.fit_transform(values_test_X)
    
    sc_values_test_y_scaled_df = DataFrame(columns=test_y.columns , index=test_y.index)
    sc_values_test_y_scaled_df[sc_values_test_y_scaled_df.columns] = sc.fit_transform(values_test_y)

    print ("\n{:} is being applied\n".format(sc))

elif (selected_scaling == "MinMax"):
    mmc = MinMaxScaler(feature_range=(scaling_minmax_min, scaling_minmax_max))
    
    mmc_values_train_X_scaled_df = DataFrame(columns=train_X.columns , index=train_X.index)
    mmc_values_train_X_scaled_df[mmc_values_train_X_scaled_df.columns] = mmc.fit_transform(values_train_X)
    
    mmc_values_train_y_scaled_df = DataFrame(columns=train_y.columns , index=train_y.index)
    mmc_values_train_y_scaled_df[mmc_values_train_y_scaled_df.columns] = mmc.fit_transform(values_train_y)
    
    mmc_values_test_X_scaled_df = DataFrame(columns=test_X.columns , index=test_X.index)
    mmc_values_test_X_scaled_df[mmc_values_test_X_scaled_df.columns] = mmc.fit_transform(values_test_X)
    
    mmc_values_test_y_scaled_df = DataFrame(columns=test_y.columns , index=test_y.index)
    mmc_values_test_y_scaled_df[mmc_values_test_y_scaled_df.columns] = mmc.fit_transform(values_test_y)

    print ("\n{:} is being applied\n".format(mmc))

else:
    print ("\n{:}Scaler() is undefined".format(selected_scaling))


StandardScaler() is being applied



In [15]:
sc_values_train_X_scaled_df

Unnamed: 0,T1,RH_1,T2,RH_2,T3,RH_3,T4,T5,RH_6,T7,RH_7,T8,RH_8,RH_9,T_out,Press_mm_hg,RH_out,Windspeed
0,-0.569775,1.613672,-0.296175,1.156103,-0.767025,0.963794,-0.193810,-0.908608,-0.336051,-0.970190,0.675237,-1.321496,0.499467,0.372713,0.482919,-2.477168,0.575859,0.758543
1,-0.569775,1.352965,-0.296175,1.132976,-0.767025,0.985121,-0.193810,-0.908608,-0.354412,-0.970190,0.662222,-1.321496,0.492284,0.379908,0.453787,-2.465979,0.575859,0.641832
2,-0.569775,1.239447,-0.296175,1.100142,-0.767025,1.036069,-0.241788,-0.908608,-0.440520,-0.970190,0.637493,-1.321496,0.466165,0.365519,0.424655,-2.454789,0.575859,0.525120
3,-0.569775,1.172105,-0.296175,1.087579,-0.767025,1.059766,-0.265777,-0.908608,-0.415194,-1.026028,0.609511,-1.384276,0.438740,0.341537,0.395523,-2.443600,0.575859,0.408409
4,-0.569775,1.249067,-0.296175,1.067022,-0.767025,1.059766,-0.265777,-0.880745,-0.275586,-0.970190,0.597797,-1.384276,0.438740,0.341537,0.366391,-2.432410,0.575859,0.291698
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6574,-0.417208,-0.696378,-0.971467,-0.347982,0.074785,-1.428388,0.388465,0.187540,-0.325604,0.395045,-0.658683,-0.028212,-0.952109,-0.889515,-0.832190,-0.075164,0.821143,-1.342258
6575,-0.417208,-0.751935,-0.971467,-0.373107,0.049276,-1.487630,0.388465,0.113981,-0.307243,0.361542,-0.711060,-0.065880,-1.017407,-0.961459,-0.798897,-0.073299,0.803623,-1.342258
6576,-0.417208,-0.783681,-0.951209,-0.416505,-0.001743,-1.561090,0.366657,0.113981,-0.289673,0.361542,-0.735602,-0.065880,-1.061809,-1.009422,-0.765603,-0.071434,0.786102,-1.342258
6577,-0.417208,-0.822162,-0.951209,-0.462188,-0.070618,-1.584786,0.366657,0.113981,-0.274953,0.318467,-0.764050,-0.065880,-1.100988,-1.081367,-0.719823,-0.071434,0.680981,-1.342258


### > displaying StandardScaler() based scalings

In [16]:
sc_values_train_X_scaled_df.head()

Unnamed: 0,T1,RH_1,T2,RH_2,T3,RH_3,T4,T5,RH_6,T7,RH_7,T8,RH_8,RH_9,T_out,Press_mm_hg,RH_out,Windspeed
0,-0.569775,1.613672,-0.296175,1.156103,-0.767025,0.963794,-0.19381,-0.908608,-0.336051,-0.97019,0.675237,-1.321496,0.499467,0.372713,0.482919,-2.477168,0.575859,0.758543
1,-0.569775,1.352965,-0.296175,1.132976,-0.767025,0.985121,-0.19381,-0.908608,-0.354412,-0.97019,0.662222,-1.321496,0.492284,0.379908,0.453787,-2.465979,0.575859,0.641832
2,-0.569775,1.239447,-0.296175,1.100142,-0.767025,1.036069,-0.241788,-0.908608,-0.44052,-0.97019,0.637493,-1.321496,0.466165,0.365519,0.424655,-2.454789,0.575859,0.52512
3,-0.569775,1.172105,-0.296175,1.087579,-0.767025,1.059766,-0.265777,-0.908608,-0.415194,-1.026028,0.609511,-1.384276,0.43874,0.341537,0.395523,-2.4436,0.575859,0.408409
4,-0.569775,1.249067,-0.296175,1.067022,-0.767025,1.059766,-0.265777,-0.880745,-0.275586,-0.97019,0.597797,-1.384276,0.43874,0.341537,0.366391,-2.43241,0.575859,0.291698


In [17]:
sc_values_train_y_scaled_df.head()

Unnamed: 0,Appliances
0,-0.3525
1,-0.3525
2,-0.442328
3,-0.442328
4,-0.3525


In [18]:
sc_values_test_X_scaled_df.head()

Unnamed: 0,T1,RH_1,T2,RH_2,T3,RH_3,T4,T5,RH_6,T7,RH_7,T8,RH_8,RH_9,T_out,Press_mm_hg,RH_out,Windspeed
0,-1.380895,0.10509,-0.73746,-0.109652,-0.891122,-0.195543,-0.504048,-0.76945,1.677303,-1.190348,0.253851,-1.614691,0.16325,0.52005,-1.063911,-0.012143,0.905982,-1.259257
1,-1.346802,0.079202,-0.688932,-0.151201,-0.943375,-0.195543,-0.571589,-0.761201,1.661286,-1.190348,0.198489,-1.614691,0.216785,0.59955,-1.017405,-0.012143,0.831167,-1.259257
2,-1.346802,0.058021,-0.638093,-0.169007,-0.943375,-0.195543,-0.580031,-0.795848,1.64655,-1.190348,0.173671,-1.614691,0.224816,0.504973,-0.9709,-0.012143,0.756352,-1.259257
3,-1.380895,-0.071417,-0.638093,-0.227373,-0.943375,-0.11114,-0.537818,-0.795848,1.630533,-1.190348,0.140225,-1.614691,0.224816,0.452887,-0.924395,-0.012143,0.681537,-1.259257
4,-1.312709,-0.120839,-0.638093,-0.288707,-0.943375,-0.151332,-0.537818,-0.795848,1.565343,-1.246536,0.093491,-1.691567,0.173957,0.315818,-0.890573,-0.021389,0.556845,-1.259257


In [19]:
sc_values_test_y_scaled_df.head()

Unnamed: 0,Appliances
0,2.095086
1,2.489107
2,0.420497
3,-0.07203
4,2.981633


### > displaying MinMaxScaler() based scalings

In [20]:
mmc_values_train_X_scaled_df.head()

In [21]:
mmc_values_train_y_scaled_df.head()

In [22]:
mmc_values_test_X_scaled_df.head()

In [23]:
mmc_values_test_y_scaled_df.head()

# 3. Storing  Scaled data

## - storing scaled data

In [24]:
file_store_data_postfix = "_{:}".format(selected_scaling.lower())

In [25]:
datetimenow = datetime.now()
file_hour   = str(datetimenow.hour).zfill(2)
file_minute = str(datetimenow.minute).zfill(2)
file_second = str(datetimenow.second).zfill(2)
file_store_data_prefix = "{:}_{:}_{:} ".format(file_hour, file_minute, file_second)
print ("\n\"{:}\" will be used as a prefix for the scaled data filenames.\n".format(file_store_data_prefix))


"20_55_40 " will be used as a prefix for the scaled data filenames.



In [26]:
dataset_train_X_scaled_file = "{:}{:}03 a train_X_scaled{:} ts-1.csv".format(
    dataset_directory_store, file_store_data_prefix, file_store_data_postfix
)
dataset_train_y_scaled_file = "{:}{:}03 b train_y_scaled{:} ts-1.csv".format(
    dataset_directory_store, file_store_data_prefix, file_store_data_postfix
)
dataset_test_X_scaled_file  = "{:}{:}03 c test_X_scaled{:} ts-1.csv".format(
    dataset_directory_store, file_store_data_prefix, file_store_data_postfix
)
dataset_test_y_scaled_file  = "{:}{:}03 d test_y_scaled{:} ts-1.csv".format(
    dataset_directory_store, file_store_data_prefix, file_store_data_postfix
)

dataset_train_X_scaled_file_noprefix = "{:}03 a train_X_scaled{:} ts-1.csv".format(dataset_directory, file_store_data_postfix)
dataset_train_y_scaled_file_noprefix = "{:}03 b train_y_scaled{:} ts-1.csv".format(dataset_directory, file_store_data_postfix)
dataset_test_X_scaled_file_noprefix  = "{:}03 c test_X_scaled{:} ts-1.csv".format(dataset_directory, file_store_data_postfix)
dataset_test_y_scaled_file_noprefix  = "{:}03 d test_y_scaled{:} ts-1.csv".format(dataset_directory, file_store_data_postfix)

In [27]:
dataset_test_X_scaled_file

'datasets/01 aep/2022_11_08/20_55_40 03 c test_X_scaled_standard ts-1.csv'

In [28]:
if (selected_scaling == "Standard"):
    # storing in sub-folder with timestamp
    sc_values_train_X_scaled_df.to_csv(dataset_train_X_scaled_file, index=False)
    sc_values_train_y_scaled_df.to_csv(dataset_train_y_scaled_file, index=False)
    sc_values_test_X_scaled_df.to_csv(dataset_test_X_scaled_file, index=False)
    sc_values_test_y_scaled_df.to_csv(dataset_test_y_scaled_file, index=False)
    # storing in main folder
    sc_values_train_X_scaled_df.to_csv(dataset_train_X_scaled_file_noprefix, index=False)
    sc_values_train_y_scaled_df.to_csv(dataset_train_y_scaled_file_noprefix, index=False)
    sc_values_test_X_scaled_df.to_csv(dataset_test_X_scaled_file_noprefix, index=False)
    sc_values_test_y_scaled_df.to_csv(dataset_test_y_scaled_file_noprefix, index=False)
    print ("\nStoring the {:} based scalings".format(sc))
elif (selected_scaling == "MinMax"):
    # storing in sub-folder with timestamp
    mmc_values_train_X_scaled_df.to_csv(dataset_train_X_scaled_file, index=False)
    mmc_values_train_y_scaled_df.to_csv(dataset_train_y_scaled_file, index=False)
    mmc_values_test_X_scaled_df.to_csv(dataset_test_X_scaled_file, index=False)
    mmc_values_test_y_scaled_df.to_csv(dataset_test_y_scaled_file, index=False)
    # storing in main folder
    mmc_values_train_X_scaled_df.to_csv(dataset_train_X_scaled_file_noprefix, index=False)
    mmc_values_train_y_scaled_df.to_csv(dataset_train_y_scaled_file_noprefix, index=False)
    mmc_values_test_X_scaled_df.to_csv(dataset_test_X_scaled_file_noprefix, index=False)
    mmc_values_test_y_scaled_df.to_csv(dataset_test_y_scaled_file_noprefix, index=False)
    print ("\nStoring the {:} based scalings".format(mmc))
else:
    print ("\n{:}Scaler() is undefined".format(selected_scaling))


Storing the StandardScaler() based scalings


## - verifying (all data)

In [29]:
sc_values_train_X_scaled_df  = DataFrame()
sc_values_train_y_scaled_df  = DataFrame()
sc_values_test_X_scaled_df   = DataFrame()
sc_values_test_y_scaled_df   = DataFrame()
mmc_values_train_X_scaled_df = DataFrame()
mmc_values_train_y_scaled_df = DataFrame()
mmc_values_test_X_scaled_df  = DataFrame()
mmc_values_test_y_scaled_df  = DataFrame()

### > verifying scaled data

In [30]:
if (selected_scaling == "Standard"):
    sc_values_train_X_scaled_df = read_csv(dataset_train_X_scaled_file_noprefix)
    sc_values_train_y_scaled_df = read_csv(dataset_train_y_scaled_file_noprefix)
    sc_values_test_X_scaled_df  = read_csv(dataset_test_X_scaled_file_noprefix)
    sc_values_test_y_scaled_df  = read_csv(dataset_test_y_scaled_file_noprefix)
elif (selected_scaling == "MinMax"):
    mmc_values_train_X_scaled_df = read_csv(dataset_train_X_scaled_file_noprefix)
    mmc_values_train_y_scaled_df = read_csv(dataset_train_y_scaled_file_noprefix)
    mmc_values_test_X_scaled_df  = read_csv(dataset_test_X_scaled_file_noprefix)
    mmc_values_test_y_scaled_df  = read_csv(dataset_test_y_scaled_file_noprefix)
else:
    print ("\n{:}Scaler() is undefined".format(selected_scaling))

In [31]:
sc_values_train_X_scaled_df.head()

Unnamed: 0,T1,RH_1,T2,RH_2,T3,RH_3,T4,T5,RH_6,T7,RH_7,T8,RH_8,RH_9,T_out,Press_mm_hg,RH_out,Windspeed
0,-0.569775,1.613672,-0.296175,1.156103,-0.767025,0.963794,-0.19381,-0.908608,-0.336051,-0.97019,0.675237,-1.321496,0.499467,0.372713,0.482919,-2.477168,0.575859,0.758543
1,-0.569775,1.352965,-0.296175,1.132976,-0.767025,0.985121,-0.19381,-0.908608,-0.354412,-0.97019,0.662222,-1.321496,0.492284,0.379908,0.453787,-2.465979,0.575859,0.641832
2,-0.569775,1.239447,-0.296175,1.100142,-0.767025,1.036069,-0.241788,-0.908608,-0.44052,-0.97019,0.637493,-1.321496,0.466165,0.365519,0.424655,-2.454789,0.575859,0.52512
3,-0.569775,1.172105,-0.296175,1.087579,-0.767025,1.059766,-0.265777,-0.908608,-0.415194,-1.026028,0.609511,-1.384276,0.43874,0.341537,0.395523,-2.4436,0.575859,0.408409
4,-0.569775,1.249067,-0.296175,1.067022,-0.767025,1.059766,-0.265777,-0.880745,-0.275586,-0.97019,0.597797,-1.384276,0.43874,0.341537,0.366391,-2.43241,0.575859,0.291698


In [32]:
sc_values_train_y_scaled_df.head()

Unnamed: 0,Appliances
0,-0.3525
1,-0.3525
2,-0.442328
3,-0.442328
4,-0.3525


In [33]:
sc_values_test_X_scaled_df.head()

Unnamed: 0,T1,RH_1,T2,RH_2,T3,RH_3,T4,T5,RH_6,T7,RH_7,T8,RH_8,RH_9,T_out,Press_mm_hg,RH_out,Windspeed
0,-1.380895,0.10509,-0.73746,-0.109652,-0.891122,-0.195543,-0.504048,-0.76945,1.677303,-1.190348,0.253851,-1.614691,0.16325,0.52005,-1.063911,-0.012143,0.905982,-1.259257
1,-1.346802,0.079202,-0.688932,-0.151201,-0.943375,-0.195543,-0.571589,-0.761201,1.661286,-1.190348,0.198489,-1.614691,0.216785,0.59955,-1.017405,-0.012143,0.831167,-1.259257
2,-1.346802,0.058021,-0.638093,-0.169007,-0.943375,-0.195543,-0.580031,-0.795848,1.64655,-1.190348,0.173671,-1.614691,0.224816,0.504973,-0.9709,-0.012143,0.756352,-1.259257
3,-1.380895,-0.071417,-0.638093,-0.227373,-0.943375,-0.11114,-0.537818,-0.795848,1.630533,-1.190348,0.140225,-1.614691,0.224816,0.452887,-0.924395,-0.012143,0.681537,-1.259257
4,-1.312709,-0.120839,-0.638093,-0.288707,-0.943375,-0.151332,-0.537818,-0.795848,1.565343,-1.246536,0.093491,-1.691567,0.173957,0.315818,-0.890573,-0.021389,0.556845,-1.259257


In [34]:
sc_values_test_y_scaled_df.head()

Unnamed: 0,Appliances
0,2.095086
1,2.489107
2,0.420497
3,-0.07203
4,2.981633


In [35]:
mmc_values_train_X_scaled_df.head()

In [36]:
mmc_values_test_X_scaled_df.head()

In [37]:
mmc_values_train_y_scaled_df.head()

In [38]:
mmc_values_test_y_scaled_df.head()