In [1]:
import os
import sys
import pathlib
import time

import numpy as np

In [2]:
cwd = pathlib.Path().resolve()
src = cwd.parent
root = src.parent
sys.path.append(str(src))

In [22]:
def normalize_training_data(
        save_path='data/normalized_data/',
        load_path_tra_val='data/augmented_data/',
        load_path_test='data/raw_datasets/',
        normalize_train_validation_data=True,
        normalize_augmented_train_validation_data=False,
        normalize_test_data=False,
        normalize_velocities=False,
):
    
    if normalize_train_validation_data:

        save_path_tra_val = save_path + 'tra_val/'

        letters = ['o']

        if normalize_augmented_train_validation_data:
            letters += ['a', 'b', 'c', 'd']

        for i in np.arange(1, 80+1, 1):
            
            for l in letters:

                scales = get_scalings(i, sim_letter=l, folder=load_path_tra_val, timestep=-1, include_velocities=normalize_velocities)
                DEM_path = scales['DEM']['path']
                WD_path = scales['WD']['path']

                with open(load_path_tra_val + DEM_path,'r') as file:
                    data = np.loadtxt(file)
                    data[:,-1] = (data[:,-1] - scales['DEM']['mean'])/scales['DEM']['std']
                    np.savetxt(save_path_tra_val + DEM_path, data)
                
                with open(load_path_tra_val + WD_path, 'r') as file:
                    data = np.loadtxt(file)
                    data = (data - scales['WD']['min']) / (scales['WD']['max'] - scales['WD']['min'])
                    np.savetxt(save_path_tra_val + WD_path, data)

                if normalize_velocities:
                    VX_path = scales['VX']['path']
                    VY_path = scales['VY']['path']

                    with open(load_path_tra_val + VX_path, 'r') as file:
                        data = np.loadtxt(file)
                        data = (data - scales['VX']['mean'])/scales['VX']['std']
                        np.savetxt(save_path_tra_val + VX_path, data)

                    with open(load_path_tra_val + VY_path, 'r') as file:
                        data = np.loadtxt(file)
                        data = (data - scales['VY']['mean'])/scales['VY']['std']
                        np.savetxt(save_path_tra_val + VY_path, data)
                        
            if i//10 == i/10:            
                print(f'Done with {i if not normalize_augmented_train_validation_data else 5*i}/{80 if not normalize_augmented_train_validation_data else 400} training/validation datasets')

    if normalize_test_data:
        for n in [1, 2, 3]:
            save_path_test = save_path + f'test{n}/'
            load_path_test_current = load_path_test + f'test{n}/'

            if n == 1:
                sim_numbers = np.arange(500, 519+1, 1)
            elif n == 2:
                sim_numbers = np.arange(10000, 10020+1, 1)
            elif n == 3:
                sim_numbers = np.arange(15001, 15010+1, 1)

            for i in sim_numbers:
                
                scales = get_scalings(i, sim_letter='', folder=load_path_test_current, timestep=-1, include_velocities=normalize_velocities)
                DEM_path = scales['DEM']['path']
                WD_path = scales['WD']['path']

                with open(load_path_test_current + DEM_path,'r') as file:
                    data = np.loadtxt(file)
                    data[:,-1] = (data[:,-1] - scales['DEM']['mean'])/scales['DEM']['std']
                    np.savetxt(save_path_test + DEM_path, data)
                
                with open(load_path_test_current + WD_path, 'r') as file:
                    data = np.loadtxt(file)
                    data = (data - scales['WD']['min']) / (scales['WD']['max'] - scales['WD']['min'])
                    np.savetxt(save_path_test + WD_path, data)

                if normalize_velocities:
                    VX_path = scales['VX']['path']
                    VY_path = scales['VY']['path']

                    with open(load_path_test_current + VX_path, 'r') as file:
                        data = np.loadtxt(file)
                        data = (data - scales['VX']['mean'])/scales['VX']['std']
                        np.savetxt(save_path_test + VX_path, data)

                    with open(load_path_test_current + VY_path, 'r') as file:
                        data = np.loadtxt(file)
                        data = (data - scales['VY']['mean'])/scales['VY']['std']
                        np.savetxt(save_path_test + VY_path, data)
            
            print(f'Done with test set {n}')


    return



def get_scalings(sim_number, 
                 sim_letter='o',
                 folder='data/augmented_data/',
                 timestep=-1,
                 include_velocities=False):
    """Gets mean and variance for a given simulation. These are calculated for the DEM, VX, VY, WD. 
    The statistics for the DEM are constant throughout a simulation.
    The statistics for the VX, VY, WD are calculated from the LAST timestep of a simulation.
    "folder" refers to the folder containing the DEM, VX, VY, WD folders.
    "timestep" refers to the timestep to use when calculating the normalization parameters for VX, VY, WD.
    "include_velocities": set to True in order to also calculate statistics for velocities.
    """

    stats = {}

    if len(sim_letter)>0:
        sim_letter=f'_{sim_letter}'

    DEM_path = f'DEM/DEM_{str(int(sim_number))}{str(sim_letter)}.txt'
    WD_path = f'WD/WD_{str(int(sim_number))}{str(sim_letter)}.txt'

    with open(folder + DEM_path, 'r') as file:
        data = np.loadtxt(file)[:,-1]

        mean = np.mean(data)
        std = np.std(data)
        minimum = np.min(data)
        maximum = np.max(data)

        stats['DEM'] = {'path':DEM_path,
                        'mean':mean,
                        'std':std,
                        'min':minimum,
                        'max':maximum}
        
    with open(folder + WD_path, 'r') as file:
        data = np.loadtxt(file)[timestep,:]
        # print(data)
        mean = np.mean(data)
        std = np.std(data)
        minimum = np.min(data)
        maximum = np.max(data)

        stats['WD'] = {'path':WD_path,
                       'mean':mean,
                       'std':std,
                       'min':minimum,
                       'max':maximum}

    if include_velocities:
        VX_path = f'VX/VX_{str(int(sim_number))}_{str(sim_letter)}.txt'
        VY_path = f'VY/VY_{str(int(sim_number))}_{str(sim_letter)}.txt'

        with open(folder + VX_path, 'r') as file:
            data = np.loadtxt(file)[timestep,:]

            mean = np.mean(data)
            std = np.std(data)
            minimum = np.min(data)
            maximum = np.max(data)

            stats['VX'] = {'path':VX_path,
                           'mean':mean,
                           'std':std,
                           'min':minimum,
                           'max':maximum}
            
        with open(folder + VY_path, 'r') as file:
            data = np.loadtxt(file)[timestep,:]

            mean = np.mean(data)
            std = np.std(data)
            minimum = np.min(data)
            maximum = np.max(data)

            stats['VY'] = {'path':VY_path,
                           'mean':mean,
                           'std':std,
                           'min':minimum,
                           'max':maximum}
    # print(stats)
    return stats




In [23]:
save_path_tra = str(root) + "/data/normalized_data/"
load_path_tra_val = str(root) + "/data/augmented_data/"
load_path_test = str(root) + "/data/raw_datasets/"

normalize_training_data(
        save_path=save_path_tra,
        load_path_tra_val=load_path_tra_val,
        load_path_test=load_path_test,
        normalize_train_validation_data=True,
        normalize_augmented_train_validation_data=True,
        normalize_test_data=True,
        normalize_velocities=False,
)

  data = (data - scales['WD']['min']) / (scales['WD']['max'] - scales['WD']['min'])
  data = (data - scales['WD']['min']) / (scales['WD']['max'] - scales['WD']['min'])


Done with 50/400 training/validation datasets
Done with 100/400 training/validation datasets
Done with 150/400 training/validation datasets
Done with 200/400 training/validation datasets
Done with 250/400 training/validation datasets
Done with 300/400 training/validation datasets
Done with 350/400 training/validation datasets
Done with 400/400 training/validation datasets
Done with test set 1
Done with test set 2
Done with test set 3


In [12]:
file = str(root) + "/data/raw_datasets/test1/WD/WD_500.txt"

wd_500 = np.loadtxt(file)

print(np.mean(wd_500))

0.10547118184197812


### BELOW ONLY JUNK

In [43]:


folder_path = str(root) + "/data/augmented_data/"
DEM_path = folder_path + "DEM/DEM_1_o.txt"
print(DEM_path)

with open(DEM_path, 'r') as DEM_file:
    DEM = np.loadtxt(DEM_file)[:,-1]
    # mean = np.mean()
    # std = np.std()
    
print('')


C:\Users\khdeb\Projects\DS-AI/data/augmented_data/DEM/DEM_1_o.txt



In [19]:
folder_path = str(root) + "/data/augmented_data/"
WD_path = folder_path + "WD/WD_1_o.txt"
print(WD_path)

with open(WD_path, 'r') as file:
    data = np.loadtxt(file)[:, -1]
    print(data)
    print(data.shape)
    # mean = np.mean()
    # std = np.std()

C:\Users\khdeb\Projects\DS-AI/data/augmented_data/WD/WD_1_o.txt
[0.     0.     0.     ... 0.     1.0697 0.    ]
(4096,)


In [26]:
scalers = get_scalings(1, 
                 sim_letter='o',
                 folder=folder_path,
                 timestep=-1,
                 include_velocities=False)

print(scalers['DEM']['std'])

1.1445413425418827


In [31]:
var = [1]
var += [1, 2, 3]
print(var)

[1, 1, 2, 3]


In [39]:
print(11//10==11/10)

False
