In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import torch
import torch.nn as nn
from torch.optim.lr_scheduler import StepLR
from tqdm import tqdm
import sys
import random
from sklearn.preprocessing import StandardScaler

# Machine epsilon of the platform's Python float, taken from sys.float_info.
# NOTE(review): not referenced in the visible cells — presumably used further down as a
# divide-by-zero guard; confirm or remove.
sys_epsilon = sys.float_info.epsilon

# Data Preprocessing

In [2]:
# Raw field-data files, one per case. Each one is read line by line in the
# preprocessing loop, which skips the first line of the file.
# NOTE(review): these are absolute, user-specific paths, so the notebook only runs
# unchanged on the original machine — consider a configurable base directory.
paths = ["/home/hmarefat/scratch/torchFOAM/Case_dS/postProcessing/fieldData.dat",
         "/home/hmarefat/scratch/torchFOAM/Case_dS_R53/postProcessing/fieldData.dat",
         "/home/hmarefat/scratch/torchFOAM/Case_dS_R4/postProcessing/fieldData.dat"]
# NOTE(review): presumably the expected number of columns per data row; it is not
# referenced in the visible cells — confirm against the files or remove.
nCols = 26
# Per-case suffix appended to the file stem to build each output dataset name.
# The loop pairs names[i] with paths[i], so the two lists must stay in the same order.
names = ['_R3', '_R53', '_R4']

In [3]:
def returnRandmIndx(maxIndx, count):
    """Draw ``count`` distinct random integers from ``1 .. maxIndx - 1``.

    Sampling is done without replacement through Python's global ``random``
    generator, so call ``random.seed(...)`` beforehand for repeatable draws.

    Parameters
    ----------
    maxIndx : int
        Exclusive upper bound of the candidate range.
    count : int
        Number of values to draw; ``random.sample`` raises ``ValueError`` when
        it exceeds the ``maxIndx - 1`` available candidates.

    Returns
    -------
    numpy.ndarray
        1-D integer array of the drawn values, in the order they were drawn.

    Notes
    -----
    The candidate range starts at 1, so the value 0 is never returned.
    NOTE(review): if the result is used as 0-based row indices, row 0 can never
    be selected — confirm that this is intended.
    """
    candidates = range(1, maxIndx)
    drawn = random.sample(candidates, count)
    return np.array(drawn)

In [4]:
# Pin Python's global `random` generator so the index draws below are repeatable.
random.seed(35)

# Number of row indices sampled for every case.
count = 1000000

# One draw per case, evaluated in the order R3 -> R53 -> R4 so the sequence of
# calls on the seeded generator stays the same.
# NOTE(review): the upper bounds look like per-file row counts — confirm against the data files.
Indx_R3, Indx_R53, Indx_R4 = (
    returnRandmIndx(upper, count) for upper in (4956526, 5900626, 4012426)
)

In [5]:
# Peek at the first ten sampled indices of the R3 case (before the shuffle cell below).
Indx_R3[0:10]

array([4602784, 2813286, 1104994, 2856052, 1292255, 2398361, 3621576,
       2115176, 4754160,  518511])

In [6]:
# Shuffles Indx_R3 in place using the global `random` generator.
# NOTE(review): this cell is not idempotent — every re-run reorders the array again, which
# changes the seen/unseen split made further down. Only the R3 indices are shuffled here;
# Indx_R53 and Indx_R4 are left in their sampled order. Confirm this asymmetry is intended.
random.shuffle(Indx_R3)

In [7]:
# Same peek as before, now showing the first ten entries after the in-place shuffle.
Indx_R3[0:10]

array([1433998, 3617811, 4647835, 4488912, 3146260, 4916233, 2427443,
       3525259, 1800030,  748959])

In [8]:
def splitterIndx(indx, seen_frac=0.7):
    """Split an index array into a leading "seen" part and a trailing "unseen" part.

    The split is positional: the first ``int(seen_frac * len)`` entries form the
    seen part and the remainder the unseen part. No shuffling happens here, so
    the caller is responsible for the order of ``indx``.

    Parameters
    ----------
    indx : numpy.ndarray
        Array to split along its first axis.
    seen_frac : float, optional
        Fraction of entries assigned to the seen part, within ``[0, 1]``.
        Defaults to 0.7, i.e. a 70/30 split.

    Returns
    -------
    tuple of numpy.ndarray
        ``(seen, unseen)`` slices of ``indx`` (views, not copies).

    Raises
    ------
    ValueError
        If ``seen_frac`` lies outside ``[0, 1]``.
    """
    if not 0.0 <= seen_frac <= 1.0:
        raise ValueError(f"seen_frac must be within [0, 1], got {seen_frac}")

    # Compute the cut position once so both slices are guaranteed to be complementary.
    cut = int(seen_frac * indx.shape[0])
    return indx[:cut], indx[cut:]

In [9]:
# Collect one (seen, unseen) pair of index arrays per case, in the order R3, R53, R4.
index_data = []
for indx in (Indx_R3, Indx_R53, Indx_R4):
    seen, unseen = splitterIndx(indx)
    index_data += [(seen, unseen)]

# dtype=object keeps every entry as its own 1-D index array, addressed as
# index_array[case, 0] for the seen part and index_array[case, 1] for the unseen part.
index_array = np.array(index_data, dtype=object)

In [11]:
# Sanity check: sizes of the seen and unseen index arrays for the first case.
index_array[0,0].shape, index_array[0,1].shape

((700000,), (300000,))

In [12]:
def scaler(name, ds, out_dir='../processedDatasets'):
    """Standardize ``ds`` with its own statistics and write everything to text files.

    A fresh ``StandardScaler`` is fitted on ``ds`` itself, then four files are
    written into ``out_dir``:

    * ``{name}_means.txt``  - the fitted ``mean_`` vector
    * ``{name}_scales.txt`` - the fitted ``scale_`` vector
    * ``{name}_norm.txt``   - the transformed (standardized) data
    * ``{name}.txt``        - the input data, unmodified

    Parameters
    ----------
    name : str
        Base name used for the four output files.
    ds : array-like
        Data to standardize; ``StandardScaler.fit`` expects a 2-D
        (samples, features) array.
    out_dir : str, optional
        Directory receiving the files. Created if missing, so a long
        preprocessing run does not fail at the very first write.

    Returns
    -------
    None
        The function is used for its file-writing side effects only.
    """
    import os

    os.makedirs(out_dir, exist_ok=True)

    # Local name differs from the function name on purpose: re-using `scaler`
    # here would shadow the function inside its own body.
    std = StandardScaler()
    std.fit(ds)
    np.savetxt(os.path.join(out_dir, f'{name}_means.txt'), std.mean_)
    np.savetxt(os.path.join(out_dir, f'{name}_scales.txt'), std.scale_)

    ds_norm = std.transform(ds)
    # '%.18e' writes 18 digits after the decimal point in scientific notation.
    np.savetxt(os.path.join(out_dir, f'{name}_norm.txt'), ds_norm, fmt='%.18e')
    np.savetxt(os.path.join(out_dir, f'{name}.txt'), ds, fmt='%.18e')

In [None]:
# Row buffer reused across files; it is emptied before and after each file.
data = []

for i, path in enumerate(paths):
    # Dataset name = file stem + case suffix, e.g. 'fieldData' + '_R3'.
    name = path.split('/')[-1][:-4]+names[i]
    print(f'Starting to preprocess dataset {name}')
    data.clear()
    n_bad = 0

    with open(path, "r") as f:
        # Skip the first line of the file; the default avoids StopIteration on an empty file.
        next(f, None)
        for line in f:
            fields = line.split()
            if not fields:
                # A blank line would append an empty row and make the array ragged.
                continue
            try:
                data.append([float(x) for x in fields])
            except ValueError as e:
                n_bad += 1
                print(f"Error converting line to float: {line.strip()} - {e}")

    print('Reading raw file is done!')
    if n_bad:
        # The row indices were sampled before reading, so every dropped line shifts
        # the rows that the later indices point at.
        print(f'Warning: {n_bad} malformed line(s) skipped in {name}; '
              'pre-sampled row indices no longer match raw file line numbers.')

    ds = np.array(data)
    # Release the Python-level rows now that the numeric array exists; keeping both
    # alive during scaling roughly doubles peak memory for multi-million-row files.
    data.clear()

    seen_idx, unseen_idx = index_array[i,0], index_array[i,1]
    # Fail early with a readable message instead of a bare numpy indexing error.
    for label, part in (('seen', seen_idx), ('unseen', unseen_idx)):
        if part.size and part.max() >= ds.shape[0]:
            raise IndexError(
                f'{name}: largest {label} index {part.max()} is out of range '
                f'for {ds.shape[0]} parsed rows')

    ds_seen = ds[seen_idx]
    ds_unseen = ds[unseen_idx]

    # NOTE(review): each call fits its own StandardScaler, so the unseen set is
    # normalized with its own mean/scale rather than the seen set's statistics —
    # confirm this is intended for evaluation.
    scaler(name+'_seen', ds_seen)
    scaler(name+'_unseen', ds_unseen)

    print('\n')

Starting to preprocess dataset fieldData_R3
