In [5]:
import os
import numpy as np
import math
import pandas as pd
import shutil

# Every data path used in this notebook is relative, so show the working
# directory up front and ask the user to double-check it.
cwdNotice = 'Make sure to confirm that the following path is the correct working directory'
print(cwdNotice)
print(os.getcwd())

Make sure to confirm that the following path is the correct working directory
/Users/dsnyder/Documents/GitHub/wind-model/NateRegression


In [6]:
# Wind-tunnel calibration table: motor setting (Hz) -> air speed (m/s).
# hzArray is kept for record-keeping only; speeds is indexed by the 5-Hz step
# number (speeds[i] belongs to hzArray[i] = 5*i Hz) when speed labels are built.
hzArray = np.arange(0, 45, 5)
speeds = np.array([
    0.00,        #  0 Hz
    1.26363735,  #  5 Hz
    1.58562983,  # 10 Hz
    2.07066356,  # 15 Hz
    2.571993,    # 20 Hz
    3.18291372,  # 25 Hz
    3.75322345,  # 30 Hz
    4.33626595,  # 35 Hz
    4.91413509,  # 40 Hz
])



In [13]:
'''
Master Import Block (2-Degree Increments) [I of II]

Loads the raw recordings for a range of motor settings, subtracts the matching
zero-speed recording, averages groups of N consecutive samples, and collects
train / validation / test readings together with their speed and angle labels.


MODES (selected by oneGo):

oneGo = True  -- all nVel speeds are imported in this single run and the combined files are
                 written directly to the comp* output folders. All raw files must be available at once.
oneGo = False -- because the data is so large, we assume that at any point, only a single
                 speed's worth of data is available (i.e. 180 files + the relevant 0-speed readings
                 for a single, fixed Hz). Keep nVel = 1, re-run this block once per startIdx, and
                 combine the resulting per-speed files with block II.


PARAMETERS:

oneGo     -- bool -- see MODES above
startIdx  -- int -- in {1, ..., numVel}, represents the lowest Hz we are importing; Hz value is kVel*startIdx (kVel = 5 here).
nVel      -- int -- in {1, ..., numVel}, represents how many sequential Hz we are importing

nKeep     -- int -- number of leading reading columns kept from each raw file (hexagon = 6, pentagon = 5);
                    also selects the raw-file naming scheme. Only 5 and 6 are supported by the import loop.
geo       -- str -- sub-folder (with trailing slash) of the comp* output folders
nInputs   -- int -- not read by this block (presumably used by later cells -- TODO confirm)

nTrain    -- int -- number of training points at EACH velocity and angle to end up with
nValidate -- int -- number of validation points at EACH velocity and angle to end up with
nTest     -- int -- number of testing points at EACH velocity and angle to end up with
N         -- int -- number of points to average over PER datum
deltaN    -- int -- number of raw rows between the starts of consecutive averaging windows; must be >= N
numAng    -- int -- number of angle increments present in the data; should be a divisor of 360
numVel    -- int -- number of different Hz total

DERIVED:

M         -- int -- number of rows of each file to read (smaller = faster)
Row ranges used from each file: [RTL, RTH) test, [RL0, RH0) validation, [RL, RH) train,
separated by random gaps skip1 and skip2 of 1000-1799 rows each.

[TODO]: add a moving-average filter as a possible modification to the data input reading / filtering problem.


RESULT:

oneGo = True  -- one file each of readings, mags, angs, and angsrad per data split in the comp* folders.
oneGo = False -- (after manually cycling through the numVel speeds) numVel files each of readings, mags,
                 and angs per data split. These are combined in the next block.
'''

# raise ValueError('Do not run if you already have the csv data!')

# raise ValueError('Make sure to confirm file path at bottom of this block!')

#
# BEGIN Parameters to alter by the user
#

oneGo = True
# Designed to be iterable by simply changing startIdx with everything else fixed

startIdx = 1
nVel = 8

# Choose geometry (nKeep: hexagon = 6, pentagon = 5, square = 4, triangle = 3...)
nInputs = 6
geo = 'hex/'
nKeep = 6

nTrain = 500
nValidate = 200
nTest = 200
N = 5              # Number of points to average over
deltaN = 20        # How many to skip per sample --> Must be AT LEAST as big as N!
numAng = 180       # For 10-degree increments set this to 36 instead of 180
numVel = 8

# Seed depends on N so that each averaging width gets its own reproducible gaps
np.random.seed(12345*N)

dataPath = 'data/'
trainPath = 'fooTrain/N'+str(N)+'/'
testPath = 'fooTest/N'+str(N)+'/'
valPath = 'fooVal/N'+str(N)+'/'

outTrainPath = 'compTrain_N'+str(N)+'/'+geo
outTestPath = 'compTest_N'+str(N)+'/'+geo
outValPath = 'compVal_N'+str(N)+'/'+geo
#
# END Parameters to alter by the user
#

#
# BEGIN parameter validation
#
if deltaN < N:
    # Otherwise consecutive averaging windows would share raw samples
    raise ValueError('deltaN must be at least as big as N')

if startIdx < 1 or nVel < 1 or startIdx + nVel - 1 > numVel:
    # The Hz index also indexes the speed table and the raw file names
    raise ValueError('startIdx and nVel must select Hz indices within {1, ..., numVel}')

if 360 % numAng != 0:
    raise ValueError('numAng must be a divisor of 360')
#
# END parameter validation
#

#
# BEGIN derived parameters
#
# Raw rows reserved per segment: each of the n* averaged points advances deltaN rows,
# and the (deltaN+1) factor leaves room for the last N-row window.
numTrain = nTrain*(deltaN+1)         # End up with nTrain train points which represent averages over N consecutive samples
numValidate = nValidate*(deltaN+1)   # End up with nValidate validation points (each averages over N consecutive samples)
numTest = nTest*(deltaN+1)           # End up with nTest test points which represent averages over N consecutive samples

M = 6000 + numTrain + numValidate + numTest

# Random gaps (1000-1799 rows) between the test / validation / train segments
skip1 = 1000 + int(np.floor(np.random.rand()*800))
skip2 = 1000 + int(np.floor(np.random.rand()*800))

kAng = 360 // numAng      # degrees per angle increment
kVel = int(40/numVel)     # Hz per speed index

# Segment boundaries (row indices into each raw file), in file order: test, validation, train
RTL = M - numTrain - numValidate - numTest - 4000
RTH = RTL + numTest
RL0 = RTH + skip1
RH0 = RL0 + numValidate
RL = RH0 + skip2
RH = RL + numTrain

if RH > M:
    # Error checking. We want to min M (faster read-in, less wasteful), but also need to sample and 
    # leave some offset between train and test to prevent overfitting to temporal correlations
    raise ValueError('RH larger than max_rows (M)! Need to increase M or reduce {N, nTrain, nTest}')

# 
# END derived parameters 
# 

#
# BEGIN storage arrays to be written out at the end
#
# One (speed, angle) pair contributes nTrain / nTest / nValidate rows to the
# respective split, so each split holds numAng*nVel such groups of rows.
nGroups = numAng*nVel
splitSizes = (nTrain, nTest, nValidate)

# Readings (features): one row per averaged point, nKeep columns
df, dfT, dfV = (np.zeros((nGroups*rows, nKeep)) for rows in splitSizes)

# Speed and angle labels: one entry per row of the matching readings array
mags, magsT, magsV = (np.zeros(nGroups*rows) for rows in splitSizes)
angs, angsT, angsV = (np.zeros_like(labels) for labels in (mags, magsT, magsV))

# Scratch buffers for the averaged readings of a single (speed, angle) pair
tmpf, tmpft, tmpfv = (np.zeros((rows, nKeep)) for rows in splitSizes)
#
# END storage arrays to be written out at the end
#

# Import loop: for every requested speed and angle, load the recording, subtract the
# zero-speed recording, average windows of N rows, and store the result with its labels.
#
# Needs in the workspace: the parameters, derived row ranges (RTL..RH, M, kAng, kVel),
# the storage arrays (df, dfT, dfV, mags*, angs*), and the speeds table.

# The raw-file naming scheme depends only on nKeep, so select it once up front.
# For hexagon:  'calibData<hz>_angle_0_...' (zero speed), 'calibData_angle_<deg>_...' (data)
# For pentagon: 'zeros<hz>_angle_0_...'     (zero speed), 'data_degInc_2_angle_<deg>_...' (data)
if nKeep == 5:
    zeroPrefix = 'zeros'
    dataPrefix = 'data_degInc_2_angle_'
elif nKeep == 6:
    zeroPrefix = 'calibData'
    dataPrefix = 'calibData_angle_'
else:
    raise ValueError('Unsupported nKeep as of June 27 2022')


def _windowMeans(segment, nOut):
    """Condense a raw segment into nOut averaged rows.

    Row k of the result is the column-wise mean of segment[deltaN*k : deltaN*k + N].
    Each split is averaged with its own count, so the result is fully populated
    regardless of how nTrain, nTest and nValidate compare to each other.
    """
    averaged = np.zeros((nOut, segment.shape[1]))
    for k in range(nOut):
        averaged[k, :] = np.mean(segment[deltaN*k:deltaN*k+N, :], axis=0)
    return averaged


# Iterate over the set of velocities (startIdx to startIdx + nVel)
for hz in range(startIdx, startIdx+nVel):
    # Import the zero-speed data as to difference from the data with velocity
    zeroDataStr = zeroPrefix+str(hz)+'_angle_0_samples_30000_frequency_1000_motor_0_Hz.txt'
    txtString2 = dataPath+zeroDataStr
    df1 = np.loadtxt(txtString2, skiprows=1, max_rows=M, delimiter=',')
    if df1.shape[0] < RH:
        # A short file would otherwise yield truncated (or NaN) averages without any error
        raise ValueError('Zero-speed file '+txtString2+' has fewer than RH rows')

    # Keep tabs on progress by printing hz value
    print(hz)
    print()

    velIdx = hz - startIdx   # position of this speed within the storage arrays

    # Iterate over the set of angles (numAng)
    # Assumed to be saved in even degree intervals (e.g. 10 degrees, 2 degrees, etc)
    for ang in range(numAng):
        # Load the data and difference with zero-velocity readings
        txtString = dataPath+dataPrefix+str(int(ang*kAng))+'_samples_30000_frequency_1000_motor_'+str(int(hz*kVel))+'_Hz.txt'
        df0 = np.loadtxt(txtString, skiprows=1, max_rows=M, delimiter=',')
        if df0.shape[0] < RH:
            raise ValueError('Data file '+txtString+' has fewer than RH rows')

        # Pick out training, validation, and test data segments
        tmpf0 = (df0[RL:RH,:nKeep] - df1[RL:RH,:nKeep])
        tmpft0 = (df0[RTL:RTH,:nKeep] - df1[RTL:RTH,:nKeep])
        tmpfv0 = (df0[RL0:RH0,:nKeep] - df1[RL0:RH0,:nKeep])

        # Perform the filtering (averaging), condensing from
        # {numTrain, numTest, numValidate} raw rows to {nTrain, nTest, nValidate} points
        tmpf = _windowMeans(tmpf0, nTrain)
        tmpft = _windowMeans(tmpft0, nTest)
        tmpfv = _windowMeans(tmpfv0, nValidate)

        # Store features (X) and labels (y) for each split. Rows are ordered
        # speed-major, then angle, then sample within the (speed, angle) pair.
        for splitCount, splitX, splitAngs, splitMags, splitBlock in (
            (nTrain, df, angs, mags, tmpf),
            (nTest, dfT, angsT, magsT, tmpft),
            (nValidate, dfV, angsV, magsV, tmpfv),
        ):
            rowLo = (numAng*velIdx + ang)*splitCount
            rowHi = rowLo + splitCount
            splitX[rowLo:rowHi, :nKeep] = splitBlock
            splitAngs[rowLo:rowHi] = ang*kAng      # Angles in degrees
            splitMags[rowLo:rowHi] = speeds[hz]    # Speed in m/s for this Hz index

    
# Once the import loop has finished, write the results to files.
#   oneGo = True  -- write the final readings / mags / angs / angsrad files
#                    straight into the comp* output folders.
#   oneGo = False -- write per-speed files whose names carry startIdx, which lets
#                    us index files by the idx of the run (nVel is assumed to be 1
#                    in this mode); angsrad is produced later by block II.
#
# Each entry: (final folder, per-speed folder, readings, speed labels, angle labels)
splitOutputs = (
    (outTrainPath, trainPath, df, mags, angs),
    (outTestPath, testPath, dfT, magsT, angsT),
    (outValPath, valPath, dfV, magsV, angsV),
)

for finalDir, partDir, splitX, splitMags, splitAngs in splitOutputs:
    if oneGo:
        # np.savetxt does not create missing folders; make sure the target exists
        # so a long import run is not lost at the very last step.
        os.makedirs(finalDir, exist_ok=True)
        np.savetxt(finalDir+'readings.csv', splitX, delimiter=',')
        np.savetxt(finalDir+'mags.csv', splitMags, delimiter=',')
        np.savetxt(finalDir+'angs.csv', splitAngs, delimiter=',')
        # Same angle labels converted from degrees to radians
        np.savetxt(finalDir+'angsrad.csv', splitAngs*math.pi/180.0, delimiter=',')
    else:
        os.makedirs(partDir, exist_ok=True)
        np.savetxt(partDir+'readings'+str(startIdx)+'.csv', splitX, delimiter=',')
        np.savetxt(partDir+'mags'+str(startIdx)+'.csv', splitMags, delimiter=',')
        np.savetxt(partDir+'angs'+str(startIdx)+'.csv', splitAngs, delimiter=',')


1

2

3

4

5

6

7

8



In [None]:
'''
Master Import Block (2-Degree Increments) [II of II]

Combines the per-speed files written by block I (run with oneGo = False and nVel = 1,
once per startIdx) into a single file per quantity and data split.


INPUT: 

numVel files each of mags, angs, and readings per data split (train / test / validation), 
named with iterated numbers from 1 to numVel. 

REQUIREMENTS: 

Parameters from Master Block I in the workspace, specifically: 

     N
     oneGo
     nKeep
     nTrain
     nValidate
     nTest
     numAng
     numVel

OUTPUT: 

A single concatenated file for each of mags, angs, angsrad, and readings, which preserves proper labeling. 
With oneGo = True only angsrad is written (see the note on that branch below).
'''

# raise ValueError('Confirm that you actually need to run this function!')

# raise ValueError('Check the path of the written files, below!')
dataPath = 'data/'
trainPath = 'fooTrain/N'+str(N)+'/'
testPath = 'fooTest/N'+str(N)+'/'
valPath = 'fooVal/N'+str(N)+'/'

# NOTE(review): geo is hard-coded here and replaces the value set in block I.
# The per-speed readings files must have nKeep columns -- confirm that nKeep in
# the workspace matches this geometry before running.
geo = 'pent/'

outTrainPath = 'compTrain_N'+str(N)+'/'+geo
outTestPath = 'compTest_N'+str(N)+'/'+geo
outValPath = 'compVal_N'+str(N)+'/'+geo


def _readCsv(path):
    """Read a headerless CSV file into a 2-D numpy array."""
    return pd.read_csv(path, header=None).to_numpy()


# np.savetxt does not create missing folders, so make sure the targets exist
for outDir in (outTrainPath, outTestPath, outValPath):
    os.makedirs(outDir, exist_ok=True)

if oneGo:
    # NOTE(review): this branch only rebuilds angsrad.csv, and it does so from the
    # FIRST per-speed angs file of each split. Block I with oneGo = True already
    # writes angsrad.csv itself and does not write per-speed files -- confirm this
    # branch is still needed and that the angs1.csv files hold the intended labels.
    angs = _readCsv(trainPath+'angs'+str(1)+'.csv')[:,0]
    angsT = _readCsv(testPath+'angs'+str(1)+'.csv')[:,0]
    angsV = _readCsv(valPath+'angs'+str(1)+'.csv')[:,0]

    # Angle labels converted from degrees to radians
    np.savetxt(outTrainPath+'angsrad.csv', angs*math.pi/180.0, delimiter=',')
    np.savetxt(outTestPath+'angsrad.csv', angsT*math.pi/180.0, delimiter=',')
    np.savetxt(outValPath+'angsrad.csv', angsV*math.pi/180.0, delimiter=',')
else:
    # Pre-allocate the combined arrays; a per-speed file of the wrong size
    # then fails loudly on assignment instead of being silently accepted.
    df = np.zeros((numAng*numVel*nTrain, nKeep))
    dfT = np.zeros((numAng*numVel*nTest, nKeep))
    dfV = np.zeros((numAng*numVel*nValidate, nKeep))

    angs = np.zeros(numAng*numVel*nTrain)
    angsT = np.zeros(numAng*numVel*nTest)
    angsV = np.zeros(numAng*numVel*nValidate)

    mags = np.zeros(numAng*numVel*nTrain)
    magsT = np.zeros(numAng*numVel*nTest)
    magsV = np.zeros(numAng*numVel*nValidate)

    # Each entry: (per-speed folder, rows per speed, readings, speed labels, angle labels)
    splitInputs = (
        (trainPath, numAng*nTrain, df, mags, angs),
        (testPath, numAng*nTest, dfT, magsT, angsT),
        (valPath, numAng*nValidate, dfV, magsV, angsV),
    )

    for k in range(1, 1+numVel):
        print(k)
        for partDir, rowsPerVel, partX, partMags, partAngs in splitInputs:
            # File k fills the k-th consecutive block of rows of its split
            rows = slice(rowsPerVel*(k-1), rowsPerVel*k)
            partX[rows,:] = _readCsv(partDir+'readings'+str(k)+'.csv')
            partMags[rows] = _readCsv(partDir+'mags'+str(k)+'.csv')[:,0]
            partAngs[rows] = _readCsv(partDir+'angs'+str(k)+'.csv')[:,0]

    # Save the files in the desired format. 
    # The old files loaded above can be kept or manually removed
    print('Saving files!')

    for outDir, outX, outMags, outAngs in (
        (outTrainPath, df, mags, angs),
        (outTestPath, dfT, magsT, angsT),
        (outValPath, dfV, magsV, angsV),
    ):
        np.savetxt(outDir+'readings.csv', outX, delimiter=',')
        np.savetxt(outDir+'mags.csv', outMags, delimiter=',')
        np.savetxt(outDir+'angs.csv', outAngs, delimiter=',')
        np.savetxt(outDir+'angsrad.csv', outAngs*math.pi/180.0, delimiter=',')

In [20]:
# Make the (synthetic) triangle data from hexagon data.
# The label files (angs, angsrad, mags) are copied unchanged; the readings keep
# every other hexagon column (0, 2, 4).

N = 5

geo0 = 'hex/'
trainPath = 'compTrain_N'+str(N)+'/'+geo0
testPath = 'compTest_N'+str(N)+'/'+geo0
valPath = 'compVal_N'+str(N)+'/'+geo0

geo = 'tri/'

outTrainPath = 'compTrain_N'+str(N)+'/'+geo
outTestPath = 'compTest_N'+str(N)+'/'+geo
outValPath = 'compVal_N'+str(N)+'/'+geo

triCols = np.array([0, 2, 4])

for srcDir, dstDir in (
    (trainPath, outTrainPath),
    (testPath, outTestPath),
    (valPath, outValPath),
):
    # Neither shutil.copy nor np.savetxt creates missing folders
    os.makedirs(dstDir, exist_ok=True)

    # Labels are identical to the hexagon ones
    for labelFile in ('angs.csv', 'angsrad.csv', 'mags.csv'):
        shutil.copy(srcDir+labelFile, dstDir+labelFile)

    # Readings: keep three of the six hexagon columns
    df0 = (pd.read_csv(srcDir+'readings.csv', header=None)).to_numpy()
    df = df0[:,triCols]
    np.savetxt(dstDir+'readings.csv', df, delimiter=',')

In [32]:
# Make synthetic square data from hexagon data 
# Idea: use 0 and 3; make 1 and 4 shift by 15 of the 180 slots
# (i.e. 30 degrees at 2-degree increments; presumably so that these hexagon
# columns stand in for sensors a quarter turn apart -- TODO confirm).
#
# The readings are ordered speed-major, then angle, then sample, so the shift is
# applied separately inside each speed block of numAng*nPerAngle rows
# (e.g. 180*500 = 90000 train rows per speed, shifted by 15*500 = 7500 rows;
#       180*200 = 36000 test/validation rows per speed, shifted by 15*200 = 3000 rows).
#
# Needs numAng, nTrain, nTest and nValidate from Master Block I in the workspace.

N = 5

geo0 = 'hex/'
trainPath = 'compTrain_N'+str(N)+'/'+geo0
testPath = 'compTest_N'+str(N)+'/'+geo0
valPath = 'compVal_N'+str(N)+'/'+geo0

geo = 'squ/'

outTrainPath = 'compTrain_N'+str(N)+'/'+geo
outTestPath = 'compTest_N'+str(N)+'/'+geo
outValPath = 'compVal_N'+str(N)+'/'+geo

squCols = np.array([0, 1, 3, 4])

# "15 of the 180 slots", expressed relative to numAng so that other angle
# increments get the same angular shift (15 slots when numAng = 180).
shiftSlots = numAng*15//180


def _shiftWithinSpeeds(column, nPerAngle):
    """Shift a readings column by shiftSlots angle slots inside every speed block.

    column    -- 1-D array ordered speed-major, then angle, then sample
    nPerAngle -- number of rows per (speed, angle) pair

    Returns a new array in which, within each block of numAng*nPerAngle rows,
    the values are rolled towards lower indices by shiftSlots*nPerAngle rows
    (wrapping around inside the block). The number of speed blocks is taken
    from the data instead of being hard-coded.
    """
    blockLen = numAng*nPerAngle
    if blockLen == 0 or column.shape[0] % blockLen != 0:
        # Rows that do not fill a whole speed block cannot be shifted consistently
        raise ValueError('Number of rows is not a multiple of numAng*nPerAngle')
    perSpeed = column.reshape(-1, blockLen)
    return np.roll(perSpeed, -shiftSlots*nPerAngle, axis=1).ravel()


splitDirs = (
    (trainPath, outTrainPath, nTrain),
    (testPath, outTestPath, nTest),
    (valPath, outValPath, nValidate),
)

for srcDir, dstDir, nPerAngle in splitDirs:
    # Neither np.savetxt nor shutil.copy creates missing folders
    os.makedirs(dstDir, exist_ok=True)

    df0 = (pd.read_csv(srcDir+'readings.csv', header=None)).to_numpy()
    df = df0[:,squCols]   # fancy indexing returns a copy, so df0 is left untouched

    # Columns 1 and 3 of df are hexagon columns 1 and 4
    dfNew1 = _shiftWithinSpeeds(df[:,1], nPerAngle)
    dfNew3 = _shiftWithinSpeeds(df[:,3], nPerAngle)

    # put in dfNew1 and dfNew3 into df
    df[:,1] = dfNew1
    df[:,3] = dfNew3
    np.savetxt(dstDir+'readings.csv', df, delimiter=',')

# Labels are identical to the hexagon ones
for labelFile in ('angs.csv', 'angsrad.csv', 'mags.csv'):
    for srcDir, dstDir, nPerAngle in splitDirs:
        lastCopied = shutil.copy(srcDir+labelFile, dstDir+labelFile)

# Show the last file written as the cell output
lastCopied


'compVal_N5/squ/mags.csv'