    This notebook takes in a log file, cleans it, adds data we want for our model, and writes the result to a CSV file
    Justin Wasserman - 

## Import and Verify Datalog

In [303]:
import pandas as pd
import numpy as np
import math

In [304]:
# Directory (relative to this notebook) that holds the input files; the results CSV is written here too.
datalog_DIR = '../../data/'

In [305]:
# Full path of the raw simulator datalog that this notebook cleans.
datalogFile = datalog_DIR + '02-11-2019_13-08-38.csv'

# Blank fields are read as NaN; a later cell checks which columns contain them.
# ',' is read_csv's default separator, so it is not passed explicitly.
df = pd.read_csv(datalogFile)
df.head()

Unnamed: 0,Time,ID,X,Y,Yaw,ResetID,checkCorrectness,NumberOfWalls,WallId(s)
0,0 1000000,0,0.0,0.0,0.0,1,1,0,
1,0 2000000,0,0.0,0.0,0.0,1,1,0,
2,0 3000000,0,0.0,0.0,0.0,1,1,0,
3,0 4000000,0,0.0,0.0,0.0,1,1,0,
4,0 5000000,0,0.0,0.0,0.0,1,1,0,


In [306]:
# The datalog can be stopped while a row is still being written, which leaves NaNs in the
# final row. Discard the last row unconditionally rather than trying to detect that case.
df.drop(index=df.index[-1:], inplace=True)

In [307]:
#Verify that only WallId(s) has NaN in it
NaNs = df.isnull().any() #True for every column that contains at least one NaN
#WallId(s) is blank on rows with no wall contact, so NaN is expected there; NaN in any other column
#means the log is malformed. The previous check joined its two conditions with `and`, so it stayed
#silent when WallId(s) had NaN and some other column did too.
unexpectedNaNColumns = [column for column in NaNs.index[NaNs.values] if column != 'WallId(s)']
if unexpectedNaNColumns:
    print("[cleans_minimal] More than one column has a NaN in it", unexpectedNaNColumns)


## Time

The time column is in the form of second(s) space millisecond with six 0's after the milliseconds (up to 999 milliseconds are contained in the time). So "0 1000000" is 1 millisecond while "0 10000000" is 10 milliseconds. However, times that are just seconds, and have 0 milliseconds only have one 0, so "1 0" is one second and not "1 000000".

In [308]:
# Convert each "seconds milliseconds" string into a single float number of seconds.
for row_label in df.index:
    sec_text, ms_text = df.at[row_label, 'Time'].split(' ')
    # A bare '0' means no milliseconds; any other value carries six trailing zeros
    # that are stripped before scaling milliseconds to seconds.
    ms_value = float(ms_text) if ms_text == '0' else float(ms_text[:-6]) / 1000.0
    df.at[row_label, 'Time'] = float(sec_text) + ms_value
df.head()

Unnamed: 0,Time,ID,X,Y,Yaw,ResetID,checkCorrectness,NumberOfWalls,WallId(s)
0,0.001,0,0.0,0.0,0.0,1,1,0,
1,0.002,0,0.0,0.0,0.0,1,1,0,
2,0.003,0,0.0,0.0,0.0,1,1,0,
3,0.004,0,0.0,0.0,0.0,1,1,0,
4,0.005,0,0.0,0.0,0.0,1,1,0,


## Check Correctness

The gazebo simulator verifies that the ball is in a hub, and the hubs/weaselballs are within the environment. CheckCorrectness is the variable that gets printed to the datalog to verify that the simulator is running correctly for a given timestep. So, any rows with a checkCorrectness = 0 should be removed.

In [309]:
#Keep only the rows whose checkCorrectness flag is non-zero.
#If the checkCorrectness code doesn't become generalized, it may be appropriate to skip this cell.
#Note: the surviving rows keep their original index labels, so the index can have gaps afterwards.
df = df.loc[df['checkCorrectness'].ne(0)]
df.head()

Unnamed: 0,Time,ID,X,Y,Yaw,ResetID,checkCorrectness,NumberOfWalls,WallId(s)
0,0.001,0,0.0,0.0,0.0,1,1,0,
1,0.002,0,0.0,0.0,0.0,1,1,0,
2,0.003,0,0.0,0.0,0.0,1,1,0,
3,0.004,0,0.0,0.0,0.0,1,1,0,
4,0.005,0,0.0,0.0,0.0,1,1,0,


## WallId(s) / NumberOfWalls

Since the Gazebo simulator makes the models shoot away from a wall after a collision, I will add a heuristic: if a wall was touched in the last n ms and there are no collisions currently, then we will still consider the row to be colliding with that wall.

In [310]:
#Make all wallId(s) strings
for i in df.index:
    if df.at[i, 'NumberOfWalls'] > 0:
        rawWallIds = df.at[i, 'WallId(s)']
        #isinstance instead of an exact type comparison: when the column is parsed as a pure
        #float column the values are numpy.float64 (a subclass of float), and `type(x) == float`
        #would skip them, leaving non-string ids behind.
        if isinstance(rawWallIds, float):
            df.at[i, 'WallId(s)'] = str(int(rawWallIds))

In [311]:
#verify: every row that reports at least one wall must hold its wall id(s) as a str
rowsWithWalls = df.index[(df['NumberOfWalls'] > 0).values]
for row_label in rowsWithWalls:
    if type(df.at[row_label, 'WallId(s)']) is not str:
        print("Error, non-string detected!")

In [312]:
# Look-back window for the wall-contact heuristic in the next cell. That cell counts rows,
# so this equals milliseconds only if consecutive rows are 1 ms apart — TODO confirm.
n = 100 #milliseconds since last collision

In [313]:
# Carry the most recent wall contact forward onto the contact-free rows that follow it,
# for as long as fewer than n rows have passed since that contact.
lastWall = None
lastNumberOfWalls = None
rowsSinceLastWall = 0
for row_label in df.index:
    walls_here = df.at[row_label, 'NumberOfWalls']
    if walls_here > 0:
        # Real contact: remember it and restart the counter (the next row is 1 row later).
        lastWall = df.at[row_label, 'WallId(s)']
        lastNumberOfWalls = walls_here
        rowsSinceLastWall = 1
        continue
    if lastWall is not None and rowsSinceLastWall < n:
        # No contact reported on this row, but one happened recently: reuse it.
        df.at[row_label, 'NumberOfWalls'] = lastNumberOfWalls
        df.at[row_label, 'WallId(s)'] = lastWall
    rowsSinceLastWall += 1

In [314]:
# Sum of NumberOfWalls over every row currently in df.
total = sum(df['NumberOfWalls'].values)
total

4663

## Enclosure Data

Here I will import the enclosure data

In [315]:
# Description of the enclosure rails (pose and size per rail), stored next to the datalog.
enclosureFile = datalog_DIR + 'boundaryDescription.txt'
# ',' is read_csv's default separator, so it is not passed explicitly.
enclosure_df = pd.read_csv(enclosureFile)
enclosure_df.head()


Unnamed: 0,name,X,Y,Z,Roll,Pitch,Yaw,sizeX,sizeY,sizeZ
0,rail01,0.56355,0.0,0.03175,0,0,0.0,0.01905,1.12713,0.0889
1,rail02,0.0,0.56356,0.03175,0,0,1.57,0.01905,1.1525,0.0889
2,rail03,-0.56355,0.0,0.03175,0,0,3.14,0.01905,1.12713,0.0889
3,rail04,0.0,-0.56356,0.03175,0,0,-1.57319,0.01905,1.1525,0.0889


Next I will change the name of the railXX to become the ID to match the df.

In [316]:
# Replace each "railNN" name with the integer NN.
for rail_row in enclosure_df.index:
    rail_label = enclosure_df.at[rail_row, 'name']
    enclosure_df.at[rail_row, 'name'] = int(rail_label.replace("rail", ""))
enclosure_df.head()

Unnamed: 0,name,X,Y,Z,Roll,Pitch,Yaw,sizeX,sizeY,sizeZ
0,1,0.56355,0.0,0.03175,0,0,0.0,0.01905,1.12713,0.0889
1,2,0.0,0.56356,0.03175,0,0,1.57,0.01905,1.1525,0.0889
2,3,-0.56355,0.0,0.03175,0,0,3.14,0.01905,1.12713,0.0889
3,4,0.0,-0.56356,0.03175,0,0,-1.57319,0.01905,1.1525,0.0889


Now I will get a vector to represent each corner, this can be used to perform a cross product on the trajectory of 
the robot going into/out of a corner to find the angle at which the robot enters/leaves

In [317]:
#get vector
from numpy import ones,vstack
from numpy.linalg import lstsq
# Store a direction vector (vector_x, vector_y) for every rail.
for rail_row in enclosure_df.index:
    yaw = enclosure_df.at[rail_row, 'Yaw']
    size_y = enclosure_df.at[rail_row, 'sizeY']

    # Start point: rail centre shifted back by half of sizeX along (cos(yaw), sin(yaw)).
    x1 = enclosure_df.at[rail_row, 'X'] - (enclosure_df.at[rail_row, 'sizeX'] / 2.0) * np.cos(yaw)
    y1 = enclosure_df.at[rail_row, 'Y'] - (enclosure_df.at[rail_row, 'sizeX'] / 2.0) * np.sin(yaw)

    # End point: start point advanced by sizeY along (sin(yaw), cos(yaw)).
    # NOTE(review): a counter-clockwise rotation of the local +Y axis by yaw would be
    # (-sin(yaw), cos(yaw)); confirm the sign of the sin term is intended.
    x2 = x1 + (size_y * np.sin(yaw))
    y2 = y1 + (size_y * np.cos(yaw))

    enclosure_df.at[rail_row, 'vector_x'] = x2 - x1
    enclosure_df.at[rail_row, 'vector_y'] = y2 - y1
enclosure_df


Unnamed: 0,name,X,Y,Z,Roll,Pitch,Yaw,sizeX,sizeY,sizeZ,vector_x,vector_y
0,1,0.56355,0.0,0.03175,0,0,0.0,0.01905,1.12713,0.0889,0.0,1.12713
1,2,0.0,0.56356,0.03175,0,0,1.57,0.01905,1.1525,0.0889,1.1525,0.000918
2,3,-0.56355,0.0,0.03175,0,0,3.14,0.01905,1.12713,0.0889,0.001795,-1.127129
3,4,0.0,-0.56356,0.03175,0,0,-1.57319,0.01905,1.1525,0.0889,-1.152497,-0.002759


## Bounce angle

To get the bounce angle, 2 lines are needed. The first one is the line from the wall which is found above. The second line comes from creating a line from the point where the wall is hit with the points from the previous k time steps.

In [318]:
# Largest look-back depth k used when fitting the incoming trajectory; fits are made for k = 1..MAX_K.
MAX_K = 10
df.head()

Unnamed: 0,Time,ID,X,Y,Yaw,ResetID,checkCorrectness,NumberOfWalls,WallId(s)
0,0.001,0,0.0,0.0,0.0,1,1,0,
1,0.002,0,0.0,0.0,0.0,1,1,0,
2,0.003,0,0.0,0.0,0.0,1,1,0,
3,0.004,0,0.0,0.0,0.0,1,1,0,
4,0.005,0,0.0,0.0,0.0,1,1,0,


In [319]:
#Get incoming bounce angles
for i in df.index:
    #I choose the weird if statement here because sometimes if we have a 2 walls, it can hit a 1 wall first.
    #I want the person analyzing the data to decide if that is useful
    if df.at[i, 'NumberOfWalls'] > 0 and df.at[i-1, 'NumberOfWalls'] < df.at[i, 'NumberOfWalls']:
        angles = []
        residuals = []
        wallIds = (df.at[i, 'WallId(s)']).split('&')
        for wall in wallIds:
            wallV = (enclosure_df.at[int(wall)-1,'vector_x'], enclosure_df.at[int(wall)-1,'vector_y'])
            weaselPoints = []
            for local_max_k in range(1,MAX_K+1):
                localWeaselPoints = []
                for k in range(local_max_k+1):
                    localWeaselPoints.append((df.at[i-k*100, 'X'],df.at[i-k*100, 'Y'])) #Add a -1 because 0 based iteration over k
                weaselPoints.append(localWeaselPoints)
            local_angles = []
            local_residuals = []
            for points in weaselPoints:
                #Find line of best fit for k
                x,y = zip(*points)
                #The residual value returned is the sum of the squares of the fit errors
                line, residual, _, _, _ = (np.polyfit(x, y, 1, full=True)) 
                weaselV = (1, line[0]) #line.c[0] is the slope of the line
                try:
                    angle = math.acos((np.dot(wallV,weaselV)) / (np.linalg.norm(weaselV) * np.linalg.norm(wallV)))
                except Exception as e:
                    print(e)
                    angle = -999999999
                local_angles.append(angle)
                local_residuals.append(residual)
            angles.append(local_angles)
            residuals.append(local_residuals)
        #Write angle data to df.
        #If it is just 1 wall, then just write the angle
        #If it is more than 1 wall, write angles with an & in between them
        for j in range(df.at[i, 'NumberOfWalls']):
            for k in range(1,MAX_K+1):
                df.at[i, 'in_angle'+str(k)] = str(angles[j][k-1])
                df.at[i, 'residuals_in_angle'+str(k)] = str(residuals[j][k-1])
                if(j != df.at[i, 'NumberOfWalls']-1):
                    df.at[i, 'in_angle'+str(k)] = df.at[i, 'in_angle'+str(k)] + "&"
                    df.at[i, 'residuals_in_angle'+str(k)] = df.at[i, 'residuals_in_angle'+str(k)] + "&"

                

    


## Output CSV

In [320]:
# Preview the frame, including the in_angle*/residuals_in_angle* columns, before it is written out.
df.head()

Unnamed: 0,Time,ID,X,Y,Yaw,ResetID,checkCorrectness,NumberOfWalls,WallId(s),in_angle1,...,in_angle6,residuals_in_angle6,in_angle7,residuals_in_angle7,in_angle8,residuals_in_angle8,in_angle9,residuals_in_angle9,in_angle10,residuals_in_angle10
0,0.001,0,0.0,0.0,0.0,1,1,0,,,...,,,,,,,,,,
1,0.002,0,0.0,0.0,0.0,1,1,0,,,...,,,,,,,,,,
2,0.003,0,0.0,0.0,0.0,1,1,0,,,...,,,,,,,,,,
3,0.004,0,0.0,0.0,0.0,1,1,0,,,...,,,,,,,,,,
4,0.005,0,0.0,0.0,0.0,1,1,0,,,...,,,,,,,,,,


In [321]:
# Write the cleaned, annotated log next to the input data.
df.to_csv(datalog_DIR + "results.csv")

## Debug

In [322]:
#Print every row where a wall contact starts or gains walls, i.e. NumberOfWalls is positive and
#larger than on the preceding row. (The old comment said "more than 2 walls", which is not what is tested.)
#shift() compares against the preceding row that is still in df, so the first row and index gaps
#no longer raise KeyError the way a lookup of label i-1 did.
contactStarts = (df['NumberOfWalls'] > 0) & (df['NumberOfWalls'].shift(1) < df['NumberOfWalls'])
for i in df.index[contactStarts.values]:
    print(df.loc[i])

Time                                 1.881
ID                                       0
X                                 0.273929
Y                                 0.493964
Yaw                              0.0203294
ResetID                                  1
checkCorrectness                         1
NumberOfWalls                            1
WallId(s)                                2
in_angle1               1.1222096893779945
residuals_in_angle1                     []
in_angle2               1.0464328190030465
residuals_in_angle2       [3.83378987e-05]
in_angle3               0.9368219656211045
residuals_in_angle3            [0.0002644]
in_angle4               0.8620629257861993
residuals_in_angle4           [0.00049416]
in_angle5               0.8661404468104638
residuals_in_angle5            [0.0004961]
in_angle6               0.9085456761206693
residuals_in_angle6           [0.00104352]
in_angle7               0.9630387857852427
residuals_in_angle7           [0.00271051]
in_angle8  