In [1]:
# Mount Google Drive at /content/drive; the dataset paths used below all live under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Read dataset from csv
import pandas as pd
import os
import numpy as np
import time

# Folders on the mounted drive that hold the raw per-recording CSV files.
train_dir = '/content/drive/MyDrive/CapstoneDataset/training'
test_dir = '/content/drive/MyDrive/CapstoneDataset/test'

# Two-digit file prefixes '01' .. '18'.
file_number = ['%02d' % n for n in range(1, 19)]

# Columns removed from every recording right after loading.
dropped_columns = ['Frame', 'Time (Seconds)']

train_data = []
for number in file_number:
  csv_path = '%s/%s_train.csv' % (train_dir, number)
  print(csv_path)
  # header=5: pandas takes the column names from row 5 (0-based) of the file.
  df = pd.read_csv(csv_path, header=5).drop(columns=dropped_columns)
  train_data.append(df)

test_data = []
for number in file_number:
  csv_path = '%s/%s_test.csv' % (test_dir, number)
  print(csv_path)
  df = pd.read_csv(csv_path, header=5).drop(columns=dropped_columns)
  test_data.append(df)

/content/drive/MyDrive/CapstoneDataset/training/01_train.csv
/content/drive/MyDrive/CapstoneDataset/training/02_train.csv
/content/drive/MyDrive/CapstoneDataset/training/03_train.csv
/content/drive/MyDrive/CapstoneDataset/training/04_train.csv
/content/drive/MyDrive/CapstoneDataset/training/05_train.csv
/content/drive/MyDrive/CapstoneDataset/training/06_train.csv
/content/drive/MyDrive/CapstoneDataset/training/07_train.csv
/content/drive/MyDrive/CapstoneDataset/training/08_train.csv
/content/drive/MyDrive/CapstoneDataset/training/09_train.csv
/content/drive/MyDrive/CapstoneDataset/training/10_train.csv
/content/drive/MyDrive/CapstoneDataset/training/11_train.csv
/content/drive/MyDrive/CapstoneDataset/training/12_train.csv
/content/drive/MyDrive/CapstoneDataset/training/13_train.csv
/content/drive/MyDrive/CapstoneDataset/training/14_train.csv
/content/drive/MyDrive/CapstoneDataset/training/15_train.csv
/content/drive/MyDrive/CapstoneDataset/training/16_train.csv
/content/drive/MyDrive/C

The next cell (furthest point sampling) took over an hour to run, so its final result was exported to CSV files. It does not need to be run again; the cells after it read the exported files instead.

In [None]:
# Furthest Point Sampling
from os import remove
def read_points(x,y,z):
    """Stack three coordinate sequences side by side.

    x, y and z are joined along axis 1, so for 1-D inputs of length n the
    result is an (n, 3) array whose rows are (x, y, z) points.
    """
    return np.stack((x, y, z), axis=1)

''' Strategy of FPS (furthest point sampling)
Step 1: Use the centre of the point cloud's bounding box as the first query point and take the point furthest from it.
Step 2: Add the point just taken to the selected set and take the next furthest point from the remaining points.
Since the selected set now holds more than one point, every point already selected has to be taken into account.
The calculation logic is as follows.
    Step 2.1: For each remaining point, calculate its distance to every point in the selected set.
    Step 2.2: Take the minimum of those values; this is the distance from the remaining point to the selected set.
    Step 2.3: After calculating this distance for every remaining point, take the point with the maximum distance.
Step 3: Repeat step 2 until K points have been selected (K=100 in this notebook).
'''
class FPS:
    """Furthest point sampling (FPS) over a de-duplicated 3D point cloud.

    Attributes:
        points:  the unique input points, in the sorted order produced by
                 ``np.unique``.
        indices: for every row of ``points``, the row number of its first
                 occurrence in the array passed to the constructor.
    """

    def __init__(self, points):
        """Store the unique rows of ``points`` together with their original row numbers."""
        print('init start')
        # np.unique sorts the rows while removing duplicates, so the position of a
        # point in self.points is NOT its position in the input; return_index keeps
        # the mapping back to the caller's rows.
        self.points, self.indices = np.unique(points, axis=0, return_index=True)
        print('init finish')

    def get_min_distance(self, a, b):
        """Return the position in ``b`` of the point furthest from the set ``a``.

        For every point of ``b`` the squared Euclidean distance to each point of
        ``a`` is computed and the minimum is kept (distance from the point to the
        set). The position of the largest of those minima is returned.

        Args:
            a: array of already selected points, one point per row.
            b: array of candidate points, one point per row.
        """
        # One column of squared distances per selected point.
        distance = np.stack(
            [np.sum(np.square(selected - b), axis=-1) for selected in a], axis=-1
        )
        return np.argmax(np.min(distance, axis=-1))

    @staticmethod
    def get_model_corners(model):
        """Return the 8 corners of the axis-aligned bounding box of ``model``.

        The corners are every combination of the minimum and maximum value found
        in columns 0, 1 and 2, returned as an (8, 3) array.
        """
        x_range = (np.min(model[:, 0]), np.max(model[:, 0]))
        y_range = (np.min(model[:, 1]), np.max(model[:, 1]))
        z_range = (np.min(model[:, 2]), np.max(model[:, 2]))
        corners_3d = np.array(
            [[x, y, z] for x in x_range for y in y_range for z in z_range]
        )
        return corners_3d

    def compute_fps(self, K):
        """Select ``K`` points by furthest point sampling.

        Sampling starts from the centre of the bounding box; each step then takes
        the remaining point whose distance to the already selected set is largest.

        Args:
            K: number of points to select.

        Returns:
            A tuple ``(A, t)``. ``A`` has ``K + 1`` rows: the bounding-box centre
            followed by the selected points in selection order. ``t`` lists, for
            each selected point, the row number of that point in the array that
            was passed to the constructor.

        Raises:
            ValueError: if ``K`` exceeds the number of unique points.
        """
        n_points = self.points.shape[0]
        if K > n_points:
            raise ValueError(
                'cannot sample %d points from %d unique points' % (K, n_points)
            )

        # Centre of the bounding box, used as the initial query point.
        corner_3d = self.get_model_corners(self.points)
        center_3d = (np.max(corner_3d, 0) + np.min(corner_3d, 0)) / 2

        selected = [center_3d]
        remaining = np.array(self.points)
        # Original row numbers, kept aligned with `remaining` while rows are removed.
        remaining_idx = np.array(self.indices)
        # Running squared distance from every remaining point to the selected set.
        # Updating it with the newest point only gives the same minima as
        # recomputing the distance to every selected point on each iteration.
        min_dist = np.sum(np.square(center_3d - remaining), axis=-1)

        t = []
        for _ in range(K):
            max_id = int(np.argmax(min_dist))
            chosen = remaining[max_id]
            selected.append(chosen)
            t.append(int(remaining_idx[max_id]))

            remaining = np.delete(remaining, max_id, 0)
            remaining_idx = np.delete(remaining_idx, max_id, 0)
            min_dist = np.delete(min_dist, max_id, 0)
            if remaining.shape[0]:
                newest = np.sum(np.square(chosen - remaining), axis=-1)
                min_dist = np.minimum(min_dist, newest)

        A = np.array(selected)
        return A, t

# Calculate the center point of 89 markers. The result is a nrows*3 matrix.
def compute_center_point(dataframe):
  """Compute the mean 3D position of all markers for every row (frame).

  The columns are read as consecutive (x, y, z) triples, one triple per
  marker; columns left over after the last complete triple are ignored.

  Args:
    dataframe: frame whose columns hold numeric marker coordinates.

  Returns:
    A DataFrame with columns 'X', 'Y', 'Z' and one row per input row,
    indexed 0..n-1, holding the per-row mean of each coordinate. A row that
    contains a NaN coordinate yields NaN for that coordinate.

  Raises:
    ValueError: if the frame has fewer than three columns.
  """
  values = dataframe.to_numpy(dtype=float)
  n_rows, n_cols = values.shape
  n_markers = n_cols // 3
  if n_markers == 0:
    raise ValueError('expected at least one (x, y, z) column triple')
  # Shape (rows, markers, 3): averaging over axis 1 averages across markers.
  # Done in one vectorized step instead of per-cell Python lookups.
  coords = values[:, :n_markers * 3].reshape(n_rows, n_markers, 3)
  return pd.DataFrame(coords.mean(axis=1), columns=['X', 'Y', 'Z'])

# Export sampled data to csv
sampled_train_dir = '/content/drive/MyDrive/CapstoneDataset/sampled_training'
sampled_test_dir = '/content/drive/MyDrive/CapstoneDataset/sampled_test'


def _sample_and_export(frames, out_dir, n_samples=100):
  """Down-sample every recording with FPS and write it to ``out_dir``.

  For each frame table in ``frames``:
    1. compute the centre point of all markers in every row,
    2. run furthest point sampling on those centre points to pick
       ``n_samples`` rows,
    3. look the picked rows up in the original table and export them as
       '<position>.csv' (1-based position in ``frames``).

  Args:
    frames: list of DataFrames, one per recording.
    out_dir: directory the CSV files are written to (created if missing).
    n_samples: number of rows kept per recording; change the sampling
      number here.
  """
  import os  # local import so this cell does not depend on an earlier import cell
  os.makedirs(out_dir, exist_ok=True)

  for count, df in enumerate(frames, start=1):
    points = compute_center_point(df)
    print(count)
    f = FPS(read_points(points['X'],points['Y'],points['Z']))

    # Row labels of the sampled frames, in selection order.
    (C, index) = f.compute_fps(n_samples)

    # Select all sampled rows in one lookup; DataFrame.append was removed in
    # pandas 2.0 and copied the whole frame for every appended row.
    data1 = df.loc[index].reset_index(drop=True)
    data1.to_csv(out_dir+'/'+str(count)+'.csv', index=False)
    print('finish output')


_sample_and_export(train_data, sampled_train_dir)
_sample_and_export(test_data, sampled_test_dir)

# count = 1
# for i in range(2,268,3):
#   f = FPS(read_points(df[df.columns[i-2]],df[df.columns[i-1]],df[df.columns[i]]))
#   C = f.compute_fps(100)#Number of sampling points
#   file.writelines("marker"+str(count)+"\n")
#   count = count + 1
#   for j in C:
#     file.writelines(str(float(j[0]))+"\t"+str(float(j[1]))+"\t"+str(float(j[2]))+"\n")

11
init start
init finish
finish output


In [21]:
# Read sampled data from csv
import pandas as pd
import os
import numpy as np

# Folders that hold the FPS-sampled recordings exported earlier.
sampled_train_dir = '/content/drive/MyDrive/CapstoneDataset/sampled_training'
sampled_test_dir = '/content/drive/MyDrive/CapstoneDataset/sampled_test'

# File stems '1' .. '18'.
sampled_file_number = [str(n) for n in range(1, 19)]

sampled_train_data = []
for number in sampled_file_number:
  csv_path = sampled_train_dir + '/' + number + '.csv'
  print(csv_path)
  df = pd.read_csv(csv_path)
  sampled_train_data.append(df)

sampled_test_data = []
for number in sampled_file_number:
  csv_path = sampled_test_dir + '/' + number + '.csv'
  print(csv_path)
  df = pd.read_csv(csv_path)
  sampled_test_data.append(df)

/content/drive/MyDrive/CapstoneDataset/sampled_training/1.csv
/content/drive/MyDrive/CapstoneDataset/sampled_training/2.csv
/content/drive/MyDrive/CapstoneDataset/sampled_training/3.csv
/content/drive/MyDrive/CapstoneDataset/sampled_training/4.csv
/content/drive/MyDrive/CapstoneDataset/sampled_training/5.csv
/content/drive/MyDrive/CapstoneDataset/sampled_training/6.csv
/content/drive/MyDrive/CapstoneDataset/sampled_training/7.csv
/content/drive/MyDrive/CapstoneDataset/sampled_training/8.csv
/content/drive/MyDrive/CapstoneDataset/sampled_training/9.csv
/content/drive/MyDrive/CapstoneDataset/sampled_training/10.csv
/content/drive/MyDrive/CapstoneDataset/sampled_training/11.csv
/content/drive/MyDrive/CapstoneDataset/sampled_training/12.csv
/content/drive/MyDrive/CapstoneDataset/sampled_training/13.csv
/content/drive/MyDrive/CapstoneDataset/sampled_training/14.csv
/content/drive/MyDrive/CapstoneDataset/sampled_training/15.csv
/content/drive/MyDrive/CapstoneDataset/sampled_training/16.csv
/

In [23]:
# Normalization
from itertools import chain

# Compute the center point of 89*100 points.
def compute_centroid(pc):
  """Compute the centroid of every (x, y, z) point stored in ``pc``.

  The columns are read as consecutive (x, y, z) triples; columns left over
  after the last complete triple are ignored. For each axis the values of
  all rows and all triples are summed and divided by the number of values.

  NaN values are skipped by the sum but still counted in the divisor, which
  is what the previous pandas-based implementation did.
  NOTE(review): if NaNs can occur in the sampled data, confirm whether a
  NaN-aware mean is wanted instead.

  Args:
    pc: DataFrame of numeric coordinates.

  Returns:
    A DataFrame of shape (3, 1) (rows 0, 1, 2 = x, y, z; single column 0)
    holding the centroid.
  """
  values = pc.to_numpy(dtype=float)
  n_markers = values.shape[1] // 3
  coords = values[:, :n_markers * 3]

  centroid = []
  for axis in range(3):
    # Every third column starting at `axis` belongs to the same coordinate.
    axis_values = coords[:, axis::3]
    n_values = axis_values.size
    centroid.append(np.nansum(axis_values) / n_values if n_values else np.nan)
  return pd.DataFrame(centroid)

# centroid = compute_centroid(sampled_train_data[0])

def normalize_point_cloud(pc):
    """Centre a point cloud on the origin and scale it into [-1, 1].

    The columns of ``pc`` are read as consecutive (x, y, z) triples. The
    input frame is left untouched; all results are new objects.

    Args:
        pc: DataFrame of numeric coordinates.

    Returns:
        A tuple ``(pc, pc_normalized, centroid, m)``:
          * ``pc``: the translated cloud (centroid moved to the origin),
          * ``pc_normalized``: the translated cloud divided by ``m``,
          * ``centroid``: list ``[cx, cy, cz]`` that was subtracted,
          * ``m``: the scaling ratio.
        ``centroid`` and ``m`` allow the original coordinates to be recovered.
    """
    # compute_centroid returns a 3x1 frame; flatten it to [cx, cy, cz].
    centroid = list(chain.from_iterable(compute_centroid(pc).T.values))

    # Per-column offsets: (cx, cy, cz) repeated for every complete triple,
    # zero for any trailing columns that do not form a triple.
    n_cols = pc.shape[1]
    n_markers = n_cols // 3
    offsets = np.zeros(n_cols)
    offsets[:n_markers * 3] = np.tile(centroid, n_markers)

    # Subtraction builds a new frame, so the caller's data is not modified.
    pc = pc - offsets

    # Scaling ratio: square the translated cloud, sum each row over ALL of its
    # columns, take the square root and keep the largest value.
    # NOTE(review): this is the norm of a whole row, not of a single 3D
    # point - confirm that is the intended "long axis" length.
    m = np.max(np.sqrt(np.sum(pc ** 2, axis=1)))

    # m == 0 means every translated value is zero; dividing would produce NaN.
    pc_normalized = pc / m if m != 0 else pc.copy()
    return pc, pc_normalized, centroid, m  # centroid: center point, m: scaling ratio


# Export normalization data to csv
normalize_train_dir = '/content/drive/MyDrive/CapstoneDataset/normalize_training'
normalize_test_dir = '/content/drive/MyDrive/CapstoneDataset/normalize_test'


def _normalize_and_export(frames, out_dir):
  """Normalize every point cloud in ``frames`` and write it to ``out_dir``.

  Each cloud is exported as '<position>.csv', where position is its 1-based
  place in ``frames``. The number is taken from the position rather than
  from a counter that only advances on success, so a cloud that fails the
  range check can never shift the numbering of the clouds after it.

  Args:
    frames: list of DataFrames to normalize.
    out_dir: directory the CSV files are written to (created if missing).
  """
  import os  # local import so this cell does not depend on an earlier import cell
  os.makedirs(out_dir, exist_ok=True)

  for count, data in enumerate(frames, start=1):
    print(count)
    (pc, pc_normalized, centroid, length) = normalize_point_cloud(data)
    # Only export clouds whose values really ended up inside [-1, 1]
    # (the comparison is also False when the result contains only NaN).
    if(pc_normalized.min().min() >= -1 and pc_normalized.max().max() <= 1):
      pc_normalized.to_csv(out_dir+'/'+str(count)+'.csv', index=False)
      print('finish output')
    else:
      print('skipped: normalized values are not within [-1, 1]')


_normalize_and_export(sampled_train_data, normalize_train_dir)
_normalize_and_export(sampled_test_data, normalize_test_dir)

1
finish output
2
finish output
3
finish output
4
finish output
5
finish output
6
finish output
7
finish output
8
finish output
9
finish output
10
finish output
11
finish output
12
finish output
13
finish output
14
finish output
15
finish output
16
finish output
17
finish output
18
finish output
1
finish output
2
finish output
3
finish output
4
finish output
5
finish output
6
finish output
7
finish output
8
finish output
9
finish output
10
finish output
11
finish output
12
finish output
13
finish output
14
finish output
15
finish output
16
finish output
17
finish output
18
finish output


In [24]:
# Read normalization data from csv
import pandas as pd
import os
import numpy as np

# File stems '1' .. '18' of the normalized recordings.
normalize_file_number = [str(n) for n in range(1, 19)]

# normalize_train_dir / normalize_test_dir are defined in the normalization cell.
normalize_train_data = []
for number in normalize_file_number:
  csv_path = normalize_train_dir + '/' + number + '.csv'
  print(csv_path)
  df = pd.read_csv(csv_path)
  normalize_train_data.append(df)

normalize_test_data = []
for number in normalize_file_number:
  csv_path = normalize_test_dir + '/' + number + '.csv'
  print(csv_path)
  df = pd.read_csv(csv_path)
  normalize_test_data.append(df)

/content/drive/MyDrive/CapstoneDataset/normalize_training/1.csv
/content/drive/MyDrive/CapstoneDataset/normalize_training/2.csv
/content/drive/MyDrive/CapstoneDataset/normalize_training/3.csv
/content/drive/MyDrive/CapstoneDataset/normalize_training/4.csv
/content/drive/MyDrive/CapstoneDataset/normalize_training/5.csv
/content/drive/MyDrive/CapstoneDataset/normalize_training/6.csv
/content/drive/MyDrive/CapstoneDataset/normalize_training/7.csv
/content/drive/MyDrive/CapstoneDataset/normalize_training/8.csv
/content/drive/MyDrive/CapstoneDataset/normalize_training/9.csv
/content/drive/MyDrive/CapstoneDataset/normalize_training/10.csv
/content/drive/MyDrive/CapstoneDataset/normalize_training/11.csv
/content/drive/MyDrive/CapstoneDataset/normalize_training/12.csv
/content/drive/MyDrive/CapstoneDataset/normalize_training/13.csv
/content/drive/MyDrive/CapstoneDataset/normalize_training/14.csv
/content/drive/MyDrive/CapstoneDataset/normalize_training/15.csv
/content/drive/MyDrive/CapstoneDat