In [6]:
import numpy as np
import pandas as pd
from sklearn.manifold import TSNE
from sklearn.decomposition import PCA
from sklearn.preprocessing import MinMaxScaler

import matplotlib
import seaborn as sns
import matplotlib.pyplot as plt

In [7]:
# Ensure the pointctl tool is installed: printing its help text only works
# when the binary can be found on PATH.
!pointctl --help

Point cloud processing 0.3.0
Gijs van Steenpaal <g.j.vansteenpaal@students.uu.nl>
Program for generating, processing and explaining point clouds

USAGE:
    pointctl [SUBCOMMAND]

FLAGS:
    -h, --help       Prints help information
    -V, --version    Prints version information

SUBCOMMANDS:
    explain     Calculate a explanation given the original and reduced dataset (Only DaSilva right now)
    generate    Generate synthetic point clouds
    help        Prints this message or the help of the given subcommand(s)
    reduce      Reduce a nD dataset to 2D or 3D
    view        Allows you to view 3D data points given the original data, reduced points and the annotations. This
                command assumes that the reduced points, original data and annotations have matching indexes. The to
                start the viewer you need to provide the original data and either a 2d or 3d reduced set.
                Annotations are optional and can be computed from the viewer 


## Load in the cube dataset and reduce using PCA

In [8]:
# Generate a synthetic cube-pattern point cloud and write it to ../data/cube/cube.csv.
# -p 10000 requests 10000 points; -n 0.01 is presumably the noise level —
# TODO confirm against `pointctl generate --help`.
!pointctl generate -n 0.01 -p 10000 ../data/cube/cube.csv

Will generate 10000 points in cube pattern
Generated 1000 points
Generated 2000 points
Generated 3000 points
Generated 4000 points
Generated 5000 points
Generated 6000 points
Generated 7000 points
Generated 8000 points
Generated 9000 points
Generated 10000 points
All points written to file


In [11]:
# Load the generated cube points; the file is parsed with ';' as field separator.
df = pd.read_csv('../data/cube/cube.csv', sep=';')

In [12]:
def label(row, threshold=0.05):
    """Classify a point by the first of its three coordinates that is near zero.

    The first three positional values of ``row`` are checked in order and the
    position of the first one strictly below ``threshold`` is returned, so a
    point that is below the threshold on several axes gets the lowest
    matching class.

    Parameters
    ----------
    row : pandas.Series or sequence
        Record whose first three positional values are compared against
        ``threshold``.
    threshold : float, default 0.05
        Upper bound (exclusive) below which a coordinate counts as a match.

    Returns
    -------
    int or None
        0, 1 or 2 for the first matching position; ``None`` when none of the
        three values is below ``threshold``.
    """
    # Integer keys on a label-indexed Series (``row[0]``) are deprecated
    # positional lookups in pandas; ``.iloc`` is the explicit positional
    # accessor. Plain sequences have no ``.iloc`` and are indexed directly.
    values = row.iloc if hasattr(row, 'iloc') else row
    for axis in range(3):
        if values[axis] < threshold:
            return axis
    # Made explicit: the caller receives None for points that match no axis.
    return None
# Label every row (axis=1 hands each row to `label`) and store the class in 'cls'.
df['cls'] = df.apply(label, axis=1)

In [13]:
# Preview the first rows to check the coordinates and the new 'cls' column.
df.head()

Unnamed: 0,x,y,z,cls
0,0.436531,-0.002838,0.765682,1
1,0.39383,0.201358,-0.007635,2
2,0.47086,0.502421,0.0056,2
3,0.004447,0.273301,0.122724,0
4,-0.001644,0.420992,0.162364,0


In [18]:
# Project the three coordinate columns onto their first two principal components.
pca_res = PCA(n_components=2).fit_transform(
    df.loc[:, ['x', 'y', 'z']]
)

In [21]:
# Pair the two PCA coordinates with the class labels, row-aligned on df's index.
res_df = (
    pd.DataFrame(pca_res[:, :2], columns=['x', 'y'], index=df.index)
    .assign(cls=df['cls'])
)
res_df.head()

Unnamed: 0,x,y,cls
0,0.446623,-0.315201,1
1,0.038137,0.278627,2
2,-0.156885,0.359584,2
3,-0.18847,-0.055468,0
4,-0.297893,-0.068674,0


In [22]:
# Keep only the 2D coordinates for export; the class column is not written.
# (Selecting the columns already yields a DataFrame, no extra wrapping needed.)
result = res_df[['x', 'y']]
# index=False is the documented way to omit the index column from the CSV;
# fields are separated by ';'.
result.to_csv('../data/cube/cube-reduced.csv', index=False, sep=';')

In [24]:
# Open the pointctl viewer on the original 3D data (-i) together with the
# 2D PCA projection exported above (--r2d).
!pointctl view -i ../data/cube/cube.csv --r2d ../data/cube/cube-reduced.csv

Original data loaded. Consists of 10000 points with 3 dimensions
Reduced 2D data loaded. Consists of 10000 points with 2 dimensions


## Running TSNE

In [25]:
# t-SNE is stochastic: fix the seed so the embedding (and anything exported
# from it) is reproducible across kernel restarts.
tsne_res = TSNE(n_components=2, random_state=42, verbose=10).fit_transform(df[['x', 'y', 'z']])

[t-SNE] Computing 91 nearest neighbors...
[t-SNE] Indexed 10000 samples in 0.007s...
[t-SNE] Computed neighbors for 10000 samples in 0.322s...
[t-SNE] Computed conditional probabilities for sample 1000 / 10000
[t-SNE] Computed conditional probabilities for sample 2000 / 10000
[t-SNE] Computed conditional probabilities for sample 3000 / 10000
[t-SNE] Computed conditional probabilities for sample 4000 / 10000
[t-SNE] Computed conditional probabilities for sample 5000 / 10000
[t-SNE] Computed conditional probabilities for sample 6000 / 10000
[t-SNE] Computed conditional probabilities for sample 7000 / 10000
[t-SNE] Computed conditional probabilities for sample 8000 / 10000
[t-SNE] Computed conditional probabilities for sample 9000 / 10000
[t-SNE] Computed conditional probabilities for sample 10000 / 10000
[t-SNE] Mean sigma: 0.032680
[t-SNE] Computed conditional probabilities in 0.607s
[t-SNE] Iteration 50: error = 96.1548233, gradient norm = 0.0201064 (50 iterations in 1.774s)
[t-SNE] It

In [27]:
# Pair the two t-SNE coordinates with the class labels, row-aligned on df's index.
res_df = (
    pd.DataFrame(tsne_res[:, :2], columns=['x', 'y'], index=df.index)
    .assign(cls=df['cls'])
)
res_df.head()

Unnamed: 0,x,y,cls
0,68.375427,-11.442039,1
1,-23.554428,-21.987862,2
2,-46.614769,-14.629387,2
3,-9.102798,17.477783,0
4,-16.187662,26.885998,0


In [29]:
# Keep only the 2D coordinates for export; the class column is not written.
# (Selecting the columns already yields a DataFrame, no extra wrapping needed.)
result = res_df[['x', 'y']]
# index=False is the documented way to omit the index column from the CSV;
# fields are separated by ';'.
result.to_csv('../data/cube/cube-reduced-tsne.csv', index=False, sep=';')

## Plot the 2D t-SNE result

In [None]:
# Scatter plot of the reduced points in res_df, coloured by class.
# NOTE(review): the 'y' column is drawn on the horizontal axis and 'x' on the
# vertical one — confirm this swap is intentional.
fig, ax = plt.subplots(figsize=(16, 16))
class_colors = sns.color_palette("hls", 3)
sns.scatterplot(
    data=res_df,
    x='y',
    y='x',
    hue="cls",
    palette=class_colors,
    legend="full",
    alpha=0.3,
    ax=ax,
)