# Generate datapoints in 3 dimensions

__Objective:__ create a dataset of points on which we can then perform dimensionality reduction and clustering.

In [None]:
import numpy as np
import pandas as pd
from plotly.offline import init_notebook_mode, iplot
import plotly.graph_objs as go

# Initialise Plotly's offline notebook integration before any iplot() call below.
# NOTE(review): connected=True presumably loads plotly.js from a CDN instead of
# embedding it in the notebook -- confirm against the Plotly documentation.
init_notebook_mode(connected=True)

Generate points with two features, each normally distributed, using a different mean and standard deviation for each group so that the points form distinct clusters.

In [None]:
# Two 2-D Gaussian clusters of 10000 points each, stored one point per row
# and one feature per column.

# Cluster 1: first feature centred on 10, second on 0, default scale for both.
v1 = np.column_stack((
    np.random.normal(loc=10.0, size=10000),
    np.random.normal(size=10000),
))

# Cluster 2: tighter cluster centred on (7, 2).
v2 = np.column_stack((
    np.random.normal(loc=7.0, scale=0.5, size=10000),
    np.random.normal(loc=2.0, scale=0.3, size=10000),
))

In [None]:
# Sanity check: display the shapes of both cluster arrays as the cell output.
v1.shape, v2.shape

In [None]:
# Plot both clusters in the plane, one marker trace per cluster
# (column 0 on the x axis, column 1 on the y axis).
trace1, trace2 = (
    go.Scatter(x=cluster[:, 0], y=cluster[:, 1], mode='markers')
    for cluster in (v1, v2)
)

data = [trace1, trace2]
fig = go.Figure(data=data)
iplot(fig)

Add a third dimension to the feature vector, whose value depends on the first feature (its fourth power plus Gaussian noise).

In [None]:
# Third feature for each cluster: a normal draw centred on the fourth power of
# the first feature, with a standard deviation of 5000. z1 is drawn before z2.
z1, z2 = (
    np.random.normal(loc=cluster[:, 0] ** 4, scale=5000)
    for cluster in (v1, v2)
)

In [None]:
# Append z1 as the third column of v1.
# reshape(-1, 1) infers the row count from z1 instead of hard-coding the sample
# size chosen in an earlier cell. Slicing v1 to its two original feature
# columns keeps this cell safe to re-run: without it, every re-execution would
# append one more column.
v1 = np.hstack([v1[:, :2], z1.reshape(-1, 1)])

In [None]:
# Append z2 as the third column of v2.
# reshape(-1, 1) infers the row count from z2 instead of hard-coding the sample
# size chosen in an earlier cell. Slicing v2 to its two original feature
# columns keeps this cell safe to re-run: without it, every re-execution would
# append one more column.
v2 = np.hstack([v2[:, :2], z2.reshape(-1, 1)])

In [None]:
# Sanity check: display the shape of v2 after the third column was appended.
v2.shape

In [None]:
# 3-D view of the two clusters, one marker trace per cluster; columns 0, 1 and
# 2 of each array map to the x, y and z axes.
trace1, trace2 = (
    go.Scatter3d(
        x=cluster[:, 0],
        y=cluster[:, 1],
        z=cluster[:, 2],
        mode='markers',
        marker={'size': 3.0},
    )
    for cluster in (v1, v2)
)

data = [trace1, trace2]
fig = go.Figure(data=data)
iplot(fig)

Put all the datapoints in one single array, then put it into a pandas dataframe and export it to csv.

In [None]:
# Stack the rows of both clusters into one array (v1's rows first, then v2's).
X = np.vstack((v1, v2))

In [None]:
# 3-D view of the combined dataset, drawn as a single marker trace.
trace = go.Scatter3d(
    x=X[:, 0], y=X[:, 1], z=X[:, 2],
    mode='markers',
    marker={'size': 3.0},
)

data = [trace]
fig = go.Figure(data=data)
iplot(fig)

In [None]:
# Wrap the combined array in a DataFrame, naming its first three columns
# x, y and z in that order.
column_names = ('x', 'y', 'z')
data = pd.DataFrame({name: X[:, idx] for idx, name in enumerate(column_names)})

# Display the frame's shape as the cell output.
data.shape

In [None]:
# Export the dataset without the row index.
# to_csv documents `index` as a bool, so pass False explicitly instead of
# relying on None happening to be falsy.
data.to_csv('../data/data.csv', index=False)