# Bhattachryya Distances Validation
* [Plotly](https://plot.ly/)
* [Bhattacharyya](https://en.wikipedia.org/wiki/Bhattacharyya_distance)

In [1]:
import numpy as np
import clustering
import matplotlib.pyplot as plt

s1 = np.random.uniform(0, 7, (100,2))
s2 = np.random.uniform(2, 8, (100,2))
s3 = np.random.uniform(30, 40, (100,2))

In [51]:
import plotly.graph_objs as go
import plotly.plotly as py

## Step 1. Two-D Scatter of random points

In [52]:
# s1 setting
trace1 = go.Scatter(
    x=s1[:,0],
    y=s1[:,1],
    name='s1',
    mode='markers',
    # Point size color and transparency
    marker=dict(
        size=7,
        # Point outline color and width
        line=dict(
            color='rgba(217, 217, 217, 0.14)',
            width=0.5
        ),
        opacity=0.8
    )
)

# s2 setting
trace2 = go.Scatter(
    x=s2[:,0],
    y=s2[:,1],
    name='s2',
    mode='markers',
    marker=dict(
        color='rgba(204, 10, 20)',
        size=7,
        symbol='circle',
        line=dict(
            color='rgb(204, 10, 20)',
            width=1
        ),
        opacity=0.9
    )
)

# s3 setting
trace3 = go.Scatter(
    x=s3[:,0],
    y=s3[:,1],
    name='s3',
    mode='markers',
    marker=dict(
        color='rgba(20, 100, 200)',
        size=7,
        symbol='circle',
        line=dict(
            color='rgb(20, 100, 200)',
            width=1
        ),
        opacity=0.9
    )
)

data = [trace1, trace2, trace3]
layout = go.Layout(
    title='2D Scatter of 3 random sets',
    margin=dict(
        l=0,
        r=0,
        b=0,
        t=0
    )
)
fig = go.Figure(data=data, layout=layout)
# py.offline.iplot(fig)# offline plot
py.iplot(fig)

## Step 2. Calculate the related distribution

In [2]:
mean_s1 = np.mean(s1, axis=0)
cov_s1 = np.cov(s1.T)
distribution_s1 = [mean_s1, cov_s1]

mean_s2 = np.mean(s2, axis=0)
cov_s2 = np.cov(s2.T)
distribution_s2 = [mean_s2, cov_s2]


mean_s3 = np.mean(s3, axis=0)
cov_s3 = np.cov(s3.T)
distribution_s3 = [mean_s3, cov_s3]

In [63]:
def bhat_distance(obs_a, obs_b):
    """Caculate the Bhattacharyya Distance of every oberservations in point
    Reference
    ---------
    https://en.wikipedia.org/wiki/Bhattacharyya_distance
    """
    if not len(obs_a) == len(obs_b):
        raise ValueError("a and b must be of the same size")
    DIFF = np.array(obs_a[0])-np.array(obs_b[0])
    SUM = (np.array(obs_a[1])+np.array(obs_b[1]))*0.5
    DET = np.linalg.det(SUM)/np.sqrt(np.linalg.det(np.array(obs_a[1]))* \
    np.linalg.det(np.array(obs_b[1])))
    return 0.125*np.dot(np.dot(DIFF, np.linalg.inv(SUM)), DIFF.T) + \
         0.5*np.log10(DET)

## Step 3. Return the Bhattacharyya Distance of each Distribution

In [66]:
d12 = bhat_distance(distribution_s1, distribution_s2)
d13 = bhat_distance(distribution_s1, distribution_s3)
d23 = bhat_distance(distribution_s2, distribution_s3)

print('Bhattacharyya Distances Between s1 and s2: ', d12)
print('Bhattacharyya Distances Between s1 and s3: ', d13)
print('Bhattacharyya Distances Between s2 and s3: ', d23)


('Bhattacharyya Distances Between s1 and s2: ', 0.0984455061965577)
('Bhattacharyya Distances Between s1 and s3: ', 40.92904056806407)
('Bhattacharyya Distances Between s2 and s3: ', 44.60535614340598)


### Step 3. Three-D Graph with Probability Density

In [7]:
from scipy.stats import multivariate_normal
rv_s1 = multivariate_normal(mean_s1, cov_s1)
rv_s2 = multivariate_normal(mean_s2, cov_s2)
rv_s3 = multivariate_normal(mean_s3, cov_s3)

z1 = np.insert(s1, 2, values=rv_s1.pdf(s1), axis=1)
z2 = np.insert(s2, 2, values=rv_s2.pdf(s2), axis=1)
z3 = np.insert(s3, 2, values=rv_s3.pdf(s3), axis=1)

In [110]:
# s1 setting
trace1 = go.Scatter3d(
    x=s1[:,0],
    y=s1[:,1],
    z=rv_s1.pdf(s1),
    mode='markers',
    # Point size color and transparency
    marker=dict(
        size=7,
        # Point outline color and width
        line=dict(
            color='rgba(217, 217, 217, 0.14)',
            width=0.5
        ),
        opacity=0.8
    )
)

# s2 setting
trace2 = go.Scatter3d(
    x=s2[:,0],
    y=s2[:,1],
    z=rv_s2.pdf(s2),
    mode='markers',
    marker=dict(
        color='rgba(204, 10, 20)',
        size=7,
        symbol='circle',
        line=dict(
            color='rgb(204, 10, 20)',
            width=1
        ),
        opacity=0.9
    )
)

# s3 setting
trace3 = go.Scatter3d(
    x=s3[:,0],
    y=s3[:,1],
    z=rv_s3.pdf(s3),
    mode='markers',
    marker=dict(
        color='rgba(20, 100, 200)',
        size=7,
        symbol='circle',
        line=dict(
            color='rgb(20, 100, 200)',
            width=1
        ),
        opacity=0.9
    )
)

data = [trace1, trace2, trace3]
layout = go.Layout(
    title='3D Scatter of 3 random sets',
    margin=dict(
        l=0,
        r=0,
        b=0,
        t=0
    )
)
fig = go.Figure(data=data, layout=layout)
# py.offline.iplot(fig)# offline plot
py.iplot(fig)

In [111]:
# s1 setting
distribution1 = go.Surface(
    z=z1
)

# s2 setting
distribution2 = go.Surface(
    z=z2,
    showscale=False,
    opacity=0.9 
)

# s3 setting
distribution3 = go.Surface(
    z=z3,
    showscale=False,
    opacity=0.9 
)

In [117]:
data1 = [distribution1, distribution2, distribution3]

py.iplot(data1)

In [128]:
data1 = [
    go.Surface(
        z=z1.tolist()
    )
]
layout1 = go.Layout(
    title='Mt Bruno Elevation',
    autosize=False,
    width=500,
    height=500,
    margin=dict(
        l=65,
        r=50,
        b=65,
        t=90
    )
)
fig = go.Figure(data=data1, layout=layout1)
py.iplot(fig)

PlotlyRequestError: Account limit reached: Your account is limited to creating 25 charts. To continue, you can override or delete existing charts or you can upgrade your account at: https://plot.ly/products/cloud

In [127]:
import pandas as pd
df = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/volcano.csv')
z = df.values.tolist()
z1.tolist()
print type(z1.tolist())
data = [go.Surface(z=df.values.tolist(), colorscale='Viridis')]

<type 'list'>
