<a href="https://colab.research.google.com/github/daviiandrade/TrabalhoIA/blob/main/MeanShift.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Mean Shift

In [None]:
import math
import operator

import numpy as np
import matplotlib.pyplot as plt

import torch

%matplotlib inline

## Criando os dados

In [None]:
# Number of clusters to generate.
n_clusters = 6
# Number of points drawn for each cluster.
n_samples = 1000

In [None]:
# Seeded local generator so that Restart & Run All reproduces the same dataset
# (and therefore the same plots and clustering result).
rng = np.random.default_rng(42)

# One 2-D centre per cluster, drawn uniformly from [-35, 35) on each axis.
centroids = rng.uniform(-35, 35, (n_clusters, 2))
# n_samples points per cluster from a Gaussian with covariance diag(5, 5).
slices = [rng.multivariate_normal(centroids[i], np.diag([5., 5.]), n_samples)
          for i in range(n_clusters)]
# Clusters are stacked in order, so rows [i * n_samples, (i + 1) * n_samples)
# of `data` belong to cluster i.
data = np.concatenate(slices).astype(np.float32)

In [None]:
def plot_data(centroids, data, n_samples):
    """Scatter-plot each cluster in its own colour and mark every centroid.

    Args:
        centroids: Sequence of 2-D cluster centres; one colour is allocated
            per entry.
        data: 2-D array of points in which rows ``i * n_samples`` up to (but
            excluding) ``(i + 1) * n_samples`` are drawn as cluster ``i``.
        n_samples: Number of consecutive rows of ``data`` per cluster.
    """
    colour = plt.cm.rainbow(np.linspace(0, 1, len(centroids)))

    _, ax = plt.subplots(figsize=(4, 4))
    for i, centroid in enumerate(centroids):
        samples = data[i * n_samples : (i + 1) * n_samples]
        # Use `color=` rather than `c=`: a bare RGBA row passed as `c` is
        # indistinguishable from per-point values to be colormapped when the
        # number of points happens to match its length.
        ax.scatter(samples[:, 0], samples[:, 1], color=colour[i], s=1)
        # Thick black cross with a thinner magenta cross drawn on top of it,
        # so the centroid stays visible against any cluster colour.
        ax.plot(centroid[0], centroid[1], markersize=10, marker="x", color='k', mew=5)
        ax.plot(centroid[0], centroid[1], markersize=5, marker="x", color='m', mew=2)
    # Same scale on both axes so distances are not visually distorted.
    ax.axis('equal')
    
# Show the generated points together with the centroids they were drawn around.
plot_data(centroids, data, n_samples)

## Implementação

In [None]:
from numpy import exp, sqrt, array

In [None]:
def distance(x, X):
    """Return the Euclidean distance from point ``x`` to every row of ``X``.

    Args:
        x: A single point, broadcastable against the rows of ``X``.
        X: 2-D array with one point per row.

    Returns:
        1-D array holding one distance per row of ``X`` (mathematically the
        same quantity as ``np.linalg.norm(x - X, axis=1)``).
    """
    offsets = x - X
    squared_norms = (offsets ** 2).sum(axis=1)
    return sqrt(squared_norms)

In [None]:
# Small worked example: one query point against three reference points.
a = array([1, 2])
b = array([[1, 2],
           [2, 3],
           [-1, -3]])

# One Euclidean distance per row of `b`.
dist = distance(a, b)
dist

array([0.        , 1.41421356, 5.38516481])

In [None]:
def gaussian(dist, bandwidth):
    """Evaluate a zero-mean Gaussian density at ``dist``.

    Args:
        dist: Distance value or array of distances.
        bandwidth: Standard deviation of the Gaussian.

    Returns:
        ``exp(-0.5 * (dist / bandwidth)**2) / (bandwidth * sqrt(2 * pi))``,
        evaluated element-wise.
    """
    normaliser = bandwidth * math.sqrt(2 * math.pi)
    scaled = dist / bandwidth
    return exp(-0.5 * scaled ** 2) / normaliser

In [None]:
# Kernel weights for the example distances, using a bandwidth of 2.5.
gaussian(dist, 2.5)

array([0.15957691, 0.13598247, 0.0156822 ])

In [None]:
def meanshift_step(X, bandwidth=2.5):
    """Run one mean-shift pass over ``X``, updating it in place.

    Each row in turn is replaced by the Gaussian-weighted average of all rows.
    Because the rows are overwritten one at a time, rows processed later are
    averaged against the already-shifted versions of earlier rows.

    Args:
        X: 2-D array with one point per row; it is modified in place.
        bandwidth: Bandwidth forwarded to ``gaussian``.

    Returns:
        The same ``X`` object that was passed in, after the update.
    """
    for idx in range(len(X)):
        weights = gaussian(distance(X[idx], X), bandwidth)
        # Weighted sum of all rows, normalised by the total weight.
        weighted_total = (weights[:, None] * X).sum(axis=0)
        X[idx] = weighted_total / weights.sum()
    return X

Dados antes:

In [None]:
# Plot the raw points, before any mean-shift update.
plot_data(centroids, data, n_samples)

Dados depois de um passo do mean shift:

In [None]:
# Apply a single mean-shift pass to a copy, because meanshift_step updates its
# argument in place and `data` must stay untouched for the later cells.
_X = meanshift_step(np.copy(data))
plot_data(centroids, _X, n_samples)

In [None]:
def meanshift(X, n_iterations=5, bandwidth=2.5):
    """Apply repeated mean-shift passes to a copy of ``X``.

    Args:
        X: 2-D array with one point per row; it is copied and left unmodified.
        n_iterations: Number of ``meanshift_step`` passes to run. Defaults to
            5, the value that was previously hard-coded.
        bandwidth: Kernel bandwidth forwarded to every ``meanshift_step`` call.
            Defaults to 2.5, the default of ``meanshift_step``.

    Returns:
        A new array holding the shifted points.
    """
    # Work on a copy: meanshift_step overwrites its argument in place.
    X = np.copy(X)
    for _ in range(n_iterations):
        X = meanshift_step(X, bandwidth=bandwidth)
    return X

In [None]:
%%time
# Run the full mean-shift procedure on the dataset and report how long it takes.
X = meanshift(data)

CPU times: user 16.2 s, sys: 0 ns, total: 16.2 s
Wall time: 16.2 s


In [None]:
# Plot the points after the full mean-shift run.
plot_data(centroids, X, n_samples)