-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit 953d3b3
Showing
13 changed files
with
265 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
.idea | ||
.pytest_cache | ||
.coverage | ||
__pycache__ | ||
venv | ||
htmlcov | ||
.ipynb_checkpoints |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
language: python | ||
python: | ||
- '3.6.3' | ||
script: | ||
- py.test --cov=dbscan test/ | ||
after_success: | ||
- coveralls |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
|
||
The MIT License (MIT) | ||
|
||
Copyright (c) 2018 G Roques | ||
|
||
Permission is hereby granted, free of charge, to any person obtaining a copy | ||
of this software and associated documentation files (the "Software"), to deal | ||
in the Software without restriction, including without limitation the rights | ||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | ||
copies of the Software, and to permit persons to whom the Software is | ||
furnished to do so, subject to the following conditions: | ||
|
||
The above copyright notice and this permission notice shall be included in | ||
all copies or substantial portions of the Software. | ||
|
||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | ||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN | ||
THE SOFTWARE. | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
# DBSCAN | ||
|
||
Density-Based Spatial Clustering of Applications with Noise (DBSCAN) implementation in Python. | ||
|
||
API inspired by Scikit-learn. | ||
|
||
**Reference:** *Introduction to Data Mining* (1st Edition) by Pang-Ning Tan, Michael Steinbach, and Vipin Kumar, | ||
Section 8.4, Page 526 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
from .dataviz import generate_clusters | ||
from .dataviz import plot_clusters |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,156 @@ | ||
from math import cos | ||
from math import pi | ||
from math import sin | ||
from random import Random | ||
from typing import List, Tuple | ||
|
||
import matplotlib.pyplot as plt | ||
import pandas as pd | ||
import seaborn as sns | ||
|
||
|
||
def plot_clusters(clusters: List[List], labels: List[int], centroids: List[List], seed=0) -> None:
    """Plot cluster data as a scatter plot and display it.

    Args:
        clusters: Cluster data to plot, one ``[x, y]`` point per entry.
        labels: Label of each point, in the same order as ``clusters``.
        centroids: Center point of each cluster.
        seed: Seed for the random number generator.
            Used to sample colors.

    Returns:
        None
    """
    columns = ['x', 'y']
    num_clusters = len(set(labels))
    data = get_data(clusters, labels, centroids, columns)
    markers = get_markers(num_clusters)
    palette = get_palette(num_clusters, seed)
    # Pass x/y by keyword: newer seaborn releases no longer accept them
    # positionally, while older releases accept both forms.
    # NOTE(review): this relies on 'centroid' being the last hue level so that
    # it receives the trailing 'x' marker / red color -- confirm against
    # seaborn's hue ordering.
    sns.lmplot(x=columns[0],
               y=columns[1],
               data=data,
               markers=markers,
               palette=palette,
               fit_reg=False,
               legend=False,
               hue='labels',
               scatter_kws={'linewidth': 1, 'edgecolor': 'w'})
    plt.show()
|
||
|
||
def get_data(clusters, labels, centroids, columns) -> pd.DataFrame:
    """Construct a DataFrame object to plot.

    Args:
        clusters: The cluster data.
        labels: Which cluster each point belongs to.
        centroids: The center point of each cluster.
        columns: Labels for each column of data.

    Returns:
        A DataFrame holding every point followed by every centroid, with one
        column per entry of ``columns`` plus a ``'labels'`` column. Points
        carry their given label; centroids carry the label ``'centroid'``.
    """
    points_df = pd.DataFrame(clusters, columns=columns)
    # Add labels as a column for coloring
    points_df['labels'] = pd.Series(labels, index=points_df.index)
    centroids_df = pd.DataFrame(centroids, columns=columns)
    centroids_df['labels'] = ['centroid'] * len(centroids)
    # DataFrame.append was removed in pandas 2.0; concat works on old and new
    # pandas alike and yields the same re-indexed result.
    return pd.concat([points_df, centroids_df], ignore_index=True)
|
||
|
||
def get_markers(num_clusters) -> List[str]:
    """Build the marker list for the plot.

    Every cluster is drawn with a circle ('o'); a single trailing
    cross ('x') is reserved for the centroids.

    Args:
        num_clusters: The number of clusters.

    Returns:
        A list of ``num_clusters`` circle markers followed by one cross.
    """
    cluster_markers = ['o'] * num_clusters
    centroid_marker = ['x']  # Reserve 'x' for centroids
    return cluster_markers + centroid_marker
|
||
|
||
def get_palette(num_clusters, seed=0) -> List[str]:
    """Generates a color palette for the plot.

    Uses random colors for different clusters,
    and reserves red for centroids.

    Args:
        num_clusters: The number of clusters.
        seed: Seed for random number generator.

    Returns:
        A list of ``num_clusters`` color names followed by ``'red'``.
        Colors are distinct while ``num_clusters`` does not exceed the number
        of base colors; beyond that the shuffled base colors repeat in order.

    Raises:
        ValueError: If ``num_clusters`` is negative.
    """
    random = Random(seed)
    all_colors = ['b', 'g', 'c', 'm', 'orange']
    if num_clusters <= len(all_colors):
        palette = random.sample(all_colors, num_clusters)
    else:
        # sample() cannot draw more items than the population holds, so
        # shuffle all base colors once and cycle through them instead.
        shuffled = random.sample(all_colors, len(all_colors))
        palette = [shuffled[i % len(shuffled)] for i in range(num_clusters)]
    palette.append('red')  # Reserve red color for centroids
    return palette
|
||
|
||
def generate_clusters(num_clusters: int,
                      num_points: int,
                      spread: float,
                      bound_for_x: Tuple[float, float],
                      bound_for_y: Tuple[float, float],
                      seed=None) -> List[List]:
    """Generate random data for clustering.

    A center is drawn uniformly inside the given bounds for every cluster,
    and ``generate_cluster`` then scatters points around that center.

    Source:
        https://stackoverflow.com/questions/44356063/how-to-generate-a-set-of-random-points-within-a-given-x-y-coordinates-in-an-x

    Args:
        num_clusters: The number of clusters to generate.
        num_points: The total number of points requested. Each cluster gets
            ``int(num_points / num_clusters)`` points, so the total returned
            may be smaller when the division is not exact.
        spread: The spread of each cluster. Decrease for tighter clusters.
        bound_for_x: The (min, max) bounds for cluster-center X values.
        bound_for_y: The (min, max) bounds for cluster-center Y values.
        seed: Seed for the random number generator. Also forwarded to
            ``generate_cluster``.

    Returns:
        A flat list of ``[x, y]`` points, cluster after cluster.
    """
    rng = Random(seed)
    x_low, x_high = bound_for_x
    y_low, y_high = bound_for_y
    points_per_cluster = int(num_points / num_clusters)
    all_points = []
    for _ in range(num_clusters):
        # Draw x before y so the random sequence is consumed in a fixed order.
        center_x = x_low + (x_high - x_low) * rng.random()
        center_y = y_low + (y_high - y_low) * rng.random()
        center = (center_x, center_y)
        all_points += generate_cluster(points_per_cluster, center, spread, seed)
    return all_points
|
||
|
||
def generate_cluster(num_points: int, center: Tuple[float, float], spread: float, seed=None) -> List[List]:
    """Generates a cluster of random points.

    Each point is placed at a random angle around ``center`` and at a random
    distance of at most ``spread`` from it.

    Source:
        https://stackoverflow.com/questions/44356063/how-to-generate-a-set-of-random-points-within-a-given-x-y-coordinates-in-an-x

    Args:
        num_points: The number of points for the cluster.
        center: The center of the cluster.
        spread: How tightly to cluster the data.
        seed: Seed for the random number generator. When ``None`` (the
            default) the generator is left unseeded.

    Returns:
        A random cluster consisting of N ``[x, y]`` points.
    """
    x, y = center
    if seed is None:
        # The default must not take part in arithmetic: ``None + y`` raises
        # TypeError. Fall back to an unseeded generator instead.
        random = Random()
    else:
        # Mix the center into the seed to generate different looking clusters
        # if called from generate_clusters with one shared seed.
        random = Random((seed + y) * x)
    points = []
    for _ in range(num_points):
        # Draw the angle before the radius so seeded output stays stable.
        theta = 2 * pi * random.random()
        s = spread * random.random()
        points.append([x + s * cos(theta), y + s * sin(theta)])
    return points
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
from .dbscan import DBSCAN |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
class DBSCAN:
    """Density-Based Spatial Clustering of Applications with Noise.

    Placeholder class: no attributes or methods are defined yet.
    """
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
from typing import List | ||
|
||
from dataviz import generate_clusters | ||
|
||
|
||
def main():
    """Generate demo cluster data with a fixed seed.

    The generated points are not used further yet.
    """
    cluster_count = 4
    _points = generate_data(cluster_count, seed=1)
|
||
|
||
def generate_data(num_clusters: int, seed=None) -> List[List]:
    """Generate demo points using fixed generation settings.

    Requests 20 points in total with a spread of 7, and uses the range
    (1, 100) for both the X and Y bounds.

    Args:
        num_clusters: The number of clusters to generate.
        seed: Seed forwarded to ``generate_clusters``.

    Returns:
        The list of points produced by ``generate_clusters``.
    """
    axis_bounds = (1, 100)  # shared by both axes
    return generate_clusters(
        num_clusters=num_clusters,
        num_points=20,
        spread=7,
        bound_for_x=axis_bounds,
        bound_for_y=axis_bounds,
        seed=seed,
    )
|
||
|
||
if __name__ == '__main__': | ||
main() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
attrs==17.4.0 | ||
certifi==2018.1.18 | ||
chardet==3.0.4 | ||
coverage==4.5.1 | ||
coveralls==1.3.0 | ||
cycler==0.10.0 | ||
docopt==0.6.2 | ||
idna==2.6 | ||
kiwisolver==1.0.1 | ||
matplotlib==2.2.2 | ||
more-itertools==4.1.0 | ||
numpy==1.14.2 | ||
pandas==0.22.0 | ||
pluggy==0.6.0 | ||
py==1.5.3 | ||
pyparsing==2.2.0 | ||
pytest==3.5.0 | ||
pytest-cov==2.5.1 | ||
python-dateutil==2.7.2 | ||
pytz==2018.4 | ||
requests==2.18.4 | ||
scipy==1.0.1 | ||
seaborn==0.8.1 | ||
six==1.11.0 | ||
urllib3==1.22 |
Binary file not shown.
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
import unittest | ||
|
||
from dbscan import DBSCAN | ||
|
||
|
||
class DBSCANTest(unittest.TestCase):
    """Unit tests for the DBSCAN class."""

    def test_fit(self):
        """Placeholder test: only checks that DBSCAN can be constructed."""
        DBSCAN()
|
||
|
||
if __name__ == '__main__': | ||
unittest.main() |