# Clustering for gevtev

In [2]:
import pandas as pd
import os
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as st
from sklearn.neighbors import DistanceMetric
from sklearn.metrics.pairwise import pairwise_distances
from sklearn import cluster
from sklearn.decomposition.pca import PCA
from sklearn.manifold import TSNE
import seaborn as sns
from definitions import *
from utils import *

In [3]:
#path_to_gamma_bins shoud contain an absolute path to your gamma-bins directory
path_to_gamma_bins = os.path.dirname(os.getcwd()) + "/"

In [4]:
#name columns included in the final display
other_names = ['gev_1FGL_Name', 'gev_CLASS1', 'tev_fermi_names', 'tev_classes', 's_MAIN_ID', 's_class','s_simbad', 'marked'] + list_xmm_other_columns()
#from gamma bins directory
path_to_catalog = "markeddata/" + get_name_for_gevtevxmm() + "_normalized.txt"

In [5]:
spectra = pd.read_csv(path_to_gamma_bins + path_to_catalog, sep='\t')
print(path_to_gamma_bins +path_to_catalog)
spectra.head()

/home/masha/PycharmProjects/gamma-bins/markeddata/gevtevxmm_simbadclasses_06_normalized.txt


Unnamed: 0,xmm_SC_EP_1_FLUX,xmm_SC_EP_2_FLUX,xmm_SC_EP_3_FLUX,xmm_SC_EP_4_FLUX,xmm_SC_EP_5_FLUX,gev_nuFnu10000_100000,gev_nuFnu1000_3000,gev_nuFnu100_300,gev_nuFnu3000_10000,gev_nuFnu300_1000,...,gev_GLAT,gev_RAJ2000,gev_DEJ2000,tev_fermi_names,tev_classes,tev_glat,tev_glon,tev_pos_dec,tev_pos_ra,marked
0,0.068553,0.061488,0.059425,0.059511,0.062076,0.06791,0.06777,0.075313,0.06936,0.066077,...,1.337755,6.4326,64.069298,b'',b'snr',1.41293,120.092361,64.140802,6.339723,0
1,0.068907,0.061626,0.059742,0.060119,0.06359,0.067621,0.067482,0.074993,0.069065,0.065796,...,1.337755,6.4326,64.069298,b'',b'snr',1.41293,120.092361,64.140802,6.339723,0
2,0.081538,0.073453,0.0731,0.074908,-inf,0.068704,0.068562,0.076194,0.070171,0.06685,...,1.337755,6.4326,64.069298,b'',b'snr',1.41293,120.092361,64.140802,6.339723,0
3,0.072884,0.066512,0.065242,0.065769,0.07676,0.06435,0.064217,0.071365,0.065725,0.062614,...,1.337755,6.4326,64.069298,b'',b'snr',1.41293,120.092361,64.140802,6.339723,0
4,0.066945,0.059547,0.057954,0.058218,0.062214,0.068519,0.068377,0.075988,0.069982,0.06667,...,1.337755,6.4326,64.069298,b'',b'snr',1.41293,120.092361,64.140802,6.339723,0


In [6]:
spectra = spectra.copy()
spectra = spectra.reset_index(drop=True)

In [7]:
spectrum_columns = list_gev_spectrum_columns() + list_tev_spectrum_columns()

In [8]:
only_spectra = spectra[spectrum_columns].copy()

In [9]:
matrix_inf = (only_spectra != -np.inf)

In [15]:
matrix_inf

Unnamed: 0,gev_nuFnu10000_100000,gev_nuFnu1000_3000,gev_nuFnu100_300,gev_nuFnu3000_10000,gev_nuFnu300_1000,gev_nuFnu30_100,tev_0.3TeV,tev_1TeV,tev_3TeV,tev_10TeV,tev_30TeV
0,True,True,True,True,True,False,True,True,True,True,True
1,True,True,True,True,True,False,True,True,True,True,True
2,True,True,True,True,True,False,True,True,True,True,True
3,True,True,True,True,True,False,True,True,True,True,True
4,True,True,True,True,True,False,True,True,True,True,True
5,True,True,True,True,True,False,True,True,True,True,True
6,True,True,True,True,True,False,True,True,True,True,True
7,True,True,True,True,True,False,True,True,True,True,True
8,True,True,True,True,True,False,True,True,True,True,True
9,True,True,True,True,True,False,True,True,True,True,True


In [11]:
only_spectra

Unnamed: 0,gev_nuFnu10000_100000,gev_nuFnu1000_3000,gev_nuFnu100_300,gev_nuFnu3000_10000,gev_nuFnu300_1000,gev_nuFnu30_100,tev_0.3TeV,tev_1TeV,tev_3TeV,tev_10TeV,tev_30TeV
0,0.067910,0.067770,0.075313,0.069360,0.066077,-inf,0.066280,0.070908,0.073742,0.068283,0.063302
1,0.067621,0.067482,0.074993,0.069065,0.065796,-inf,0.065999,0.070606,0.073429,0.067993,0.063033
2,0.068704,0.068562,0.076194,0.070171,0.066850,-inf,0.067055,0.071737,0.074605,0.069082,0.064042
3,0.064350,0.064217,0.071365,0.065725,0.062614,-inf,0.062806,0.067191,0.069877,0.064704,0.059984
4,0.068519,0.068377,0.075988,0.069982,0.066670,-inf,0.066875,0.071543,0.074403,0.068896,0.063870
5,0.064035,0.063903,0.071016,0.065403,0.062307,-inf,0.062499,0.066862,0.069535,0.064388,0.059691
6,0.066680,0.066542,0.073949,0.068104,0.064880,-inf,0.065080,0.069623,0.072406,0.067047,0.062156
7,0.064007,0.063875,0.070985,0.065374,0.062280,-inf,0.062472,0.066833,0.069505,0.064359,0.059665
8,0.063906,0.063774,0.070873,0.065271,0.062181,-inf,0.062373,0.066727,0.069395,0.064258,0.059570
9,0.063735,0.063603,0.070683,0.065096,0.062015,-inf,0.062206,0.066548,0.069209,0.064085,0.059411


In [7]:
def get_distance_metric(metric_function):
    if type(metric_function) == str:
        dist = DistanceMetric.get_metric(metric_function)
    else:
        dist = DistanceMetric.get_metric("pyfunc", func=metric_function)
    return dist

In [8]:
from matplotlib import colors as mcolors


colors = dict(mcolors.BASE_COLORS, **mcolors.CSS4_COLORS)

# Sort colors by hue, saturation, value and name.
by_hsv = sorted((tuple(mcolors.rgb_to_hsv(mcolors.to_rgba(color)[:3])), name)
                for name, color in colors.items())
sorted_names = [name for hsv, name in by_hsv]
colors = list(colors.values())[19:]

* ### earth mover's distance

In [9]:
# earth mover's distance
metric_function = st.wasserstein_distance
spectra_distances = pairwise_distances(spectra[spectrum_columns], metric=metric_function, n_jobs=6)
# spectra_distances = get_distance_metric(metric_function).pairwise(spectra[spectrum_columns])
#pd.DataFrame(spectra_distances).to_csv(path_to_gamma_bins +"markeddata/gevtev_spectra_EM_distances.txt", index=False)

JoblibValueError: JoblibValueError
___________________________________________________________________________
Multiprocessing exception:
...........................................................................
/home/masha/anaconda3/lib/python3.6/runpy.py in _run_module_as_main(mod_name='ipykernel_launcher', alter_argv=1)
    188         sys.exit(msg)
    189     main_globals = sys.modules["__main__"].__dict__
    190     if alter_argv:
    191         sys.argv[0] = mod_spec.origin
    192     return _run_code(code, main_globals, None,
--> 193                      "__main__", mod_spec)
        mod_spec = ModuleSpec(name='ipykernel_launcher', loader=<_f...b/python3.6/site-packages/ipykernel_launcher.py')
    194 
    195 def run_module(mod_name, init_globals=None,
    196                run_name=None, alter_sys=False):
    197     """Execute a module's code without importing it

...........................................................................
/home/masha/anaconda3/lib/python3.6/runpy.py in _run_code(code=<code object <module> at 0x7f7dbb84ac90, file "/...3.6/site-packages/ipykernel_launcher.py", line 5>, run_globals={'__annotations__': {}, '__builtins__': <module 'builtins' (built-in)>, '__cached__': '/home/masha/anaconda3/lib/python3.6/site-packages/__pycache__/ipykernel_launcher.cpython-36.pyc', '__doc__': 'Entry point for launching an IPython kernel.\n\nTh...orts until\nafter removing the cwd from sys.path.\n', '__file__': '/home/masha/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py', '__loader__': <_frozen_importlib_external.SourceFileLoader object>, '__name__': '__main__', '__package__': '', '__spec__': ModuleSpec(name='ipykernel_launcher', loader=<_f...b/python3.6/site-packages/ipykernel_launcher.py'), 'app': <module 'ipykernel.kernelapp' from '/home/masha/.../python3.6/site-packages/ipykernel/kernelapp.py'>, ...}, init_globals=None, mod_name='__main__', mod_spec=ModuleSpec(name='ipykernel_launcher', loader=<_f...b/python3.6/site-packages/ipykernel_launcher.py'), pkg_name='', script_name=None)
     80                        __cached__ = cached,
     81                        __doc__ = None,
     82                        __loader__ = loader,
     83                        __package__ = pkg_name,
     84                        __spec__ = mod_spec)
---> 85     exec(code, run_globals)
        code = <code object <module> at 0x7f7dbb84ac90, file "/...3.6/site-packages/ipykernel_launcher.py", line 5>
        run_globals = {'__annotations__': {}, '__builtins__': <module 'builtins' (built-in)>, '__cached__': '/home/masha/anaconda3/lib/python3.6/site-packages/__pycache__/ipykernel_launcher.cpython-36.pyc', '__doc__': 'Entry point for launching an IPython kernel.\n\nTh...orts until\nafter removing the cwd from sys.path.\n', '__file__': '/home/masha/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py', '__loader__': <_frozen_importlib_external.SourceFileLoader object>, '__name__': '__main__', '__package__': '', '__spec__': ModuleSpec(name='ipykernel_launcher', loader=<_f...b/python3.6/site-packages/ipykernel_launcher.py'), 'app': <module 'ipykernel.kernelapp' from '/home/masha/.../python3.6/site-packages/ipykernel/kernelapp.py'>, ...}
     86     return run_globals
     87 
     88 def _run_module_code(code, init_globals=None,
     89                     mod_name=None, mod_spec=None,

...........................................................................
/home/masha/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py in <module>()
     11     # This is added back by InteractiveShellApp.init_path()
     12     if sys.path[0] == '':
     13         del sys.path[0]
     14 
     15     from ipykernel import kernelapp as app
---> 16     app.launch_new_instance()

...........................................................................
/home/masha/anaconda3/lib/python3.6/site-packages/traitlets/config/application.py in launch_instance(cls=<class 'ipykernel.kernelapp.IPKernelApp'>, argv=None, **kwargs={})
    653 
    654         If a global instance already exists, this reinitializes and starts it
    655         """
    656         app = cls.instance(**kwargs)
    657         app.initialize(argv)
--> 658         app.start()
        app.start = <bound method IPKernelApp.start of <ipykernel.kernelapp.IPKernelApp object>>
    659 
    660 #-----------------------------------------------------------------------------
    661 # utility functions, for convenience
    662 #-----------------------------------------------------------------------------

...........................................................................
/home/masha/anaconda3/lib/python3.6/site-packages/ipykernel/kernelapp.py in start(self=<ipykernel.kernelapp.IPKernelApp object>)
    492         if self.poller is not None:
    493             self.poller.start()
    494         self.kernel.start()
    495         self.io_loop = ioloop.IOLoop.current()
    496         try:
--> 497             self.io_loop.start()
        self.io_loop.start = <bound method BaseAsyncIOLoop.start of <tornado.platform.asyncio.AsyncIOMainLoop object>>
    498         except KeyboardInterrupt:
    499             pass
    500 
    501 launch_new_instance = IPKernelApp.launch_instance

...........................................................................
/home/masha/anaconda3/lib/python3.6/site-packages/tornado/platform/asyncio.py in start(self=<tornado.platform.asyncio.AsyncIOMainLoop object>)
    127         except (RuntimeError, AssertionError):
    128             old_loop = None
    129         try:
    130             self._setup_logging()
    131             asyncio.set_event_loop(self.asyncio_loop)
--> 132             self.asyncio_loop.run_forever()
        self.asyncio_loop.run_forever = <bound method BaseEventLoop.run_forever of <_Uni...EventLoop running=True closed=False debug=False>>
    133         finally:
    134             asyncio.set_event_loop(old_loop)
    135 
    136     def stop(self):

...........................................................................
/home/masha/anaconda3/lib/python3.6/asyncio/base_events.py in run_forever(self=<_UnixSelectorEventLoop running=True closed=False debug=False>)
    417             sys.set_asyncgen_hooks(firstiter=self._asyncgen_firstiter_hook,
    418                                    finalizer=self._asyncgen_finalizer_hook)
    419         try:
    420             events._set_running_loop(self)
    421             while True:
--> 422                 self._run_once()
        self._run_once = <bound method BaseEventLoop._run_once of <_UnixS...EventLoop running=True closed=False debug=False>>
    423                 if self._stopping:
    424                     break
    425         finally:
    426             self._stopping = False

...........................................................................
/home/masha/anaconda3/lib/python3.6/asyncio/base_events.py in _run_once(self=<_UnixSelectorEventLoop running=True closed=False debug=False>)
   1429                         logger.warning('Executing %s took %.3f seconds',
   1430                                        _format_handle(handle), dt)
   1431                 finally:
   1432                     self._current_handle = None
   1433             else:
-> 1434                 handle._run()
        handle._run = <bound method Handle._run of <Handle IOLoop._run_callback(functools.par...7f7d76604158>))>>
   1435         handle = None  # Needed to break cycles when an exception occurs.
   1436 
   1437     def _set_coroutine_wrapper(self, enabled):
   1438         try:

...........................................................................
/home/masha/anaconda3/lib/python3.6/asyncio/events.py in _run(self=<Handle IOLoop._run_callback(functools.par...7f7d76604158>))>)
    140             self._callback = None
    141             self._args = None
    142 
    143     def _run(self):
    144         try:
--> 145             self._callback(*self._args)
        self._callback = <bound method IOLoop._run_callback of <tornado.platform.asyncio.AsyncIOMainLoop object>>
        self._args = (functools.partial(<function wrap.<locals>.null_wrapper at 0x7f7d76604158>),)
    146         except Exception as exc:
    147             cb = _format_callback_source(self._callback, self._args)
    148             msg = 'Exception in callback {}'.format(cb)
    149             context = {

...........................................................................
/home/masha/anaconda3/lib/python3.6/site-packages/tornado/ioloop.py in _run_callback(self=<tornado.platform.asyncio.AsyncIOMainLoop object>, callback=functools.partial(<function wrap.<locals>.null_wrapper at 0x7f7d76604158>))
    753         """Runs a callback with error handling.
    754 
    755         For use in subclasses.
    756         """
    757         try:
--> 758             ret = callback()
        ret = undefined
        callback = functools.partial(<function wrap.<locals>.null_wrapper at 0x7f7d76604158>)
    759             if ret is not None:
    760                 from tornado import gen
    761                 # Functions that return Futures typically swallow all
    762                 # exceptions and store them in the Future.  If a Future

...........................................................................
/home/masha/anaconda3/lib/python3.6/site-packages/tornado/stack_context.py in null_wrapper(*args=(), **kwargs={})
    295         # Fast path when there are no active contexts.
    296         def null_wrapper(*args, **kwargs):
    297             try:
    298                 current_state = _state.contexts
    299                 _state.contexts = cap_contexts[0]
--> 300                 return fn(*args, **kwargs)
        args = ()
        kwargs = {}
    301             finally:
    302                 _state.contexts = current_state
    303         null_wrapper._wrapped = True
    304         return null_wrapper

...........................................................................
/home/masha/anaconda3/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py in <lambda>()
    531             return
    532 
    533         if state & self.socket.events:
    534             # events still exist that haven't been processed
    535             # explicitly schedule handling to avoid missing events due to edge-triggered FDs
--> 536             self.io_loop.add_callback(lambda : self._handle_events(self.socket, 0))
    537 
    538     def _init_io_state(self):
    539         """initialize the ioloop event handler"""
    540         with stack_context.NullContext():

...........................................................................
/home/masha/anaconda3/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py in _handle_events(self=<zmq.eventloop.zmqstream.ZMQStream object>, fd=<zmq.sugar.socket.Socket object>, events=0)
    445             return
    446         zmq_events = self.socket.EVENTS
    447         try:
    448             # dispatch events:
    449             if zmq_events & zmq.POLLIN and self.receiving():
--> 450                 self._handle_recv()
        self._handle_recv = <bound method ZMQStream._handle_recv of <zmq.eventloop.zmqstream.ZMQStream object>>
    451                 if not self.socket:
    452                     return
    453             if zmq_events & zmq.POLLOUT and self.sending():
    454                 self._handle_send()

...........................................................................
/home/masha/anaconda3/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py in _handle_recv(self=<zmq.eventloop.zmqstream.ZMQStream object>)
    475             else:
    476                 raise
    477         else:
    478             if self._recv_callback:
    479                 callback = self._recv_callback
--> 480                 self._run_callback(callback, msg)
        self._run_callback = <bound method ZMQStream._run_callback of <zmq.eventloop.zmqstream.ZMQStream object>>
        callback = <function wrap.<locals>.null_wrapper>
        msg = [<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>]
    481         
    482 
    483     def _handle_send(self):
    484         """Handle a send event."""

...........................................................................
/home/masha/anaconda3/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py in _run_callback(self=<zmq.eventloop.zmqstream.ZMQStream object>, callback=<function wrap.<locals>.null_wrapper>, *args=([<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>],), **kwargs={})
    427         close our socket."""
    428         try:
    429             # Use a NullContext to ensure that all StackContexts are run
    430             # inside our blanket exception handler rather than outside.
    431             with stack_context.NullContext():
--> 432                 callback(*args, **kwargs)
        callback = <function wrap.<locals>.null_wrapper>
        args = ([<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>],)
        kwargs = {}
    433         except:
    434             gen_log.error("Uncaught exception in ZMQStream callback",
    435                           exc_info=True)
    436             # Re-raise the exception so that IOLoop.handle_callback_exception

...........................................................................
/home/masha/anaconda3/lib/python3.6/site-packages/tornado/stack_context.py in null_wrapper(*args=([<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>],), **kwargs={})
    295         # Fast path when there are no active contexts.
    296         def null_wrapper(*args, **kwargs):
    297             try:
    298                 current_state = _state.contexts
    299                 _state.contexts = cap_contexts[0]
--> 300                 return fn(*args, **kwargs)
        args = ([<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>],)
        kwargs = {}
    301             finally:
    302                 _state.contexts = current_state
    303         null_wrapper._wrapped = True
    304         return null_wrapper

...........................................................................
/home/masha/anaconda3/lib/python3.6/site-packages/ipykernel/kernelbase.py in dispatcher(msg=[<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>])
    278         if self.control_stream:
    279             self.control_stream.on_recv(self.dispatch_control, copy=False)
    280 
    281         def make_dispatcher(stream):
    282             def dispatcher(msg):
--> 283                 return self.dispatch_shell(stream, msg)
        msg = [<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>]
    284             return dispatcher
    285 
    286         for s in self.shell_streams:
    287             s.on_recv(make_dispatcher(s), copy=False)

...........................................................................
/home/masha/anaconda3/lib/python3.6/site-packages/ipykernel/kernelbase.py in dispatch_shell(self=<ipykernel.ipkernel.IPythonKernel object>, stream=<zmq.eventloop.zmqstream.ZMQStream object>, msg={'buffers': [], 'content': {'allow_stdin': True, 'code': '# earth mover\'s distance\nmetric_function = st.wa...ta/gevtev_spectra_EM_distances.txt", index=False)', 'silent': False, 'stop_on_error': True, 'store_history': True, 'user_expressions': {}}, 'header': {'date': datetime.datetime(2018, 9, 25, 12, 51, 54, 38352, tzinfo=tzutc()), 'msg_id': '9bf07e4c3a8a4513838b6f672a792bb0', 'msg_type': 'execute_request', 'session': 'cdf1cec532ac4165814644f14ef9b625', 'username': 'username', 'version': '5.2'}, 'metadata': {}, 'msg_id': '9bf07e4c3a8a4513838b6f672a792bb0', 'msg_type': 'execute_request', 'parent_header': {}})
    228             self.log.warning("Unknown message type: %r", msg_type)
    229         else:
    230             self.log.debug("%s: %s", msg_type, msg)
    231             self.pre_handler_hook()
    232             try:
--> 233                 handler(stream, idents, msg)
        handler = <bound method Kernel.execute_request of <ipykernel.ipkernel.IPythonKernel object>>
        stream = <zmq.eventloop.zmqstream.ZMQStream object>
        idents = [b'cdf1cec532ac4165814644f14ef9b625']
        msg = {'buffers': [], 'content': {'allow_stdin': True, 'code': '# earth mover\'s distance\nmetric_function = st.wa...ta/gevtev_spectra_EM_distances.txt", index=False)', 'silent': False, 'stop_on_error': True, 'store_history': True, 'user_expressions': {}}, 'header': {'date': datetime.datetime(2018, 9, 25, 12, 51, 54, 38352, tzinfo=tzutc()), 'msg_id': '9bf07e4c3a8a4513838b6f672a792bb0', 'msg_type': 'execute_request', 'session': 'cdf1cec532ac4165814644f14ef9b625', 'username': 'username', 'version': '5.2'}, 'metadata': {}, 'msg_id': '9bf07e4c3a8a4513838b6f672a792bb0', 'msg_type': 'execute_request', 'parent_header': {}}
    234             except Exception:
    235                 self.log.error("Exception in message handler:", exc_info=True)
    236             finally:
    237                 self.post_handler_hook()

...........................................................................
/home/masha/anaconda3/lib/python3.6/site-packages/ipykernel/kernelbase.py in execute_request(self=<ipykernel.ipkernel.IPythonKernel object>, stream=<zmq.eventloop.zmqstream.ZMQStream object>, ident=[b'cdf1cec532ac4165814644f14ef9b625'], parent={'buffers': [], 'content': {'allow_stdin': True, 'code': '# earth mover\'s distance\nmetric_function = st.wa...ta/gevtev_spectra_EM_distances.txt", index=False)', 'silent': False, 'stop_on_error': True, 'store_history': True, 'user_expressions': {}}, 'header': {'date': datetime.datetime(2018, 9, 25, 12, 51, 54, 38352, tzinfo=tzutc()), 'msg_id': '9bf07e4c3a8a4513838b6f672a792bb0', 'msg_type': 'execute_request', 'session': 'cdf1cec532ac4165814644f14ef9b625', 'username': 'username', 'version': '5.2'}, 'metadata': {}, 'msg_id': '9bf07e4c3a8a4513838b6f672a792bb0', 'msg_type': 'execute_request', 'parent_header': {}})
    394         if not silent:
    395             self.execution_count += 1
    396             self._publish_execute_input(code, parent, self.execution_count)
    397 
    398         reply_content = self.do_execute(code, silent, store_history,
--> 399                                         user_expressions, allow_stdin)
        user_expressions = {}
        allow_stdin = True
    400 
    401         # Flush output before sending the reply.
    402         sys.stdout.flush()
    403         sys.stderr.flush()

...........................................................................
/home/masha/anaconda3/lib/python3.6/site-packages/ipykernel/ipkernel.py in do_execute(self=<ipykernel.ipkernel.IPythonKernel object>, code='# earth mover\'s distance\nmetric_function = st.wa...ta/gevtev_spectra_EM_distances.txt", index=False)', silent=False, store_history=True, user_expressions={}, allow_stdin=True)
    203 
    204         self._forward_input(allow_stdin)
    205 
    206         reply_content = {}
    207         try:
--> 208             res = shell.run_cell(code, store_history=store_history, silent=silent)
        res = undefined
        shell.run_cell = <bound method ZMQInteractiveShell.run_cell of <ipykernel.zmqshell.ZMQInteractiveShell object>>
        code = '# earth mover\'s distance\nmetric_function = st.wa...ta/gevtev_spectra_EM_distances.txt", index=False)'
        store_history = True
        silent = False
    209         finally:
    210             self._restore_input()
    211 
    212         if res.error_before_exec is not None:

...........................................................................
/home/masha/anaconda3/lib/python3.6/site-packages/ipykernel/zmqshell.py in run_cell(self=<ipykernel.zmqshell.ZMQInteractiveShell object>, *args=('# earth mover\'s distance\nmetric_function = st.wa...ta/gevtev_spectra_EM_distances.txt", index=False)',), **kwargs={'silent': False, 'store_history': True})
    532             )
    533         self.payload_manager.write_payload(payload)
    534 
    535     def run_cell(self, *args, **kwargs):
    536         self._last_traceback = None
--> 537         return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
        self.run_cell = <bound method ZMQInteractiveShell.run_cell of <ipykernel.zmqshell.ZMQInteractiveShell object>>
        args = ('# earth mover\'s distance\nmetric_function = st.wa...ta/gevtev_spectra_EM_distances.txt", index=False)',)
        kwargs = {'silent': False, 'store_history': True}
    538 
    539     def _showtraceback(self, etype, evalue, stb):
    540         # try to preserve ordering of tracebacks and print statements
    541         sys.stdout.flush()

...........................................................................
/home/masha/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py in run_cell(self=<ipykernel.zmqshell.ZMQInteractiveShell object>, raw_cell='# earth mover\'s distance\nmetric_function = st.wa...ta/gevtev_spectra_EM_distances.txt", index=False)', store_history=True, silent=False, shell_futures=True)
   2657         -------
   2658         result : :class:`ExecutionResult`
   2659         """
   2660         try:
   2661             result = self._run_cell(
-> 2662                 raw_cell, store_history, silent, shell_futures)
        raw_cell = '# earth mover\'s distance\nmetric_function = st.wa...ta/gevtev_spectra_EM_distances.txt", index=False)'
        store_history = True
        silent = False
        shell_futures = True
   2663         finally:
   2664             self.events.trigger('post_execute')
   2665             if not silent:
   2666                 self.events.trigger('post_run_cell', result)

...........................................................................
/home/masha/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py in _run_cell(self=<ipykernel.zmqshell.ZMQInteractiveShell object>, raw_cell='# earth mover\'s distance\nmetric_function = st.wa...ta/gevtev_spectra_EM_distances.txt", index=False)', store_history=True, silent=False, shell_futures=True)
   2780                 self.displayhook.exec_result = result
   2781 
   2782                 # Execute the user code
   2783                 interactivity = 'none' if silent else self.ast_node_interactivity
   2784                 has_raised = self.run_ast_nodes(code_ast.body, cell_name,
-> 2785                    interactivity=interactivity, compiler=compiler, result=result)
        interactivity = 'last_expr'
        compiler = <IPython.core.compilerop.CachingCompiler object>
   2786                 
   2787                 self.last_execution_succeeded = not has_raised
   2788                 self.last_execution_result = result
   2789 

...........................................................................
/home/masha/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py in run_ast_nodes(self=<ipykernel.zmqshell.ZMQInteractiveShell object>, nodelist=[<_ast.Assign object>, <_ast.Assign object>], cell_name='<ipython-input-9-56a5bbefa96f>', interactivity='none', compiler=<IPython.core.compilerop.CachingCompiler object>, result=<ExecutionResult object at 7f7d76f99470, executi...rue silent=False shell_futures=True> result=None>)
   2896             raise ValueError("Interactivity was %r" % interactivity)
   2897         try:
   2898             for i, node in enumerate(to_run_exec):
   2899                 mod = ast.Module([node])
   2900                 code = compiler(mod, cell_name, "exec")
-> 2901                 if self.run_code(code, result):
        self.run_code = <bound method InteractiveShell.run_code of <ipykernel.zmqshell.ZMQInteractiveShell object>>
        code = <code object <module> at 0x7f7d765fb9c0, file "<ipython-input-9-56a5bbefa96f>", line 3>
        result = <ExecutionResult object at 7f7d76f99470, executi...rue silent=False shell_futures=True> result=None>
   2902                     return True
   2903 
   2904             for i, node in enumerate(to_run_interactive):
   2905                 mod = ast.Interactive([node])

...........................................................................
/home/masha/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py in run_code(self=<ipykernel.zmqshell.ZMQInteractiveShell object>, code_obj=<code object <module> at 0x7f7d765fb9c0, file "<ipython-input-9-56a5bbefa96f>", line 3>, result=<ExecutionResult object at 7f7d76f99470, executi...rue silent=False shell_futures=True> result=None>)
   2956         outflag = True  # happens in more places, so it's easier as default
   2957         try:
   2958             try:
   2959                 self.hooks.pre_run_code_hook()
   2960                 #rprint('Running code', repr(code_obj)) # dbg
-> 2961                 exec(code_obj, self.user_global_ns, self.user_ns)
        code_obj = <code object <module> at 0x7f7d765fb9c0, file "<ipython-input-9-56a5bbefa96f>", line 3>
        self.user_global_ns = {'DistanceMetric': <class 'sklearn.neighbors.dist_metrics.DistanceMetric'>, 'In': ['', 'import pandas as pd\nimport os\nimport numpy as np...sns\nfrom definitions import *\nfrom utils import *', '#path_to_gamma_bins shoud contain an absolute pa...o_gamma_bins = os.path.dirname(os.getcwd()) + "/"', '#name columns included in the final display\nothe.../" + get_name_for_gevtevxmm() + "_normalized.txt"', 'spectra = pd.read_csv(path_to_gamma_bins + path_...th_to_gamma_bins +path_to_catalog)\nspectra.head()', 'spectra = spectra.copy()\nspectra = spectra.reset_index(drop=True)', 'spectrum_columns = list_gev_spectrum_columns() + list_tev_spectrum_columns()', 'def get_distance_metric(metric_function):\n    if...c("pyfunc", func=metric_function)\n    return dist', 'from matplotlib import colors as mcolors\n\n\ncolor...me in by_hsv]\ncolors = list(colors.values())[19:]', '# earth mover\'s distance\nmetric_function = st.wa...ta/gevtev_spectra_EM_distances.txt", index=False)'], 'Out': {4:    xmm_SC_EP_1_FLUX  xmm_SC_EP_2_FLUX  xmm_SC_EP...40802   6.339723       0  

[5 rows x 34 columns]}, 'PCA': <class 'sklearn.decomposition.pca.PCA'>, 'SkyCoord': <class 'astropy.coordinates.sky_coordinate.SkyCoord'>, 'TSNE': <class 'sklearn.manifold.t_sne.TSNE'>, '_':    xmm_SC_EP_1_FLUX  xmm_SC_EP_2_FLUX  xmm_SC_EP...40802   6.339723       0  

[5 rows x 34 columns], '_4':    xmm_SC_EP_1_FLUX  xmm_SC_EP_2_FLUX  xmm_SC_EP...40802   6.339723       0  

[5 rows x 34 columns], '__': '', '___': '', ...}
        self.user_ns = {'DistanceMetric': <class 'sklearn.neighbors.dist_metrics.DistanceMetric'>, 'In': ['', 'import pandas as pd\nimport os\nimport numpy as np...sns\nfrom definitions import *\nfrom utils import *', '#path_to_gamma_bins shoud contain an absolute pa...o_gamma_bins = os.path.dirname(os.getcwd()) + "/"', '#name columns included in the final display\nothe.../" + get_name_for_gevtevxmm() + "_normalized.txt"', 'spectra = pd.read_csv(path_to_gamma_bins + path_...th_to_gamma_bins +path_to_catalog)\nspectra.head()', 'spectra = spectra.copy()\nspectra = spectra.reset_index(drop=True)', 'spectrum_columns = list_gev_spectrum_columns() + list_tev_spectrum_columns()', 'def get_distance_metric(metric_function):\n    if...c("pyfunc", func=metric_function)\n    return dist', 'from matplotlib import colors as mcolors\n\n\ncolor...me in by_hsv]\ncolors = list(colors.values())[19:]', '# earth mover\'s distance\nmetric_function = st.wa...ta/gevtev_spectra_EM_distances.txt", index=False)'], 'Out': {4:    xmm_SC_EP_1_FLUX  xmm_SC_EP_2_FLUX  xmm_SC_EP...40802   6.339723       0  

[5 rows x 34 columns]}, 'PCA': <class 'sklearn.decomposition.pca.PCA'>, 'SkyCoord': <class 'astropy.coordinates.sky_coordinate.SkyCoord'>, 'TSNE': <class 'sklearn.manifold.t_sne.TSNE'>, '_':    xmm_SC_EP_1_FLUX  xmm_SC_EP_2_FLUX  xmm_SC_EP...40802   6.339723       0  

[5 rows x 34 columns], '_4':    xmm_SC_EP_1_FLUX  xmm_SC_EP_2_FLUX  xmm_SC_EP...40802   6.339723       0  

[5 rows x 34 columns], '__': '', '___': '', ...}
   2962             finally:
   2963                 # Reset our crash handler in place
   2964                 sys.excepthook = old_excepthook
   2965         except SystemExit as e:

...........................................................................
/home/masha/PycharmProjects/gamma-bins/spectrums/<ipython-input-9-56a5bbefa96f> in <module>()
      1 # earth mover's distance
      2 metric_function = st.wasserstein_distance
----> 3 spectra_distances = pairwise_distances(spectra[spectrum_columns], metric=metric_function, n_jobs=6)
      4 # spectra_distances = get_distance_metric(metric_function).pairwise(spectra[spectrum_columns])
      5 #pd.DataFrame(spectra_distances).to_csv(path_to_gamma_bins +"markeddata/gevtev_spectra_EM_distances.txt", index=False)

...........................................................................
/home/masha/anaconda3/lib/python3.6/site-packages/sklearn/metrics/pairwise.py in pairwise_distances(X=      gev_nuFnu10000_100000  gev_nuFnu1000_3000 ...      -inf       -inf  

[2175 rows x 11 columns], Y=None, metric=<function wasserstein_distance>, n_jobs=6, **kwds={})
   1242         if n_jobs == 1 and X is Y:
   1243             return distance.squareform(distance.pdist(X, metric=metric,
   1244                                                       **kwds))
   1245         func = partial(distance.cdist, metric=metric, **kwds)
   1246 
-> 1247     return _parallel_pairwise(X, Y, func, n_jobs, **kwds)
        X =       gev_nuFnu10000_100000  gev_nuFnu1000_3000 ...      -inf       -inf  

[2175 rows x 11 columns]
        Y = None
        func = functools.partial(<function _pairwise_callable a...function wasserstein_distance at 0x7f7d7f398bf8>)
        n_jobs = 6
        kwds = {}
   1248 
   1249 
   1250 # These distances recquire boolean arrays, when using scipy.spatial.distance
   1251 PAIRWISE_BOOLEAN_FUNCTIONS = [

...........................................................................
/home/masha/anaconda3/lib/python3.6/site-packages/sklearn/metrics/pairwise.py in _parallel_pairwise(X=      gev_nuFnu10000_100000  gev_nuFnu1000_3000 ...      -inf       -inf  

[2175 rows x 11 columns], Y=      gev_nuFnu10000_100000  gev_nuFnu1000_3000 ...      -inf       -inf  

[2175 rows x 11 columns], func=functools.partial(<function _pairwise_callable a...function wasserstein_distance at 0x7f7d7f398bf8>), n_jobs=6, **kwds={})
   1091 
   1092     # TODO: in some cases, backend='threading' may be appropriate
   1093     fd = delayed(func)
   1094     ret = Parallel(n_jobs=n_jobs, verbose=0)(
   1095         fd(X, Y[s], **kwds)
-> 1096         for s in gen_even_slices(Y.shape[0], n_jobs))
        Y.shape = (2175, 11)
        n_jobs = 6
   1097 
   1098     return np.hstack(ret)
   1099 
   1100 

...........................................................................
/home/masha/anaconda3/lib/python3.6/site-packages/sklearn/externals/joblib/parallel.py in __call__(self=Parallel(n_jobs=6), iterable=<generator object _parallel_pairwise.<locals>.<genexpr>>)
    784             if pre_dispatch == "all" or n_jobs == 1:
    785                 # The iterable was consumed all at once by the above for loop.
    786                 # No need to wait for async callbacks to trigger to
    787                 # consumption.
    788                 self._iterating = False
--> 789             self.retrieve()
        self.retrieve = <bound method Parallel.retrieve of Parallel(n_jobs=6)>
    790             # Make sure that we get a last message telling us we are done
    791             elapsed_time = time.time() - self._start_time
    792             self._print('Done %3i out of %3i | elapsed: %s finished',
    793                         (len(self._output), len(self._output),

---------------------------------------------------------------------------
Sub-process traceback:
---------------------------------------------------------------------------
ValueError                                         Tue Sep 25 15:51:55 2018
PID: 20901                   Python 3.6.6: /home/masha/anaconda3/bin/python
...........................................................................
/home/masha/anaconda3/lib/python3.6/site-packages/sklearn/externals/joblib/parallel.py in __call__(self=<sklearn.externals.joblib.parallel.BatchedCalls object>)
    126     def __init__(self, iterator_slice):
    127         self.items = list(iterator_slice)
    128         self._size = len(self.items)
    129 
    130     def __call__(self):
--> 131         return [func(*args, **kwargs) for func, args, kwargs in self.items]
        self.items = [(functools.partial(<function _pairwise_callable a...function wasserstein_distance at 0x7f7d7f398bf8>), (      gev_nuFnu10000_100000  gev_nuFnu1000_3000 ...      -inf       -inf  

[2175 rows x 11 columns],      gev_nuFnu10000_100000  gev_nuFnu1000_3000  ...   0.065220   0.070567  

[363 rows x 11 columns]), {})]
    132 
    133     def __len__(self):
    134         return self._size
    135 

...........................................................................
/home/masha/anaconda3/lib/python3.6/site-packages/sklearn/externals/joblib/parallel.py in <listcomp>(.0=<list_iterator object>)
    126     def __init__(self, iterator_slice):
    127         self.items = list(iterator_slice)
    128         self._size = len(self.items)
    129 
    130     def __call__(self):
--> 131         return [func(*args, **kwargs) for func, args, kwargs in self.items]
        func = functools.partial(<function _pairwise_callable a...function wasserstein_distance at 0x7f7d7f398bf8>)
        args = (      gev_nuFnu10000_100000  gev_nuFnu1000_3000 ...      -inf       -inf  

[2175 rows x 11 columns],      gev_nuFnu10000_100000  gev_nuFnu1000_3000  ...   0.065220   0.070567  

[363 rows x 11 columns])
        kwargs = {}
    132 
    133     def __len__(self):
    134         return self._size
    135 

...........................................................................
/home/masha/anaconda3/lib/python3.6/site-packages/sklearn/metrics/pairwise.py in _pairwise_callable(X=      gev_nuFnu10000_100000  gev_nuFnu1000_3000 ...      -inf       -inf  

[2175 rows x 11 columns], Y=     gev_nuFnu10000_100000  gev_nuFnu1000_3000  ...   0.065220   0.070567  

[363 rows x 11 columns], metric=<function wasserstein_distance>, **kwds={})
   1099 
   1100 
   1101 def _pairwise_callable(X, Y, metric, **kwds):
   1102     """Handle the callable case for pairwise_{distances,kernels}
   1103     """
-> 1104     X, Y = check_pairwise_arrays(X, Y)
        X =       gev_nuFnu10000_100000  gev_nuFnu1000_3000 ...      -inf       -inf  

[2175 rows x 11 columns]
        Y =      gev_nuFnu10000_100000  gev_nuFnu1000_3000  ...   0.065220   0.070567  

[363 rows x 11 columns]
   1105 
   1106     if X is Y:
   1107         # Only calculate metric for upper triangle
   1108         out = np.zeros((X.shape[0], Y.shape[0]), dtype='float')

...........................................................................
/home/masha/anaconda3/lib/python3.6/site-packages/sklearn/metrics/pairwise.py in check_pairwise_arrays(X=array([[0.0679099 , 0.06776977, 0.07531292, ...,.....,       -inf,       -inf,
              -inf]]), Y=array([[0.0679099 , 0.06776977, 0.07531292, ...,....., 0.06289537, 0.06522032,
        0.07056718]]), precomputed=False, dtype=<class 'float'>)
    105     if Y is X or Y is None:
    106         X = Y = check_array(X, accept_sparse='csr', dtype=dtype,
    107                             warn_on_dtype=warn_on_dtype, estimator=estimator)
    108     else:
    109         X = check_array(X, accept_sparse='csr', dtype=dtype,
--> 110                         warn_on_dtype=warn_on_dtype, estimator=estimator)
        warn_on_dtype = False
        estimator = 'check_pairwise_arrays'
    111         Y = check_array(Y, accept_sparse='csr', dtype=dtype,
    112                         warn_on_dtype=warn_on_dtype, estimator=estimator)
    113 
    114     if precomputed:

...........................................................................
/home/masha/anaconda3/lib/python3.6/site-packages/sklearn/utils/validation.py in check_array(array=array([[0.0679099 , 0.06776977, 0.07531292, ...,.....,       -inf,       -inf,
              -inf]]), accept_sparse='csr', dtype=<class 'float'>, order=None, copy=False, force_all_finite=True, ensure_2d=True, allow_nd=False, ensure_min_samples=1, ensure_min_features=1, warn_on_dtype=False, estimator='check_pairwise_arrays')
    448             array = array.astype(np.float64)
    449         if not allow_nd and array.ndim >= 3:
    450             raise ValueError("Found array with dim %d. %s expected <= 2."
    451                              % (array.ndim, estimator_name))
    452         if force_all_finite:
--> 453             _assert_all_finite(array)
        array = array([[0.0679099 , 0.06776977, 0.07531292, ...,.....,       -inf,       -inf,
              -inf]])
    454 
    455     shape_repr = _shape_repr(array.shape)
    456     if ensure_min_samples > 0:
    457         n_samples = _num_samples(array)

...........................................................................
/home/masha/anaconda3/lib/python3.6/site-packages/sklearn/utils/validation.py in _assert_all_finite(X=array([[0.0679099 , 0.06776977, 0.07531292, ...,.....,       -inf,       -inf,
              -inf]]))
     39     # everything is finite; fall back to O(n) space np.isfinite to prevent
     40     # false positives from overflow in sum method.
     41     if (X.dtype.char in np.typecodes['AllFloat'] and not np.isfinite(X.sum())
     42             and not np.isfinite(X).all()):
     43         raise ValueError("Input contains NaN, infinity"
---> 44                          " or a value too large for %r." % X.dtype)
        X.dtype = dtype('float64')
     45 
     46 
     47 def assert_all_finite(X):
     48     """Throw a ValueError if X contains NaN or infinity.

ValueError: Input contains NaN, infinity or a value too large for dtype('float64').
___________________________________________________________________________

In [None]:
from utils import *

In [None]:
spectra_2D = build_images_KMeans(spectra, spectrum_columns, spectra_distances, colors, TSNE_learning_rate=300, TSNE_n_iter=340, TSNE_learning_rate2=200)

In [None]:
list_spectra_clusters, centers = print_clusters_structure_KMeans(spectra, spectrum_columns, other_names, spectra_distances, 6, colors,spectra_2D)
for i in range(len(list_spectra_clusters)):
    print("{} cluster ({})".format(i, colors[i]))
    display(list_spectra_clusters[i])
print("centroids:")
display(centers)

In [None]:
spectra_2D = build_images_DBSCAN(spectra, spectrum_columns, spectra_distances, colors, TSNE_learning_rate=500, TSNE_n_iter=1500, TSNE_learning_rate2=300)

In [None]:
list_spectra_clusters, centers = print_clusters_structure_DBSCAN(spectra, spectrum_columns, other_names, spectra_distances, 0.06, colors, spectra_2D)
for i in range(len(list_spectra_clusters)):
    print("{} cluster ({})".format(i, colors[i]))
    display(list_spectra_clusters[i])
print("centroids:")
display(centers)

In [None]:
spectra_distances.min(), spectra_distances.max()

* ### euclidean distance

In [None]:
# euclidean distance
from scipy.spatial.distance import euclidean
metric_function = "euclidean"
spectra_distances = pairwise_distances(spectra[spectrum_columns], metric=metric_function)

In [None]:
spectra_distances.min(), spectra_distances.max()

In [None]:
spectra_2D = build_images_KMeans(spectra, spectrum_columns, spectra_distances, colors, TSNE_learning_rate=500, TSNE_n_iter=1500, TSNE_learning_rate2=300)
list_spectra_clusters, centers = print_clusters_structure_KMeans(spectra, spectrum_columns, other_names, spectra_distances, 6, colors, spectra_2D)
for i in range(len(list_spectra_clusters)):
    print("{} cluster ({})".format(i, colors[i]))
    display(list_spectra_clusters[i])
print("centroids:")
display(centers)

In [None]:
markers = ['^', 'o']
cols = spectra['marked'].apply(lambda x: colors[x])
col = spectra['marked']

plt.subplots(figsize=(18, 6))
plt.subplot(131)
plt.title("PCA")
for i in range(len(spectra_2D)):
        #print(i)
        #print(spectra_2D[i, 0])
        #print(spectra_2D[i, 1])
        #print(cols[i])
        #print(col[i])
        if col[i] == 0:
            markersize = 5
        else:
            markersize = 100
        plt.scatter(spectra_2D[i, 0], spectra_2D[i, 1], c=cols[i], alpha=0.5, marker=markers[col[i]], s=markersize)

In [None]:
eps_l = [0.1 * i for i in range(12, 0, -2)] + [0.1]
print(eps_l)
spectra_2D = build_images_DBSCAN(spectra, spectrum_columns, spectra_distances, colors, eps_l=eps_l, TSNE_learning_rate=1000, TSNE_n_iter=1500, TSNE_learning_rate2=900)
list_spectra_clusters, centers = print_clusters_structure_DBSCAN(spectra, spectrum_columns, other_names, spectra_distances, 0.6, colors, spectra_2D)
for i in range(len(list_spectra_clusters)):
    print("{} cluster ({})".format(i, colors[i]))
    display(list_spectra_clusters[i])
print("centroids:")
display(centers)

* ### cosine distance

In [None]:
# cosine distance
# from scipy.spatial.distance import cosine
metric_function = "cosine"
spectra_distances = pairwise_distances(spectra[spectrum_columns], metric=metric_function)

In [None]:
spectra_2D = build_images_KMeans(spectra, spectrum_columns, spectra_distances, colors, TSNE_learning_rate=500, TSNE_n_iter=1500, TSNE_learning_rate2=300)
list_spectra_clusters, centers = print_clusters_structure_KMeans(spectra, spectrum_columns, other_names, spectra_distances, 5, colors, spectra_2D)
for i in range(len(list_spectra_clusters)):
    print("{} cluster ({})".format(i, colors[i]))
    display(list_spectra_clusters[i])
print("centroids:")
display(centers)

In [None]:
eps_l = [0.1 * i for i in range(10, 0, -2)] + [0.1]
print(eps_l)
spectra_2D = build_images_DBSCAN(spectra, spectrum_columns, spectra_distances, colors, eps_l=eps_l, TSNE_learning_rate=1000, TSNE_n_iter=1500, TSNE_learning_rate2=900)
list_spectra_clusters, centers = print_clusters_structure_DBSCAN(spectra, spectrum_columns, other_names, spectra_distances, 0.6, colors, spectra_2D)
for i in range(len(list_spectra_clusters)):
    print("{} cluster ({})".format(i, colors[i]))
    display(list_spectra_clusters[i])
print("centroids:")
display(centers)

* ### energy distance

In [None]:
# energy distance
metric_function = st.energy_distance
spectra_distances = pairwise_distances(spectra[spectrum_columns], metric=metric_function)

In [None]:
spectra_2D  = build_images_KMeans(spectra, spectrum_columns, spectra_distances, colors, TSNE_learning_rate=500, TSNE_n_iter=1500, TSNE_learning_rate2=300)
list_spectra_clusters, centers = print_clusters_structure_KMeans(spectra, spectrum_columns, other_names, spectra_distances, 4, colors, spectra_2D)
for i in range(len(list_spectra_clusters)):
    print("{} cluster ({})".format(i, colors[i]))
    display(list_spectra_clusters[i])
print("centroids:")
display(centers)

In [None]:
eps_l = [0.01 * i for i in range(20, 0, -3)]
print(eps_l)
spectra_2D = build_images_DBSCAN(spectra, spectrum_columns, spectra_distances, colors, eps_l=eps_l, TSNE_learning_rate=1000, TSNE_n_iter=1500, TSNE_learning_rate2=900)
list_spectra_clusters, centers = print_clusters_structure_DBSCAN(spectra, spectrum_columns, other_names, spectra_distances, 0.17, colors, spectra_2D)
for i in range(len(list_spectra_clusters)):
    print("{} cluster ({})".format(i, colors[i]))
    display(list_spectra_clusters[i])
print("centroids:")
display(centers)