In [31]:
import pandas as pd
import numpy as np

# Loading the necessary libraries
from sklearn.preprocessing import StandardScaler
from sklearn.cross_validation import train_test_split
from sklearn import model_selection
from sklearn.model_selection import GridSearchCV
#from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import KFold
from sklearn import metrics
from sklearn.metrics import silhouette_samples, silhouette_score

from sklearn.cluster import KMeans
from sklearn.cluster import AffinityPropagation
from sklearn.cluster import SpectralClustering
from sklearn.cluster import AgglomerativeClustering
from sklearn.cluster import DBSCAN



cust = pd.read_csv('custody_ML.csv')
shoot = pd.read_csv('shootings_ML.csv')

In [20]:
cust.head()

Unnamed: 0.1,Unnamed: 0,dept,custody_type,facility,race,sex,death_type,charge_status,age,year,month
0,0,389,2,134,4,1,5,0,58.0,2012,9
1,1,389,2,115,3,1,5,0,76.0,2016,9
2,2,175,1,0,4,1,6,2,30.0,2016,12
3,3,389,2,60,1,1,1,0,39.0,2011,11
4,4,389,2,77,1,1,5,0,31.0,2014,12


In [21]:
shoot.head()

Unnamed: 0.1,Unnamed: 0,age,number_officers,fatality,armed,race,sex,stop_reason,officer_race,dept,year,month
0,103,26.0,1.0,0,2,4,1,23,13,0,2010,9
1,104,16.0,1.0,0,2,2,1,13,40,0,2010,10
2,105,26.0,2.0,0,2,0,1,66,48,0,2010,11
3,106,35.0,1.0,0,0,4,1,86,5,0,2010,12
4,107,30.0,1.0,0,2,2,1,66,13,0,2011,5


In [32]:
cust.drop('Unnamed: 0', axis=1, inplace=True)

In [33]:
shoot.drop('Unnamed: 0', axis=1, inplace=True)

In [34]:
#Scale cust df
X_col = cust.columns

cust_scaled = pd.DataFrame(StandardScaler().fit_transform(cust), columns= X_col)
cust_scaled.head()

Unnamed: 0,dept,custody_type,facility,race,sex,death_type,charge_status,age,year,month
0,0.555878,0.123491,0.230097,1.156565,0.238506,0.155584,-0.605742,0.590814,0.412088,0.723706
1,0.555878,0.123491,-0.095784,0.406515,0.238506,0.155584,-0.605742,1.798521,1.579194,0.723706
2,-1.378794,-1.037655,-2.068228,1.156565,0.238506,0.943599,1.487142,-1.287842,1.579194,1.595334
3,0.555878,0.123491,-1.039127,-1.093584,0.238506,-2.996475,-0.605742,-0.683988,0.120312,1.304791
4,0.555878,0.123491,-0.747548,-1.093584,0.238506,0.155584,-0.605742,-1.220747,0.995641,1.595334


In [35]:
#Scale shoot df
X_cols = shoot.columns

shoot_scaled = pd.DataFrame(StandardScaler().fit_transform(shoot), columns = X_cols)
shoot_scaled.head()

Unnamed: 0,age,number_officers,fatality,armed,race,sex,stop_reason,officer_race,dept,year,month
0,-0.670348,-0.39433,-1.387608,0.980761,1.548471,-0.543893,-1.792733,-1.051157,-1.900016,-1.560899,0.76152
1,-1.71653,-0.39433,-1.387608,0.980761,-0.096378,-0.543893,-2.118722,0.439038,-1.900016,-1.560899,1.051969
2,-0.670348,0.469955,-1.387608,0.980761,-1.741227,-0.543893,-0.390982,0.880578,-1.900016,-1.560899,1.342418
3,0.271215,-0.39433,-1.387608,-1.040776,1.548471,-0.543893,0.260995,-1.492696,-1.900016,-1.560899,1.632866
4,-0.251876,-0.39433,-1.387608,0.980761,-0.096378,-0.543893,-0.390982,-1.051157,-1.900016,-1.048604,-0.400275


In [36]:
#create training and testing groups of data
train_cust, test_cust= train_test_split(cust_scaled, test_size = 0.2, random_state = 11)

In [27]:
#KMeans Clustering
k_range = range(2,11)

for i in k_range:
    model = KMeans(n_clusters=i,random_state=11)
    pred =model.fit_predict(train_cust)
    silhouette_avg = silhouette_score(train_cust,pred)
    print('The number of clusters, %d, and silhouette coefficient is %0.2f'% (i,silhouette_avg))

The number of clusters, 2, and silhouette coefficient is 0.28
The number of clusters, 3, and silhouette coefficient is 0.31
The number of clusters, 4, and silhouette coefficient is 0.32
The number of clusters, 5, and silhouette coefficient is 0.18
The number of clusters, 6, and silhouette coefficient is 0.17
The number of clusters, 7, and silhouette coefficient is 0.18
The number of clusters, 8, and silhouette coefficient is 0.18
The number of clusters, 9, and silhouette coefficient is 0.18
The number of clusters, 10, and silhouette coefficient is 0.17


In [37]:
# Affinity propagation
affPro = AffinityPropagation()
affPro.fit_predict(train_cust)
centers = affPro.cluster_centers_indices_
labels = affPro.labels_
print("Silhouette Coefficient: %0.3f"% silhouette_score(train_cust, labels, metric='euclidean'))

Silhouette Coefficient: 0.162


In [38]:
#Tuning Affinity Propogation
def grid_aff(parameters, x):
    kfold = model_selection.KFold(n_splits=10, shuffle = True, random_state=11)
    aff = AffinityPropagation()
    clf = GridSearchCV(aff, parameters, n_jobs =-1, cv = kfold, scoring='adjusted_mutual_info_score')
    clf.fit(x)
    
    print(clf.best_estimator_)
    print(clf.best_score_)
    print(clf.cluster_centers_)

In [42]:
param_grid = {'damping': [0.5,0.75, 1], 'max_iter': [50, 100, 200, 300, 350], 'convergence_iter': [7, 15, 23], 'verbose':['True','False']}
grid_aff(param_grid, train_cust)

JoblibTypeError: JoblibTypeError
___________________________________________________________________________
Multiprocessing exception:
...........................................................................
C:\Users\Lara\Miniconda3\lib\runpy.py in _run_module_as_main(mod_name='ipykernel_launcher', alter_argv=1)
    188         sys.exit(msg)
    189     main_globals = sys.modules["__main__"].__dict__
    190     if alter_argv:
    191         sys.argv[0] = mod_spec.origin
    192     return _run_code(code, main_globals, None,
--> 193                      "__main__", mod_spec)
        mod_spec = ModuleSpec(name='ipykernel_launcher', loader=<_f...nda3\\lib\\site-packages\\ipykernel_launcher.py')
    194 
    195 def run_module(mod_name, init_globals=None,
    196                run_name=None, alter_sys=False):
    197     """Execute a module's code without importing it

...........................................................................
C:\Users\Lara\Miniconda3\lib\runpy.py in _run_code(code=<code object <module> at 0x000001E6DBF37B70, fil...lib\site-packages\ipykernel_launcher.py", line 5>, run_globals={'__annotations__': {}, '__builtins__': <module 'builtins' (built-in)>, '__cached__': r'C:\Users\Lara\Miniconda3\lib\site-packages\__pycache__\ipykernel_launcher.cpython-36.pyc', '__doc__': 'Entry point for launching an IPython kernel.\n\nTh...orts until\nafter removing the cwd from sys.path.\n', '__file__': r'C:\Users\Lara\Miniconda3\lib\site-packages\ipykernel_launcher.py', '__loader__': <_frozen_importlib_external.SourceFileLoader object>, '__name__': '__main__', '__package__': '', '__spec__': ModuleSpec(name='ipykernel_launcher', loader=<_f...nda3\\lib\\site-packages\\ipykernel_launcher.py'), 'app': <module 'ipykernel.kernelapp' from 'C:\\Users\\L...a3\\lib\\site-packages\\ipykernel\\kernelapp.py'>, ...}, init_globals=None, mod_name='__main__', mod_spec=ModuleSpec(name='ipykernel_launcher', loader=<_f...nda3\\lib\\site-packages\\ipykernel_launcher.py'), pkg_name='', script_name=None)
     80                        __cached__ = cached,
     81                        __doc__ = None,
     82                        __loader__ = loader,
     83                        __package__ = pkg_name,
     84                        __spec__ = mod_spec)
---> 85     exec(code, run_globals)
        code = <code object <module> at 0x000001E6DBF37B70, fil...lib\site-packages\ipykernel_launcher.py", line 5>
        run_globals = {'__annotations__': {}, '__builtins__': <module 'builtins' (built-in)>, '__cached__': r'C:\Users\Lara\Miniconda3\lib\site-packages\__pycache__\ipykernel_launcher.cpython-36.pyc', '__doc__': 'Entry point for launching an IPython kernel.\n\nTh...orts until\nafter removing the cwd from sys.path.\n', '__file__': r'C:\Users\Lara\Miniconda3\lib\site-packages\ipykernel_launcher.py', '__loader__': <_frozen_importlib_external.SourceFileLoader object>, '__name__': '__main__', '__package__': '', '__spec__': ModuleSpec(name='ipykernel_launcher', loader=<_f...nda3\\lib\\site-packages\\ipykernel_launcher.py'), 'app': <module 'ipykernel.kernelapp' from 'C:\\Users\\L...a3\\lib\\site-packages\\ipykernel\\kernelapp.py'>, ...}
     86     return run_globals
     87 
     88 def _run_module_code(code, init_globals=None,
     89                     mod_name=None, mod_spec=None,

...........................................................................
C:\Users\Lara\Miniconda3\lib\site-packages\ipykernel_launcher.py in <module>()
     11     # This is added back by InteractiveShellApp.init_path()
     12     if sys.path[0] == '':
     13         del sys.path[0]
     14 
     15     from ipykernel import kernelapp as app
---> 16     app.launch_new_instance()

...........................................................................
C:\Users\Lara\Miniconda3\lib\site-packages\traitlets\config\application.py in launch_instance(cls=<class 'ipykernel.kernelapp.IPKernelApp'>, argv=None, **kwargs={})
    653 
    654         If a global instance already exists, this reinitializes and starts it
    655         """
    656         app = cls.instance(**kwargs)
    657         app.initialize(argv)
--> 658         app.start()
        app.start = <bound method IPKernelApp.start of <ipykernel.kernelapp.IPKernelApp object>>
    659 
    660 #-----------------------------------------------------------------------------
    661 # utility functions, for convenience
    662 #-----------------------------------------------------------------------------

...........................................................................
C:\Users\Lara\Miniconda3\lib\site-packages\ipykernel\kernelapp.py in start(self=<ipykernel.kernelapp.IPKernelApp object>)
    473         if self.poller is not None:
    474             self.poller.start()
    475         self.kernel.start()
    476         self.io_loop = ioloop.IOLoop.current()
    477         try:
--> 478             self.io_loop.start()
        self.io_loop.start = <bound method ZMQIOLoop.start of <zmq.eventloop.ioloop.ZMQIOLoop object>>
    479         except KeyboardInterrupt:
    480             pass
    481 
    482 launch_new_instance = IPKernelApp.launch_instance

...........................................................................
C:\Users\Lara\Miniconda3\lib\site-packages\zmq\eventloop\ioloop.py in start(self=<zmq.eventloop.ioloop.ZMQIOLoop object>)
    172             )
    173         return loop
    174     
    175     def start(self):
    176         try:
--> 177             super(ZMQIOLoop, self).start()
        self.start = <bound method ZMQIOLoop.start of <zmq.eventloop.ioloop.ZMQIOLoop object>>
    178         except ZMQError as e:
    179             if e.errno == ETERM:
    180                 # quietly return on ETERM
    181                 pass

...........................................................................
C:\Users\Lara\Miniconda3\lib\site-packages\tornado\ioloop.py in start(self=<zmq.eventloop.ioloop.ZMQIOLoop object>)
    883                 self._events.update(event_pairs)
    884                 while self._events:
    885                     fd, events = self._events.popitem()
    886                     try:
    887                         fd_obj, handler_func = self._handlers[fd]
--> 888                         handler_func(fd_obj, events)
        handler_func = <function wrap.<locals>.null_wrapper>
        fd_obj = <zmq.sugar.socket.Socket object>
        events = 1
    889                     except (OSError, IOError) as e:
    890                         if errno_from_exception(e) == errno.EPIPE:
    891                             # Happens when the client closes the connection
    892                             pass

...........................................................................
C:\Users\Lara\Miniconda3\lib\site-packages\tornado\stack_context.py in null_wrapper(*args=(<zmq.sugar.socket.Socket object>, 1), **kwargs={})
    272         # Fast path when there are no active contexts.
    273         def null_wrapper(*args, **kwargs):
    274             try:
    275                 current_state = _state.contexts
    276                 _state.contexts = cap_contexts[0]
--> 277                 return fn(*args, **kwargs)
        args = (<zmq.sugar.socket.Socket object>, 1)
        kwargs = {}
    278             finally:
    279                 _state.contexts = current_state
    280         null_wrapper._wrapped = True
    281         return null_wrapper

...........................................................................
C:\Users\Lara\Miniconda3\lib\site-packages\zmq\eventloop\zmqstream.py in _handle_events(self=<zmq.eventloop.zmqstream.ZMQStream object>, fd=<zmq.sugar.socket.Socket object>, events=1)
    435             # dispatch events:
    436             if events & IOLoop.ERROR:
    437                 gen_log.error("got POLLERR event on ZMQStream, which doesn't make sense")
    438                 return
    439             if events & IOLoop.READ:
--> 440                 self._handle_recv()
        self._handle_recv = <bound method ZMQStream._handle_recv of <zmq.eventloop.zmqstream.ZMQStream object>>
    441                 if not self.socket:
    442                     return
    443             if events & IOLoop.WRITE:
    444                 self._handle_send()

...........................................................................
C:\Users\Lara\Miniconda3\lib\site-packages\zmq\eventloop\zmqstream.py in _handle_recv(self=<zmq.eventloop.zmqstream.ZMQStream object>)
    467                 gen_log.error("RECV Error: %s"%zmq.strerror(e.errno))
    468         else:
    469             if self._recv_callback:
    470                 callback = self._recv_callback
    471                 # self._recv_callback = None
--> 472                 self._run_callback(callback, msg)
        self._run_callback = <bound method ZMQStream._run_callback of <zmq.eventloop.zmqstream.ZMQStream object>>
        callback = <function wrap.<locals>.null_wrapper>
        msg = [<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>]
    473                 
    474         # self.update_state()
    475         
    476 

...........................................................................
C:\Users\Lara\Miniconda3\lib\site-packages\zmq\eventloop\zmqstream.py in _run_callback(self=<zmq.eventloop.zmqstream.ZMQStream object>, callback=<function wrap.<locals>.null_wrapper>, *args=([<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>],), **kwargs={})
    409         close our socket."""
    410         try:
    411             # Use a NullContext to ensure that all StackContexts are run
    412             # inside our blanket exception handler rather than outside.
    413             with stack_context.NullContext():
--> 414                 callback(*args, **kwargs)
        callback = <function wrap.<locals>.null_wrapper>
        args = ([<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>],)
        kwargs = {}
    415         except:
    416             gen_log.error("Uncaught exception, closing connection.",
    417                           exc_info=True)
    418             # Close the socket on an uncaught exception from a user callback

...........................................................................
C:\Users\Lara\Miniconda3\lib\site-packages\tornado\stack_context.py in null_wrapper(*args=([<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>],), **kwargs={})
    272         # Fast path when there are no active contexts.
    273         def null_wrapper(*args, **kwargs):
    274             try:
    275                 current_state = _state.contexts
    276                 _state.contexts = cap_contexts[0]
--> 277                 return fn(*args, **kwargs)
        args = ([<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>],)
        kwargs = {}
    278             finally:
    279                 _state.contexts = current_state
    280         null_wrapper._wrapped = True
    281         return null_wrapper

...........................................................................
C:\Users\Lara\Miniconda3\lib\site-packages\ipykernel\kernelbase.py in dispatcher(msg=[<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>])
    276         if self.control_stream:
    277             self.control_stream.on_recv(self.dispatch_control, copy=False)
    278 
    279         def make_dispatcher(stream):
    280             def dispatcher(msg):
--> 281                 return self.dispatch_shell(stream, msg)
        msg = [<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>]
    282             return dispatcher
    283 
    284         for s in self.shell_streams:
    285             s.on_recv(make_dispatcher(s), copy=False)

...........................................................................
C:\Users\Lara\Miniconda3\lib\site-packages\ipykernel\kernelbase.py in dispatch_shell(self=<ipykernel.ipkernel.IPythonKernel object>, stream=<zmq.eventloop.zmqstream.ZMQStream object>, msg={'buffers': [], 'content': {'allow_stdin': True, 'code': "param_grid = {'damping': [0.5,0.75, 1], 'max_ite...'True','False']}\ngrid_aff(param_grid, train_cust)", 'silent': False, 'stop_on_error': True, 'store_history': True, 'user_expressions': {}}, 'header': {'date': datetime.datetime(2018, 1, 29, 22, 43, 2, 387131, tzinfo=tzutc()), 'msg_id': 'E3D7BF9AD35C48469FE1A959F2F59FE9', 'msg_type': 'execute_request', 'session': '1F5C1AC651564E9E8E3A00FE0AE30290', 'username': 'username', 'version': '5.2'}, 'metadata': {}, 'msg_id': 'E3D7BF9AD35C48469FE1A959F2F59FE9', 'msg_type': 'execute_request', 'parent_header': {}})
    227             self.log.warn("Unknown message type: %r", msg_type)
    228         else:
    229             self.log.debug("%s: %s", msg_type, msg)
    230             self.pre_handler_hook()
    231             try:
--> 232                 handler(stream, idents, msg)
        handler = <bound method Kernel.execute_request of <ipykernel.ipkernel.IPythonKernel object>>
        stream = <zmq.eventloop.zmqstream.ZMQStream object>
        idents = [b'1F5C1AC651564E9E8E3A00FE0AE30290']
        msg = {'buffers': [], 'content': {'allow_stdin': True, 'code': "param_grid = {'damping': [0.5,0.75, 1], 'max_ite...'True','False']}\ngrid_aff(param_grid, train_cust)", 'silent': False, 'stop_on_error': True, 'store_history': True, 'user_expressions': {}}, 'header': {'date': datetime.datetime(2018, 1, 29, 22, 43, 2, 387131, tzinfo=tzutc()), 'msg_id': 'E3D7BF9AD35C48469FE1A959F2F59FE9', 'msg_type': 'execute_request', 'session': '1F5C1AC651564E9E8E3A00FE0AE30290', 'username': 'username', 'version': '5.2'}, 'metadata': {}, 'msg_id': 'E3D7BF9AD35C48469FE1A959F2F59FE9', 'msg_type': 'execute_request', 'parent_header': {}}
    233             except Exception:
    234                 self.log.error("Exception in message handler:", exc_info=True)
    235             finally:
    236                 self.post_handler_hook()

...........................................................................
C:\Users\Lara\Miniconda3\lib\site-packages\ipykernel\kernelbase.py in execute_request(self=<ipykernel.ipkernel.IPythonKernel object>, stream=<zmq.eventloop.zmqstream.ZMQStream object>, ident=[b'1F5C1AC651564E9E8E3A00FE0AE30290'], parent={'buffers': [], 'content': {'allow_stdin': True, 'code': "param_grid = {'damping': [0.5,0.75, 1], 'max_ite...'True','False']}\ngrid_aff(param_grid, train_cust)", 'silent': False, 'stop_on_error': True, 'store_history': True, 'user_expressions': {}}, 'header': {'date': datetime.datetime(2018, 1, 29, 22, 43, 2, 387131, tzinfo=tzutc()), 'msg_id': 'E3D7BF9AD35C48469FE1A959F2F59FE9', 'msg_type': 'execute_request', 'session': '1F5C1AC651564E9E8E3A00FE0AE30290', 'username': 'username', 'version': '5.2'}, 'metadata': {}, 'msg_id': 'E3D7BF9AD35C48469FE1A959F2F59FE9', 'msg_type': 'execute_request', 'parent_header': {}})
    392         if not silent:
    393             self.execution_count += 1
    394             self._publish_execute_input(code, parent, self.execution_count)
    395 
    396         reply_content = self.do_execute(code, silent, store_history,
--> 397                                         user_expressions, allow_stdin)
        user_expressions = {}
        allow_stdin = True
    398 
    399         # Flush output before sending the reply.
    400         sys.stdout.flush()
    401         sys.stderr.flush()

...........................................................................
C:\Users\Lara\Miniconda3\lib\site-packages\ipykernel\ipkernel.py in do_execute(self=<ipykernel.ipkernel.IPythonKernel object>, code="param_grid = {'damping': [0.5,0.75, 1], 'max_ite...'True','False']}\ngrid_aff(param_grid, train_cust)", silent=False, store_history=True, user_expressions={}, allow_stdin=True)
    203 
    204         self._forward_input(allow_stdin)
    205 
    206         reply_content = {}
    207         try:
--> 208             res = shell.run_cell(code, store_history=store_history, silent=silent)
        res = undefined
        shell.run_cell = <bound method ZMQInteractiveShell.run_cell of <ipykernel.zmqshell.ZMQInteractiveShell object>>
        code = "param_grid = {'damping': [0.5,0.75, 1], 'max_ite...'True','False']}\ngrid_aff(param_grid, train_cust)"
        store_history = True
        silent = False
    209         finally:
    210             self._restore_input()
    211 
    212         if res.error_before_exec is not None:

...........................................................................
C:\Users\Lara\Miniconda3\lib\site-packages\ipykernel\zmqshell.py in run_cell(self=<ipykernel.zmqshell.ZMQInteractiveShell object>, *args=("param_grid = {'damping': [0.5,0.75, 1], 'max_ite...'True','False']}\ngrid_aff(param_grid, train_cust)",), **kwargs={'silent': False, 'store_history': True})
    528             )
    529         self.payload_manager.write_payload(payload)
    530 
    531     def run_cell(self, *args, **kwargs):
    532         self._last_traceback = None
--> 533         return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
        self.run_cell = <bound method ZMQInteractiveShell.run_cell of <ipykernel.zmqshell.ZMQInteractiveShell object>>
        args = ("param_grid = {'damping': [0.5,0.75, 1], 'max_ite...'True','False']}\ngrid_aff(param_grid, train_cust)",)
        kwargs = {'silent': False, 'store_history': True}
    534 
    535     def _showtraceback(self, etype, evalue, stb):
    536         # try to preserve ordering of tracebacks and print statements
    537         sys.stdout.flush()

...........................................................................
C:\Users\Lara\Miniconda3\lib\site-packages\IPython\core\interactiveshell.py in run_cell(self=<ipykernel.zmqshell.ZMQInteractiveShell object>, raw_cell="param_grid = {'damping': [0.5,0.75, 1], 'max_ite...'True','False']}\ngrid_aff(param_grid, train_cust)", store_history=True, silent=False, shell_futures=True)
   2723                 self.displayhook.exec_result = result
   2724 
   2725                 # Execute the user code
   2726                 interactivity = "none" if silent else self.ast_node_interactivity
   2727                 has_raised = self.run_ast_nodes(code_ast.body, cell_name,
-> 2728                    interactivity=interactivity, compiler=compiler, result=result)
        interactivity = 'last_expr'
        compiler = <IPython.core.compilerop.CachingCompiler object>
   2729                 
   2730                 self.last_execution_succeeded = not has_raised
   2731                 self.last_execution_result = result
   2732 

...........................................................................
C:\Users\Lara\Miniconda3\lib\site-packages\IPython\core\interactiveshell.py in run_ast_nodes(self=<ipykernel.zmqshell.ZMQInteractiveShell object>, nodelist=[<_ast.Assign object>, <_ast.Expr object>], cell_name='<ipython-input-42-6d5282d5cdf9>', interactivity='last', compiler=<IPython.core.compilerop.CachingCompiler object>, result=<ExecutionResult object at 1e692470550, executio..._before_exec=None error_in_exec=None result=None>)
   2851                     return True
   2852 
   2853             for i, node in enumerate(to_run_interactive):
   2854                 mod = ast.Interactive([node])
   2855                 code = compiler(mod, cell_name, "single")
-> 2856                 if self.run_code(code, result):
        self.run_code = <bound method InteractiveShell.run_code of <ipykernel.zmqshell.ZMQInteractiveShell object>>
        code = <code object <module> at 0x000001E692428E40, file "<ipython-input-42-6d5282d5cdf9>", line 2>
        result = <ExecutionResult object at 1e692470550, executio..._before_exec=None error_in_exec=None result=None>
   2857                     return True
   2858 
   2859             # Flush softspace
   2860             if softspace(sys.stdout, 0):

...........................................................................
C:\Users\Lara\Miniconda3\lib\site-packages\IPython\core\interactiveshell.py in run_code(self=<ipykernel.zmqshell.ZMQInteractiveShell object>, code_obj=<code object <module> at 0x000001E692428E40, file "<ipython-input-42-6d5282d5cdf9>", line 2>, result=<ExecutionResult object at 1e692470550, executio..._before_exec=None error_in_exec=None result=None>)
   2905         outflag = True  # happens in more places, so it's easier as default
   2906         try:
   2907             try:
   2908                 self.hooks.pre_run_code_hook()
   2909                 #rprint('Running code', repr(code_obj)) # dbg
-> 2910                 exec(code_obj, self.user_global_ns, self.user_ns)
        code_obj = <code object <module> at 0x000001E692428E40, file "<ipython-input-42-6d5282d5cdf9>", line 2>
        self.user_global_ns = {'AffinityPropagation': <class 'sklearn.cluster.affinity_propagation_.AffinityPropagation'>, 'AgglomerativeClustering': <class 'sklearn.cluster.hierarchical.AgglomerativeClustering'>, 'DBSCAN': <class 'sklearn.cluster.dbscan_.DBSCAN'>, 'GridSearchCV': <class 'sklearn.model_selection._search.GridSearchCV'>, 'In': ['', "import pandas as pd\nimport numpy as np\n\n# Loadin..._ML.csv')\nshoot = pd.read_csv('shootings_ML.csv')", 'cust.head()', 'shoot.head()', "cust.drop('Unnamed: 0', axis=1, inplace=True)", "shoot.drop('Unnamed: 0', axis=1, inplace=True)", '#Scale cust df\nX_col = cust.columns\n\ncust_scaled...ansform(cust), columns= X_col)\ncust_scaled.head()', '#Scale shoot df\nX_cols = shoot.columns\n\nshoot_sc...orm(shoot), columns = X_cols)\nshoot_scaled.head()', '#create training and testing groups of data\ntrai...(cust_scaled, test_size = 0.2, random_state = 11)', "#KMeans Clustering\nk_range = range(2,11)\n\nfor i ...ouette coefficient is %0.2f'% (i,silhouette_avg))", "# Affinity propagation\naffPro = AffinityPropagat...te_score(train_cust, labels, metric='euclidean'))", '# Spectral clustering\n\nfor k in range(2,11):\n   ...and silhouette coefficient is %0.2f" % (k,score))', "# Using PCA for dimensionality reduction\nP= PCA(...'str')\n\nprint(data_1.pred_cluster.value_counts())", '#Tuning Affinity Propogation\ndef grid_aff(parame...t clf.best_score_\n    print clf.cluster_centers_ ', '#Tuning Affinity Propogation\ndef grid_aff(parame...t clf.best_score_\n    print clf.cluster_centers_ ', '#Tuning Affinity Propogation\ndef grid_aff(parame...t clf.best_score_\n    print(clf.cluster_centers_)', '#Tuning Affinity Propogation\ndef grid_aff(parame...(clf.best_score_)\n    print(clf.cluster_centers_)', "param_grid = {'damping': [0.5,0.75, 1], 'max_ite...'True','False']}\ngrid_aff(param_grid, train_cust)", "import pandas as pd\nimport numpy as np\n\n# Loadin..._ML.csv')\nshoot = pd.read_csv('shootings_ML.csv')", "import pandas as pd\nimport numpy as np\n\n# Loadin..._ML.csv')\nshoot = pd.read_csv('shootings_ML.csv')", ...], 'KFold': <class 'sklearn.model_selection._split.KFold'>, 'KMeans': <class 'sklearn.cluster.k_means_.KMeans'>, 'Out': {2:    Unnamed: 0  dept  custody_type  facility  rac...11     11  
4              0  31.0  2014     12  , 3:    Unnamed: 0   age  number_officers  fatality  ...010     12  
4            13     0  2011      5  , 6:        dept  custody_type  facility      race   ...
4      -0.605742 -1.220747  0.995641  1.595334  , 7:         age  number_officers  fatality     armed...982     -1.051157 -1.900016 -1.048604 -0.400275  , 20:    Unnamed: 0  dept  custody_type  facility  rac...11     11  
4              0  31.0  2014     12  , 21:    Unnamed: 0   age  number_officers  fatality  ...010     12  
4            13     0  2011      5  , 24:        dept  custody_type  facility      race   ...
4      -0.605742 -1.220747  0.995641  1.595334  , 25:         age  number_officers  fatality     armed...982     -1.051157 -1.900016 -1.048604 -0.400275  , 34:        dept  custody_type  facility      race   ...
4      -0.605742 -1.220747  0.995641  1.595334  , 35:         age  number_officers  fatality     armed...982     -1.051157 -1.900016 -1.048604 -0.400275  , ...}, 'P': PCA(copy=True, iterated_power='auto', n_componen...None,
  svd_solver='auto', tol=0.0, whiten=False), 'PCA': <class 'sklearn.decomposition.pca.PCA'>, ...}
        self.user_ns = {'AffinityPropagation': <class 'sklearn.cluster.affinity_propagation_.AffinityPropagation'>, 'AgglomerativeClustering': <class 'sklearn.cluster.hierarchical.AgglomerativeClustering'>, 'DBSCAN': <class 'sklearn.cluster.dbscan_.DBSCAN'>, 'GridSearchCV': <class 'sklearn.model_selection._search.GridSearchCV'>, 'In': ['', "import pandas as pd\nimport numpy as np\n\n# Loadin..._ML.csv')\nshoot = pd.read_csv('shootings_ML.csv')", 'cust.head()', 'shoot.head()', "cust.drop('Unnamed: 0', axis=1, inplace=True)", "shoot.drop('Unnamed: 0', axis=1, inplace=True)", '#Scale cust df\nX_col = cust.columns\n\ncust_scaled...ansform(cust), columns= X_col)\ncust_scaled.head()', '#Scale shoot df\nX_cols = shoot.columns\n\nshoot_sc...orm(shoot), columns = X_cols)\nshoot_scaled.head()', '#create training and testing groups of data\ntrai...(cust_scaled, test_size = 0.2, random_state = 11)', "#KMeans Clustering\nk_range = range(2,11)\n\nfor i ...ouette coefficient is %0.2f'% (i,silhouette_avg))", "# Affinity propagation\naffPro = AffinityPropagat...te_score(train_cust, labels, metric='euclidean'))", '# Spectral clustering\n\nfor k in range(2,11):\n   ...and silhouette coefficient is %0.2f" % (k,score))', "# Using PCA for dimensionality reduction\nP= PCA(...'str')\n\nprint(data_1.pred_cluster.value_counts())", '#Tuning Affinity Propogation\ndef grid_aff(parame...t clf.best_score_\n    print clf.cluster_centers_ ', '#Tuning Affinity Propogation\ndef grid_aff(parame...t clf.best_score_\n    print clf.cluster_centers_ ', '#Tuning Affinity Propogation\ndef grid_aff(parame...t clf.best_score_\n    print(clf.cluster_centers_)', '#Tuning Affinity Propogation\ndef grid_aff(parame...(clf.best_score_)\n    print(clf.cluster_centers_)', "param_grid = {'damping': [0.5,0.75, 1], 'max_ite...'True','False']}\ngrid_aff(param_grid, train_cust)", "import pandas as pd\nimport numpy as np\n\n# Loadin..._ML.csv')\nshoot = pd.read_csv('shootings_ML.csv')", "import pandas as pd\nimport numpy as np\n\n# Loadin..._ML.csv')\nshoot = pd.read_csv('shootings_ML.csv')", ...], 'KFold': <class 'sklearn.model_selection._split.KFold'>, 'KMeans': <class 'sklearn.cluster.k_means_.KMeans'>, 'Out': {2:    Unnamed: 0  dept  custody_type  facility  rac...11     11  
4              0  31.0  2014     12  , 3:    Unnamed: 0   age  number_officers  fatality  ...010     12  
4            13     0  2011      5  , 6:        dept  custody_type  facility      race   ...
4      -0.605742 -1.220747  0.995641  1.595334  , 7:         age  number_officers  fatality     armed...982     -1.051157 -1.900016 -1.048604 -0.400275  , 20:    Unnamed: 0  dept  custody_type  facility  rac...11     11  
4              0  31.0  2014     12  , 21:    Unnamed: 0   age  number_officers  fatality  ...010     12  
4            13     0  2011      5  , 24:        dept  custody_type  facility      race   ...
4      -0.605742 -1.220747  0.995641  1.595334  , 25:         age  number_officers  fatality     armed...982     -1.051157 -1.900016 -1.048604 -0.400275  , 34:        dept  custody_type  facility      race   ...
4      -0.605742 -1.220747  0.995641  1.595334  , 35:         age  number_officers  fatality     armed...982     -1.051157 -1.900016 -1.048604 -0.400275  , ...}, 'P': PCA(copy=True, iterated_power='auto', n_componen...None,
  svd_solver='auto', tol=0.0, whiten=False), 'PCA': <class 'sklearn.decomposition.pca.PCA'>, ...}
   2911             finally:
   2912                 # Reset our crash handler in place
   2913                 sys.excepthook = old_excepthook
   2914         except SystemExit as e:

...........................................................................
C:\Users\Lara\Documents\Repository\Capstone2\<ipython-input-42-6d5282d5cdf9> in <module>()
      1 param_grid = {'damping': [0.5,0.75, 1], 'max_iter': [50, 100, 200, 300, 350], 'convergence_iter': [7, 15, 23], 'verbose':['True','False']}
----> 2 grid_aff(param_grid, train_cust)

...........................................................................
C:\Users\Lara\Documents\Repository\Capstone2\<ipython-input-38-66de2106d230> in grid_aff(parameters={'convergence_iter': [7, 15, 23], 'damping': [0.5, 0.75, 1], 'max_iter': [50, 100, 200, 300, 350], 'verbose': ['True', 'False']}, x=          dept  custody_type  facility      race...9  1.287418  1.595334  

[6183 rows x 10 columns])
      1 #Tuning Affinity Propogation
      2 def grid_aff(parameters, x):
      3     kfold = model_selection.KFold(n_splits=10, shuffle = True, random_state=11)
      4     aff = AffinityPropagation()
      5     clf = GridSearchCV(aff, parameters, n_jobs =-1, cv = kfold, scoring='adjusted_mutual_info_score')
----> 6     clf.fit(x)
      7     
      8     print(clf.best_estimator_)
      9     print(clf.best_score_)
     10     print(clf.cluster_centers_)

...........................................................................
C:\Users\Lara\Miniconda3\lib\site-packages\sklearn\model_selection\_search.py in fit(self=GridSearchCV(cv=KFold(n_splits=10, random_state=... scoring='adjusted_mutual_info_score', verbose=0), X=          dept  custody_type  facility      race...9  1.287418  1.595334  

[6183 rows x 10 columns], y=None, groups=None, **fit_params={})
    634                                   return_train_score=self.return_train_score,
    635                                   return_n_test_samples=True,
    636                                   return_times=True, return_parameters=False,
    637                                   error_score=self.error_score)
    638           for parameters, (train, test) in product(candidate_params,
--> 639                                                    cv.split(X, y, groups)))
        cv.split = <bound method _BaseKFold.split of KFold(n_splits=10, random_state=11, shuffle=True)>
        X =           dept  custody_type  facility      race...9  1.287418  1.595334  

[6183 rows x 10 columns]
        y = None
        groups = None
    640 
    641         # if one choose to see train score, "out" will contain train score info
    642         if self.return_train_score:
    643             (train_score_dicts, test_score_dicts, test_sample_counts, fit_time,

...........................................................................
C:\Users\Lara\Miniconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in __call__(self=Parallel(n_jobs=-1), iterable=<generator object BaseSearchCV.fit.<locals>.<genexpr>>)
    784             if pre_dispatch == "all" or n_jobs == 1:
    785                 # The iterable was consumed all at once by the above for loop.
    786                 # No need to wait for async callbacks to trigger to
    787                 # consumption.
    788                 self._iterating = False
--> 789             self.retrieve()
        self.retrieve = <bound method Parallel.retrieve of Parallel(n_jobs=-1)>
    790             # Make sure that we get a last message telling us we are done
    791             elapsed_time = time.time() - self._start_time
    792             self._print('Done %3i out of %3i | elapsed: %s finished',
    793                         (len(self._output), len(self._output),

---------------------------------------------------------------------------
Sub-process traceback:
---------------------------------------------------------------------------
TypeError                                          Mon Jan 29 16:45:44 2018
PID: 7568                 Python 3.6.3: C:\Users\Lara\Miniconda3\python.exe
...........................................................................
C:\Users\Lara\Miniconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in __call__(self=<sklearn.externals.joblib.parallel.BatchedCalls object>)
    126     def __init__(self, iterator_slice):
    127         self.items = list(iterator_slice)
    128         self._size = len(self.items)
    129 
    130     def __call__(self):
--> 131         return [func(*args, **kwargs) for func, args, kwargs in self.items]
        self.items = [(<function _fit_and_score>, (AffinityPropagation(affinity='euclidean', conver....5, max_iter=50, preference=None, verbose='True'),           dept  custody_type  facility      race...9  1.287418  1.595334  

[6183 rows x 10 columns], None, {'score': make_scorer(adjusted_mutual_info_score)}, array([   0,    2,    3, ..., 6180, 6181, 6182]), array([   1,    5,   15,   32,   36,   42,   47,...6135, 6139, 6149, 6152,
       6158, 6161, 6162]), 0, {'convergence_iter': 7, 'damping': 0.5, 'max_iter': 50, 'verbose': 'True'}), {'error_score': 'raise', 'fit_params': {}, 'return_n_test_samples': True, 'return_parameters': False, 'return_times': True, 'return_train_score': 'warn'})]
    132 
    133     def __len__(self):
    134         return self._size
    135 

...........................................................................
C:\Users\Lara\Miniconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in <listcomp>(.0=<list_iterator object>)
    126     def __init__(self, iterator_slice):
    127         self.items = list(iterator_slice)
    128         self._size = len(self.items)
    129 
    130     def __call__(self):
--> 131         return [func(*args, **kwargs) for func, args, kwargs in self.items]
        func = <function _fit_and_score>
        args = (AffinityPropagation(affinity='euclidean', conver....5, max_iter=50, preference=None, verbose='True'),           dept  custody_type  facility      race...9  1.287418  1.595334  

[6183 rows x 10 columns], None, {'score': make_scorer(adjusted_mutual_info_score)}, array([   0,    2,    3, ..., 6180, 6181, 6182]), array([   1,    5,   15,   32,   36,   42,   47,...6135, 6139, 6149, 6152,
       6158, 6161, 6162]), 0, {'convergence_iter': 7, 'damping': 0.5, 'max_iter': 50, 'verbose': 'True'})
        kwargs = {'error_score': 'raise', 'fit_params': {}, 'return_n_test_samples': True, 'return_parameters': False, 'return_times': True, 'return_train_score': 'warn'}
    132 
    133     def __len__(self):
    134         return self._size
    135 

...........................................................................
C:\Users\Lara\Miniconda3\lib\site-packages\sklearn\model_selection\_validation.py in _fit_and_score(estimator=AffinityPropagation(affinity='euclidean', conver....5, max_iter=50, preference=None, verbose='True'), X=          dept  custody_type  facility      race...9  1.287418  1.595334  

[6183 rows x 10 columns], y=None, scorer={'score': make_scorer(adjusted_mutual_info_score)}, train=array([   0,    2,    3, ..., 6180, 6181, 6182]), test=array([   1,    5,   15,   32,   36,   42,   47,...6135, 6139, 6149, 6152,
       6158, 6161, 6162]), verbose=0, parameters={'convergence_iter': 7, 'damping': 0.5, 'max_iter': 50, 'verbose': 'True'}, fit_params={}, return_train_score='warn', return_parameters=False, return_n_test_samples=True, return_times=True, error_score='raise')
    483                              " make sure that it has been spelled correctly.)")
    484 
    485     else:
    486         fit_time = time.time() - start_time
    487         # _score will return dict if is_multimetric is True
--> 488         test_scores = _score(estimator, X_test, y_test, scorer, is_multimetric)
        test_scores = {}
        estimator = AffinityPropagation(affinity='euclidean', conver....5, max_iter=50, preference=None, verbose='True')
        X_test =           dept  custody_type  facility      race...14  0.995641  0.142621  

[619 rows x 10 columns]
        y_test = None
        scorer = {'score': make_scorer(adjusted_mutual_info_score)}
        is_multimetric = True
    489         score_time = time.time() - start_time - fit_time
    490         if return_train_score:
    491             train_scores = _score(estimator, X_train, y_train, scorer,
    492                                   is_multimetric)

...........................................................................
C:\Users\Lara\Miniconda3\lib\site-packages\sklearn\model_selection\_validation.py in _score(estimator=AffinityPropagation(affinity='euclidean', conver....5, max_iter=50, preference=None, verbose='True'), X_test=          dept  custody_type  facility      race...14  0.995641  0.142621  

[619 rows x 10 columns], y_test=None, scorer={'score': make_scorer(adjusted_mutual_info_score)}, is_multimetric=True)
    518 
    519     Will return a single float if is_multimetric is False and a dict of floats,
    520     if is_multimetric is True
    521     """
    522     if is_multimetric:
--> 523         return _multimetric_score(estimator, X_test, y_test, scorer)
        estimator = AffinityPropagation(affinity='euclidean', conver....5, max_iter=50, preference=None, verbose='True')
        X_test =           dept  custody_type  facility      race...14  0.995641  0.142621  

[619 rows x 10 columns]
        y_test = None
        scorer = {'score': make_scorer(adjusted_mutual_info_score)}
    524     else:
    525         if y_test is None:
    526             score = scorer(estimator, X_test)
    527         else:

...........................................................................
C:\Users\Lara\Miniconda3\lib\site-packages\sklearn\model_selection\_validation.py in _multimetric_score(estimator=AffinityPropagation(affinity='euclidean', conver....5, max_iter=50, preference=None, verbose='True'), X_test=          dept  custody_type  facility      race...14  0.995641  0.142621  

[619 rows x 10 columns], y_test=None, scorers={'score': make_scorer(adjusted_mutual_info_score)})
    546     """Return a dict of score for multimetric scoring"""
    547     scores = {}
    548 
    549     for name, scorer in scorers.items():
    550         if y_test is None:
--> 551             score = scorer(estimator, X_test)
        score = undefined
        scorer = make_scorer(adjusted_mutual_info_score)
        estimator = AffinityPropagation(affinity='euclidean', conver....5, max_iter=50, preference=None, verbose='True')
        X_test =           dept  custody_type  facility      race...14  0.995641  0.142621  

[619 rows x 10 columns]
    552         else:
    553             score = scorer(estimator, X_test, y_test)
    554 
    555         if hasattr(score, 'item'):

TypeError: __call__() missing 1 required positional argument: 'y_true'
___________________________________________________________________________

In [None]:
# Spectral clustering

for k in range(2,11):
    spect = SpectralClustering(n_clusters=k,random_state=11)
    spect.fit_predict(train_cust)
    spectlabel = spect.labels_
    score=silhouette_score(train_cust, spectlabel, metric='euclidean')
    print("The number of clusters, %d, and silhouette coefficient is %0.2f" % (k,score))