# Safety Recommenders


Link to the Adress dataset: 
http://opendata.dc.gov/datasets/address-points


Importing modules:

In [1]:

%matplotlib notebook
import IPython
from IPython.display import display
from sqlalchemy import create_engine
import psycopg2
import psycopg2.extras
import pandas as pd
import csv
from numpy import nan as NA
from datetime import datetime
import re
import sys
import numpy as np
import matplotlib.pyplot as plt 
import scipy as sp
import sklearn
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import Normalizer
from sklearn import cross_validation
from sklearn.model_selection import cross_val_score
from sklearn.metrics import classification_report



   # Data ingestion:


In [2]:


class Ingestion(object):
    """This is the ingestion class to deal with csv directly from the same directory where the module is"""

    def __init__(self, file, sep = ",", header =0):
        self.file = file
        self.delimiter = sep
        self.df = pd.read_csv(file, sep= sep, header=header, engine='python')

    def file_csv(self):
        return self.df

class IngestionDatabase(object):

    """ This is the ingestion class to deal with postgress database """
    def __init__(self, database, query):
        self.engine = create_engine(database)
        self.table_names = self.engine.table_names()
        self.con = self.engine.connect()
        self.rs = self.con.execute(query)
        self.df = pd.DataFrame(self.rs.fetchmany(size=15))

    def cols(self):
        self.df.columns = self.rs.keys()
        return self.df


Creating the inglestion instances:

In [3]:
ingest = Ingestion('DC_Crime_Official.csv')
data = ingest.file_csv()

Exploring the raw dataset:

In [4]:
data.head(2)

Unnamed: 0,neighborhood_cluster,census_tract,offense_group,longitude,end_date,offense_text,shift,yblock,district,ward,...,xblock,block,start_date,cnn,offense,anc,report_date,method,location,latitude
0,cluster 32,7708.0,property,-76.955521,2016-10-04T16:37:29.000,motor vehicle theft,evening,135396.0,6.0,7,...,403859.0,3500 - 3526 block of minnesota avenue se,2016-10-04T16:09:08.000,16168699,motor vehicle theft,7B,2016-10-05T03:00:47.000Z,others,"38.886402972929169,-76.955523453842957",38.886395
1,cluster 4,100.0,property,-77.059614,2016-10-04T17:44:13.000,theft/other,evening,137194.0,2.0,2,...,394829.0,3000 - 3029 block of k street nw,2016-10-04T17:00:48.000,16168738,theft/other,2E,2016-10-05T01:36:11.000Z,others,"38.902593275715809,-77.059616752356732",38.902585


# Initial Data Wrangling


In [5]:
class Wrangling(object):

    def __init__(self, data = data):
        self.df = data
# drop empty rows
    def dropNA(self):
        self.df = self.df.dropna(how='all') # this only drop rows with 100% NA
        return self.df

    def __offense_column(self, text1 ='theft/other', text2 ='theft f/auto', text3 = 'assault w/dangerous weapon',
                       repl1 = 'theft', repl2 = 'auto theft', repl3 = 'assault with weapon' ):

        """There are 9 categories of offenses here:
        This function will transform the caterogies into more readable text
        for example : assault w/dangerous weapon = assault with dangerous weapon"""

        self.df['offense_text'] = self.df['offense_text'].replace([text1, text2, # add the column name to the arguments.
        text3], [repl1, repl2, repl3])
        return self.df

    def __date_time_parser(self, time = 'start_date'):
        ''' transform into datetime 64 object'''
        self.df[time] = pd.to_datetime(self.df['start_date'])
        return self.df

    def __latlong_cutter(self):
        """ Reduce the presition of the lat long data by cutting them."""
        self.newlat = []
        self.newlon = []
        for item in self.df['latitude']:
            item = str(item)
            item = float(item[0:6])
            self.newlat.append(item)

        self.df['latitude'] = self.newlat

        for item in self.df['longitude']:
            item = str(item)
            item = float(item[0:7])
            self.newlon.append(item)
        self.df['longitude'] = self.newlon

        return self.df

    def lat_long_rounder(self, decimals = 3):
        """ Reduce the presition of the lat long data by rounging decimals"""
        self.df['latitude'] = self.df['latitude'].round(decimals = decimals)
        self.df['longitude'] = self.df['longitude'].round(decimals = decimals)
        return self.df

    def adress_format_modifier(self):
        """This columns replace some of the content from the block columns to it is easy to parse it"""

        self.splitted = []

        # creating the splited column
        # this is working. it cannot be transformed into pandas' .replace because it is using the split method
        # Note that the built in .replace it does not work properly with integers and neither with large amounts of
        # things to change.. This works but it is not very wise to use.
        for row in self.df['block']:
            row = row.replace("block of ", "")
            row = row.replace("street", "St")
            row = row.replace("-", "")
            row = row.split(' ', 1)
            self.splitted.append(row)
        self.df['splitted'] = self.splitted
        return self.df

    def block_parser(self):
        """ This is the block parser that separate block in start and en blocks"""

        self.startblock = []
        self.endblock_1 = []
        self.endblock = []
        #  create column 'startblock'
        for row in self.df['splitted']:
            row = row[0]
            self.startblock.append(row)
        self.df['startblock'] = self.startblock
        # create column  'endblock_1'
        for row in self.df['splitted']:
            row = row[-1].lstrip() # enblock_1
            row = row.split(' ',1)
            self.endblock_1.append(row)
        self.df['endblock_1'] = self.endblock_1
        # create column  'endblock'
        for row in self.df['endblock_1']:
            row = row[0]
            self.endblock.append(row)
        self.df['endblock'] = self.endblock
        return self.df

    def street_parser(self):
        self.street = []
        #creating column 'street'
        for row in self.df['endblock_1']:
            row = row[1]
            self.street.append(row)
        self.df['street'] = self.street
        return self.df

Creating the wrangling instances:



In [6]:
Wrangled = Wrangling()
Wrangled.dropNA()
Wrangled.lat_long_rounder()
Wrangled.adress_format_modifier()
Wrangled.block_parser()
df = Wrangled.street_parser()



In [7]:
df

Unnamed: 0,neighborhood_cluster,census_tract,offense_group,longitude,end_date,offense_text,shift,yblock,district,ward,...,anc,report_date,method,location,latitude,splitted,startblock,endblock_1,endblock,street
0,cluster 32,7708.0,property,-76.956,2016-10-04T16:37:29.000,motor vehicle theft,evening,135396.0,6.0,7,...,7B,2016-10-05T03:00:47.000Z,others,"38.886402972929169,-76.955523453842957",38.886,"[3500, 3526 minnesota avenue se]",3500,"[3526, minnesota avenue se]",3526,minnesota avenue se
1,cluster 4,100.0,property,-77.060,2016-10-04T17:44:13.000,theft/other,evening,137194.0,2.0,2,...,2E,2016-10-05T01:36:11.000Z,others,"38.902593275715809,-77.059616752356732",38.903,"[3000, 3029 k St nw]",3000,"[3029, k St nw]",3029,k St nw
2,cluster 23,8904.0,property,-76.982,2016-10-04T20:10:43.000,theft/other,evening,136987.0,5.0,5,...,5D,2016-10-05T02:21:19.000Z,others,"38.900742359981244,-76.981925885322667",38.901,"[1500, 1599 maryland avenue ne]",1500,"[1599, maryland avenue ne]",1599,maryland avenue ne
3,cluster 29,9601.0,violent,-76.938,2016-10-14T00:12:56.000,robbery,midnight,138092.0,6.0,7,...,7D,2016-10-14T05:31:03.000Z,gun,"38.910681288493713,-76.937533164068512",38.911,"[1500, 1535 45th St ne]",1500,"[1535, 45th St ne]",1535,45th St ne
4,cluster 25,10600.0,violent,-76.996,2016-10-22T16:40:05.000,robbery,evening,137463.0,5.0,6,...,6C,2016-10-22T22:55:30.000Z,gun,"38.90503165003841,-76.995540557600265",38.905,"[700, 799 florida avenue ne]",700,"[799, florida avenue ne]",799,florida avenue ne
5,cluster 2,3200.0,property,-77.021,2016-10-22T22:56:31.000,theft f/auto,midnight,140137.0,3.0,1,...,1A,2016-10-24T04:29:28.000Z,others,"38.929118054612083,-77.021407353091945",38.929,"[500, 699 irving St nw]",500,"[699, irving St nw]",699,irving St nw
6,cluster 31,7807.0,property,-76.923,,theft/other,midnight,137294.0,6.0,7,...,7C,2016-10-23T04:11:28.000Z,others,"38.903483920886075,-76.922955583391357",38.903,"[934, 1099 eastern avenue ne]",934,"[1099, eastern avenue ne]",1099,eastern avenue ne
7,cluster 6,10700.0,property,-77.040,2016-10-23T04:28:53.000,theft f/auto,midnight,137847.0,2.0,2,...,2B,2016-10-23T09:24:07.000Z,others,"38.908484062358717,-77.040090587782586",38.908,"[1700, 1799 massachusetts avenue nw]",1700,"[1799, massachusetts avenue nw]",1799,massachusetts avenue nw
8,cluster 17,1804.0,property,-77.032,2016-10-23T06:00:06.000,theft f/auto,day,144484.0,4.0,4,...,4A,2016-10-23T11:46:17.000Z,others,"38.968274741325793,-77.031562108374985",38.968,"[1300, 1399 tewkesbury place nw]",1300,"[1399, tewkesbury place nw]",1399,tewkesbury place nw
9,cluster 26,7000.0,property,-76.996,2016-11-01T10:00:39.000,theft f/auto,day,134740.0,1.0,6,...,6B,2016-11-01T15:26:46.000Z,others,"38.880501876434913,-76.996187492452577",38.880,"[700, 753 7th St se]",700,"[753, 7th St se]",753,7th St se


Dropping repeated columns

In [8]:
df = df.drop(columns = ['location', 'endblock_1', 'splitted'])
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 69204 entries, 0 to 69203
Data columns (total 30 columns):
neighborhood_cluster    68431 non-null object
census_tract            69051 non-null float64
offense_group           69204 non-null object
longitude               69204 non-null float64
end_date                65716 non-null object
offense_text            69204 non-null object
shift                   69204 non-null object
yblock                  69204 non-null float64
district                69181 non-null float64
ward                    69204 non-null int64
year                    69204 non-null int64
offense_key             69204 non-null object
bid                     12302 non-null object
sector                  69175 non-null object
psa                     69175 non-null float64
ucrrank                 69204 non-null int64
block_group             69051 non-null object
voting_precinct         69204 non-null object
xblock                  69204 non-null float64
block         

# Machine Learning Section



Preparing X and y sets:

Eliminating Nans:

In [9]:
df = df.dropna()

In [10]:

X = df.drop(columns = ['ucrrank'])

y = df['ucrrank']



Encoding the Categorical variables

In [11]:
from sklearn.preprocessing import LabelEncoder
encoder = LabelEncoder() # it only support one dimentional columns.. look stack overflow

for colname,col in X.iteritems():
    if col is not float:
        X[colname] = LabelEncoder().fit_transform(col)
    


Creating the Training and test set:


In [12]:
X_train, X_test, y_train, y_test = cross_validation.train_test_split(X, y, test_size = 0.2, random_state = 0)

# Visual Exploration

In [None]:
# pd.plotting.scatter_matrix(X_train, c = y_train, figsize = (30, 30), marker ='o', hist_kwds = {'bins': 20},
#                            s = 60, alpha = 0.7)

In [15]:
plt.boxplot(X_train, manage_xticks = False)
plt.yscale("symlog")
plt.xlabel("Features")
plt.ylabel("Target Variable")
plt.show()

<IPython.core.display.Javascript object>

Scaling the features:


In [13]:
scaler = StandardScaler()
#scaler =  MinMaxScaler()
#scaler = Normalizer()
X_train = scaler.fit(X_train).transform(X_train)
X_test = scaler.fit(X_test).transform(X_test)

In [14]:
plt.boxplot(X_train, manage_xticks = False)
plt.yscale("symlog")
plt.xlabel("Features")
plt.ylabel("Target Variable")
plt.show()

<IPython.core.display.Javascript object>

# Performing simple Knn:


In [16]:
knn = KNeighborsClassifier(n_neighbors = 10, metric = 'manhattan', weights = 'uniform', algorithm = 'auto')
knn.fit(X_train, y_train)
predicted_knn = knn.predict(X_test)
print("Predictions: {}".format(predicted_knn))

Predictions: [6 5 7 ... 6 6 3]


Cross Validation

In [17]:
scores = cross_val_score(knn, X = X_train, y = y_train)
print ("Cross Validation Scores: {}".format(scores))

Cross Validation Scores: [0.92502466 0.92338047 0.9295821 ]


Score Reporting

In [18]:
report = classification_report(y_test, predicted_knn)
print (report)

             precision    recall  f1-score   support

          1       0.00      0.00      0.00         1
          2       0.50      0.06      0.11        16
          3       1.00      0.97      0.99        70
          4       0.85      0.96      0.90        93
          5       0.97      0.85      0.91        75
          6       0.95      0.99      0.97      1406
          7       0.94      0.89      0.91       541
          8       0.84      0.58      0.69        79

avg / total       0.94      0.94      0.94      2281



  'precision', 'predicted', average, warn_for)


# merging two datasets 

In [19]:
df1 = df[['latitude', 'longitude', 'ucrrank']] # selecting the relevant labels from the wrangled crime dataset (df).
df1.ucrrank.describe()

count    11402.000000
mean         6.096474
std          0.981268
min          1.000000
25%          6.000000
50%          6.000000
75%          7.000000
max          8.000000
Name: ucrrank, dtype: float64

In [20]:

# ingesting new dataset
ingest2 = Ingestion('Address_Points.csv')
data2 = ingest2.file_csv()

In [21]:

df2 = data2[['LATITUDE', 'LONGITUDE']] # selecting the relevant columns. 

df2.columns = df2.columns.str.lower() # transforming labels to lowercase


Befor merging. In the Adress_point dataset, we need a way to eliminate all the rows from the locations that match the crime dataset.

In [33]:

#matching_crime_values =  

lat = []
long = []

for colname, col, colname2, col2 in zip(df1.iteritems(), df2.iteritems()):
    if colname['latitude'] and colname['longitude'] =! colname2['latitude'] and colname2['longitude']:
        lat.



        
        

TypeError: string indices must be integers

In [23]:
df2['ucrrank'] = 10 # creating new safety ranking 10 ---> 0 risk 
df2

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


Unnamed: 0,latitude,longitude,ucrrank
0,38.893995,-76.959322,10
1,38.894594,-76.959040,10
2,38.890435,-76.959859,10
3,38.894384,-76.956349,10
4,38.894297,-76.956807,10
5,38.894365,-76.954544,10
6,38.894339,-76.955069,10
7,38.894952,-76.956565,10
8,38.893995,-76.958873,10
9,38.890784,-76.959292,10


In [69]:
Wrangled2 = Wrangling(df2)
# Wrangled.dropNA()
df2 = Wrangled2.lat_long_rounder()



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Mergin the two dataframes:

In [70]:
frames = [df1, df2]
df_merged = pd.concat(frames)

In [84]:
X = df_merged.drop(columns = ['ucrrank'])

y = df_merged['ucrrank']

In [85]:
# no need to encode the labels in this case... no categorical variables:


Creating the Training and test set:


In [86]:
X_train, X_test, y_train, y_test = cross_validation.train_test_split(X, y, test_size = 0.2, random_state = 0)

Scaling the features:

In [74]:
scaler = StandardScaler()
#scaler =  MinMaxScaler()
#scaler = Normalizer()
X_train = scaler.fit(X_train).transform(X_train)
X_test = scaler.fit(X_test).transform(X_test)

# Performing simple Knn on merged dataset

In [75]:
knn = KNeighborsClassifier(n_neighbors = 10, metric = 'manhattan', weights = 'uniform', algorithm = 'auto')
knn.fit(X_train, y_train)
predicted_knn = knn.predict(X_test)
print("Predictions: {}".format(predicted_knn))

Predictions: [10 10 10 ... 10  7 10]


array([10, 10, 10, ..., 10,  7, 10], dtype=int64)

In [64]:
scores = cross_val_score(knn, X = X_train, y = y_train)
print ("Cross Validation Scores: {}".format(scores))

Cross Validation Scores: [0.75414183 0.75394694 0.75525554]


In [65]:
report = classification_report(y_test, predicted_knn)
print (report)

             precision    recall  f1-score   support

          1       0.00      0.00      0.00        50
          2       0.10      0.02      0.03       131
          3       0.14      0.07      0.10       751
          4       0.10      0.03      0.05       954
          5       0.10      0.03      0.05       692
          6       0.53      0.55      0.54      5729
          7       0.38      0.29      0.33      4421
          8       0.16      0.02      0.04      1008
         10       0.82      0.92      0.87     29541

avg / total       0.68      0.73      0.70     43277



  'precision', 'predicted', average, warn_for)


This models calculates the most likely event to happen in a particular location. from 1 to 10 1 is the most serious crime and 10 is no crime. The presition of the latitude and longitude was reduced to a block. 

# ANN test.

In [123]:
import keras
from keras.models import Sequential
from keras.layers import Dense, Activation
from keras.utils import to_categorical
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from keras.utils import np_utils

Encoding target Variable:
    

In [89]:
y.describe

<bound method NDFrame.describe of 0          8
1          6
2          6
3          4
4          4
5          7
6          6
7          7
8          7
9          7
10         6
11         7
12         6
13         6
14         7
15         5
16         6
17         7
18         7
19         6
20         7
21         7
22         3
23         6
24         4
25         6
26         7
27         7
28         7
29         6
          ..
147151    10
147152    10
147153    10
147154    10
147155    10
147156    10
147157    10
147158    10
147159    10
147160    10
147161    10
147162    10
147163    10
147164    10
147165    10
147166    10
147167    10
147168    10
147169    10
147170    10
147171    10
147172    10
147173    10
147174    10
147175    10
147176    10
147177    10
147178    10
147179    10
147180    10
Name: ucrrank, Length: 216385, dtype: int64>

In [115]:

onehotencoder = OneHotEncoder()
y1 = y.reshape(-1, 1)
y1 = onehotencoder.fit_transform(y1).toarray()
y1 = pd.DataFrame(y1)
y1.columns = ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10']
y1

  This is separate from the ipykernel package so we can avoid doing imports until


Unnamed: 0,1,2,3,4,5,6,7,8,9,10
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
6,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
7,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
8,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
9,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0


In [101]:
X_train, X_test, y_train, y_test = cross_validation.train_test_split(X, y1, test_size = 0.2, random_state = 0)

In [106]:
model = Sequential()
model.add(Dense(units=2, input_dim=2))
model.add(Activation('relu'))
model.add(Dense(units=2))
model.add(Activation('relu'))
model.add(Dense(units=2))
model.add(Activation('relu'))
model.add(Dense(units=2))
model.add(Activation('relu'))
model.add(Dense(units=2))
model.add(Activation('relu'))
model.add(Dense(units=2))
model.add(Activation('relu'))
model.add(Dense(units=2))
model.add(Activation('relu'))
model.add(Dense(units=2))
model.add(Activation('relu'))
model.add(Dense(units=2))
model.add(Activation('relu'))
model.add(Activation('relu'))
model.add(Dense(units=2))
model.add(Activation('relu'))
model.add(Dense(units=2))
model.add(Activation('relu'))
model.add(Dense(units=2))
model.add(Activation('relu'))
model.add(Dense(units=2))
model.add(Activation('relu'))
model.add(Dense(units=2))
model.add(Activation('relu'))
model.add(Dense(units=2))
model.add(Activation('relu'))
model.add(Dense(units=2))
model.add(Activation('relu'))
model.add(Dense(units=2))
model.add(Activation('relu'))
model.add(Dense(units=10))
model.add(Activation('softmax'))

# 
model.compile(optimizer = 'adam', loss = 'categorical_crossentropy', metrics = ['accuracy'])
model.fit(X_train, y_train, batch_size = 30, epochs = 100)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


<keras.callbacks.History at 0x14ed0b792e8>

In [121]:
y_predict = model.predict(X_test)
y_test = y_test.astype(float)
y_predict = y_predict
#y_predict = y_predict > 0.1
#y_predict = pd.DataFrame(y_predict)
y_predict

array([[1.1833323e-03, 2.8913079e-03, 1.8129852e-02, ..., 2.3527231e-02,
        3.9218830e-05, 6.8080950e-01],
       [1.1833323e-03, 2.8913079e-03, 1.8129852e-02, ..., 2.3527231e-02,
        3.9218830e-05, 6.8080950e-01],
       [1.1833323e-03, 2.8913079e-03, 1.8129852e-02, ..., 2.3527231e-02,
        3.9218830e-05, 6.8080950e-01],
       ...,
       [1.1833323e-03, 2.8913079e-03, 1.8129852e-02, ..., 2.3527231e-02,
        3.9218830e-05, 6.8080950e-01],
       [1.1833323e-03, 2.8913079e-03, 1.8129852e-02, ..., 2.3527231e-02,
        3.9218830e-05, 6.8080950e-01],
       [1.1833323e-03, 2.8913079e-03, 1.8129852e-02, ..., 2.3527231e-02,
        3.9218827e-05, 6.8080950e-01]], dtype=float32)

In [122]:
from sklearn.metrics import confusion_matrix
from numpy import argmax
from sklearn.metrics import accuracy_score

cf = confusion_matrix(y_test.values.argmax(axis=1), y_predict.argmax(axis=1))
cf

array([[    0,     0,     0,     0,     0,     0,     0,     0,    50],
       [    0,     0,     0,     0,     0,     0,     0,     0,   131],
       [    0,     0,     0,     0,     0,     0,     0,     0,   751],
       [    0,     0,     0,     0,     0,     0,     0,     0,   954],
       [    0,     0,     0,     0,     0,     0,     0,     0,   692],
       [    0,     0,     0,     0,     0,     0,     0,     0,  5729],
       [    0,     0,     0,     0,     0,     0,     0,     0,  4421],
       [    0,     0,     0,     0,     0,     0,     0,     0,  1008],
       [    0,     0,     0,     0,     0,     0,     0,     0, 29541]],
      dtype=int64)