In [1]:
import pandas as pd

In [2]:
# Header names of the London data set.
london_headers = [
    "soa_code",
    "borough",
    "major_category",
    "minor_category",
    "value",
    "year",
    "month",
]

# Header names of the Baltimore data set.
baltimore_headers = [
    "CrimeDate",
    "CrimeTime",
    "CrimeCode",
    "Location",
    "Description",
    "Inside/Outside",
    "Weapon",
    "Post",
    "District",
    "Neighborhood",
    "Longitude",
    "Latitude",
    "Location 1",
    "Premise",
    "vri_name1",
    "Total Incidents",
]

# Header names of the LA data set.
la_headers = [
    "DR Number",
    "Date Reported",
    "Date Occurred",
    "Time Occurred",
    "Area ID",
    "Area Name",
    "Reporting District",
    "Crime Code",
    "Crime Code Description",
    "MO Codes",
    "Victim Age",
    "Victim Sex",
    "Victim Descent",
    "Premise Code",
    "Premise Description",
    "Weapon Used Code",
    "Weapon Description",
    "Status Code",
    "Status Description",
    "Crime Code 1",
    "Crime Code 2",
    "Crime Code 3",
    "Crime Code 4",
    "Address",
    "Cross Street",
    "Location",
]

# Header names of the global schema; every city list is compared against these.
global_headers = [
    'id',
    'type',
    'timestamp',
    'city',
    'district',
    'weapon',
]

In [3]:
# https://github.com/luozhouyang/python-string-similarity
from strsimpy.jaro_winkler import JaroWinkler
jaro_winkler = JaroWinkler()

def getJaroWinkler(s0, s1):
    """Return the Jaro-Winkler similarity of ``s0`` and ``s1``.

    Thin wrapper around the module-level ``jaro_winkler`` instance so the
    metric can be handed around as a plain two-argument function.
    """
    score = jaro_winkler.similarity(s0, s1)
    return score

In [4]:
from strsimpy.jaccard import Jaccard
jaccard = Jaccard(2)

def getJaccard(s0, s1):
    """Return the Jaccard similarity of ``s0`` and ``s1``.

    The module-level ``jaccard`` instance is asked for a distance; the
    similarity is taken as its complement (``1 - distance``).
    """
    distance = jaccard.distance(s0, s1)
    return 1 - distance

In [5]:
def calcMatrix(a0, a1, f):
    """Evaluate ``f`` on every pair drawn from ``a0`` and ``a1``.

    Args:
        a0: Iterable supplying the first argument of ``f``; one output row
            is produced per element.
        a1: Iterable supplying the second argument of ``f``; one output
            column is produced per element.
        f: Callable invoked as ``f(e0, e1)``.

    Returns:
        A list of rows (lists), where each row holds the results of ``f``
        for one element of ``a0`` against every element of ``a1``, in order.
    """
    matrix = []
    for row_item in a0:
        row = []
        for col_item in a1:
            row.append(f(row_item, col_item))
        matrix.append(row)
    return matrix
            
#def getTable(a0, a1, f):
#    m = calcMatrix(a0, a1, f)
#    return pd.DataFrame(data=m, columns=a1, index=a0)

import seaborn as sns

def getTable(a0, a1, f, title):
    """Build a colour-coded table of pairwise scores between two label lists.

    Args:
        a0: Row labels; each is passed as the first argument of ``f``.
        a1: Column labels; each is passed as the second argument of ``f``.
        f: Callable ``f(row_label, column_label)`` returning a numeric score.
        title: Caption displayed with the rendered table.

    Returns:
        A pandas ``Styler`` over the score matrix (clipped to ``[0, 1]``),
        captioned with ``title`` and shaded with a light-green gradient.
    """
    m = calcMatrix(a0, a1, f)
    df = pd.DataFrame(data=m, columns=a1, index=a0)
    # Clamp the scores so every value lies on the fixed 0..1 colour scale.
    df = df.clip(0, 1)
    cm = sns.light_palette("green", as_cmap=True)
    # axis=None with vmin/vmax pins the colour scale to the absolute 0..1
    # range for the whole table. The previous `low`/`high` arguments only
    # stretch the per-column data range, so equal scores in different columns
    # could get different shades and no cell ever reached the full colour.
    return (
        df.style
        .set_caption(title)
        .background_gradient(cmap=cm, axis=None, vmin=0, vmax=1)
    )

In [6]:
headers = [la_headers, london_headers, baltimore_headers, global_headers]
functions = [getJaccard, getJaroWinkler]

# Each city's header list paired with its table title, in the order the
# tables are displayed further down (london, la, baltimore).
comparisons = [
    (london_headers, "global - london"),
    (la_headers, "global - la"),
    (baltimore_headers, "global - baltimore"),
]

# One table per comparison for each similarity function; the global headers
# are always the rows.
f = getJaccard
tables_jaccard = [
    getTable(global_headers, city_headers, f, title)
    for city_headers, title in comparisons
]

f = getJaroWinkler
tables_jaro_winkler = [
    getTable(global_headers, city_headers, f, title)
    for city_headers, title in comparisons
]


In [7]:
# Overview of all header lists, one row per data set; rows for the shorter
# lists have empty trailing cells because the lists differ in length.
pd.DataFrame(data=headers, index=['la', 'london', 'baltimore', 'global'])

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,16,17,18,19,20,21,22,23,24,25
la,DR Number,Date Reported,Date Occurred,Time Occurred,Area ID,Area Name,Reporting District,Crime Code,Crime Code Description,MO Codes,...,Weapon Description,Status Code,Status Description,Crime Code 1,Crime Code 2,Crime Code 3,Crime Code 4,Address,Cross Street,Location
london,soa_code,borough,major_category,minor_category,value,year,month,,,,...,,,,,,,,,,
baltimore,CrimeDate,CrimeTime,CrimeCode,Location,Description,Inside/Outside,Weapon,Post,District,Neighborhood,...,,,,,,,,,,
global,id,type,timestamp,city,district,weapon,,,,,...,,,,,,,,,,


# Jaccard

## global - london

In [8]:
# Index 0 = global headers (rows) vs. London headers (columns), Jaccard scores.
tables_jaccard[0]

Unnamed: 0,soa_code,borough,major_category,minor_category,value,year,month
id,0,0,0,0,0,0.0,0.0
type,0,0,0,0,0,0.0,0.0
timestamp,0,0,0,0,0,0.0,0.0
city,0,0,0,0,0,0.0,0.0
district,0,0,0,0,0,0.0,0.0
weapon,0,0,0,0,0,0.142857,0.125


## global - la

In [9]:
# Index 1 = global headers (rows) vs. LA headers (columns), Jaccard scores.
tables_jaccard[1]

Unnamed: 0,DR Number,Date Reported,Date Occurred,Time Occurred,Area ID,Area Name,Reporting District,Crime Code,Crime Code Description,MO Codes,Victim Age,Victim Sex,Victim Descent,Premise Code,Premise Description,Weapon Used Code,Weapon Description,Status Code,Status Description,Crime Code 1,Crime Code 2,Crime Code 3,Crime Code 4,Address,Cross Street,Location
id,0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
type,0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
timestamp,0,0.0,0,0.111111,0.0,0.142857,0.0869565,0.133333,0.173913,0.0714286,0.133333,0.133333,0.166667,0.0,0.0833333,0.0,0.0909091,0.0588235,0.136364,0.125,0.125,0.125,0.125,0.0769231,0.0,0.0714286
city,0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
district,0,0.0,0,0.0,0.0,0.0,0.333333,0.0666667,0.04,0.0,0.142857,0.142857,0.111111,0.0588235,0.0869565,0.0,0.0454545,0.0,0.0434783,0.0625,0.0625,0.0625,0.0625,0.0,0.0588235,0.0
weapon,0,0.0666667,0,0.0,0.1,0.0833333,0.047619,0.0,0.0434783,0.0,0.0,0.0,0.0,0.0,0.0454545,0.25,0.235294,0.0,0.047619,0.0,0.0,0.0,0.0,0.0,0.0,0.0909091


## global - baltimore

In [10]:
# Index 2 = global headers (rows) vs. Baltimore headers (columns), Jaccard scores.
tables_jaccard[2]

Unnamed: 0,CrimeDate,CrimeTime,CrimeCode,Location,Description,Inside/Outside,Weapon,Post,District,Neighborhood,Longitude,Latitude,Location 1,Premise,vri_name1,Total Incidents
id,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0714286
type,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0
timestamp,0.142857,0.166667,0.142857,0.0714286,0.125,0.0,0.0,0.1,0.0714286,0,0.0,0.0714286,0.0625,0.0,0.142857,0.047619
city,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.1,0.111111,0.0,0.0,0.0,0.0625
district,0.0714286,0.0833333,0.0714286,0.0,0.0625,0.0,0.0,0.111111,0.75,0,0.0,0.0,0.0,0.0833333,0.0714286,0.0
weapon,0.0,0.0,0.0,0.0909091,0.0714286,0.0,0.666667,0.0,0.0,0,0.0833333,0.0,0.0769231,0.0,0.0,0.0


# Jaro-Winkler

## global - london

In [11]:
# Index 0 = global headers (rows) vs. London headers (columns), Jaro-Winkler scores.
tables_jaro_winkler[0]

Unnamed: 0,soa_code,borough,major_category,minor_category,value,year,month
id,0.0,0.0,0.0,0.52381,0.0,0.0,0.0
type,0.0,0.0,0.440476,0.440476,0.483333,0.5,0.0
timestamp,0.0,0.0,0.493386,0.504233,0.437037,0.453704,0.374074
city,0.0,0.0,0.547619,0.543651,0.0,0.0,0.483333
district,0.5,0.422619,0.418651,0.511905,0.0,0.458333,0.441667
weapon,0.361111,0.436508,0.492063,0.460317,0.455556,0.611111,0.0


## global - la

In [12]:
# Index 1 = global headers (rows) vs. LA headers (columns), Jaro-Winkler scores.
tables_jaro_winkler[1]

Unnamed: 0,DR Number,Date Reported,Date Occurred,Time Occurred,Area ID,Area Name,Reporting District,Crime Code,Crime Code Description,MO Codes,Victim Age,Victim Sex,Victim Descent,Premise Code,Premise Description,Weapon Used Code,Weapon Description,Status Code,Status Description,Crime Code 1,Crime Code 2,Crime Code 3,Crime Code 4,Address,Cross Street,Location
id,0.0,0.0,0.0,0.525641,0.0,0.0,0.518519,0.533333,0.69697,0.0,0.533333,0.533333,0.52381,0.527778,0.517544,0.0,0.0,0.0,0.0,0.527778,0.527778,0.527778,0.527778,0.547619,0.0,0.0
type,0.0,0.549145,0.551282,0.442308,0.464286,0.453704,0.527778,0.45,0.431818,0.458333,0.45,0.45,0.547619,0.444444,0.434211,0.375,0.37037,0.44697,0.537037,0.444444,0.444444,0.444444,0.444444,0.464286,0.0,0.0
timestamp,0.407407,0.51339,0.410256,0.521368,0.502646,0.555556,0.5,0.544444,0.594276,0.490741,0.433333,0.433333,0.570899,0.509259,0.539571,0.481481,0.472222,0.51936,0.555556,0.527778,0.527778,0.527778,0.527778,0.502646,0.462963,0.324074
city,0.0,0.442308,0.442308,0.442308,0.0,0.0,0.37037,0.45,0.431818,0.0,0.572222,0.572222,0.543651,0.444444,0.434211,0.0,0.0,0.44697,0.435185,0.444444,0.444444,0.444444,0.444444,0.0,0.444444,0.583333
district,0.0,0.535256,0.424145,0.424145,0.422619,0.412037,0.567593,0.316667,0.477273,0.0,0.55,0.55,0.594048,0.430556,0.577485,0.395833,0.567593,0.438131,0.583333,0.305556,0.305556,0.305556,0.305556,0.490079,0.527778,0.5
weapon,0.0,0.574786,0.32906,0.41453,0.539683,0.518519,0.62963,0.511111,0.474747,0.430556,0.0,0.0,0.0,0.5,0.406433,0.715278,0.703704,0.505051,0.314815,0.5,0.5,0.5,0.5,0.0,0.416667,0.513889


## global - baltimore

In [13]:
# Index 2 = global headers (rows) vs. Baltimore headers (columns), Jaro-Winkler scores.
tables_jaro_winkler[2]

Unnamed: 0,CrimeDate,CrimeTime,CrimeCode,Location,Description,Inside/Outside,Weapon,Post,District,Neighborhood,Longitude,Latitude,Location 1,Premise,vri_name1,Total Incidents
id,0.537037,0.537037,0.537037,0.0,0.0,0.714286,0.0,0.0,0.541667,0.527778,0.0,0.541667,0.0,0.0,0.537037,0.0
type,0.453704,0.453704,0.453704,0.0,0.393939,0.440476,0.444444,0.0,0.458333,0.444444,0.0,0.458333,0.45,0.464286,0.0,0.438889
timestamp,0.637037,0.62963,0.555556,0.324074,0.5367,0.493386,0.425926,0.407407,0.564815,0.296296,0.481481,0.569444,0.433333,0.588624,0.555556,0.451852
city,0.453704,0.453704,0.453704,0.583333,0.560606,0.440476,0.0,0.5,0.583333,0.444444,0.574074,0.416667,0.572222,0.0,0.453704,0.438889
district,0.458333,0.458333,0.324074,0.5,0.598485,0.594048,0.0,0.583333,0.916667,0.472222,0.490741,0.472222,0.447222,0.490079,0.324074,0.505556
weapon,0.425926,0.425926,0.518519,0.513889,0.505051,0.325397,0.888889,0.0,0.0,0.5,0.518519,0.430556,0.488889,0.436508,0.351852,0.455556
