In [1]:
import pandas as pd

In [2]:
# Column names of each crime data set whose schemas are compared below.
# NOTE(review): presumably copied from the source CSV headers - confirm.
london_headers = [
    "soa_code",
    "borough",
    "major_category",
    "minor_category",
    "value",
    "year",
    "month",
]

baltimore_headers = [
    "CrimeDate",
    "CrimeTime",
    "CrimeCode",
    "Location",
    "Description",
    "Inside/Outside",
    "Weapon",
    "Post",
    "District",
    "Neighborhood",
    "Longitude",
    "Latitude",
    "Location 1",
    "Premise",
    "vri_name1",
    "Total Incidents",
]

la_headers = [
    "DR Number",
    "Date Reported",
    "Date Occurred",
    "Time Occurred",
    "Area ID",
    "Area Name",
    "Reporting District",
    "Crime Code",
    "Crime Code Description",
    "MO Codes",
    "Victim Age",
    "Victim Sex",
    "Victim Descent",
    "Premise Code",
    "Premise Description",
    "Weapon Used Code",
    "Weapon Description",
    "Status Code",
    "Status Description",
    "Crime Code 1",
    "Crime Code 2",
    "Crime Code 3",
    "Crime Code 4",
    "Address",
    "Cross Street",
    "Location",
]

# NOTE(review): looks like the unified target schema the three sources are
# matched against - confirm.
global_headers = [
    'id',
    'type',
    'timestamp',
    'city',
    'district',
    'weapon',
]

In [3]:
# https://github.com/luozhouyang/python-string-similarity
from strsimpy.jaro_winkler import JaroWinkler
jaro_winkler = JaroWinkler()

def getJaroWinkler(s0, s1):
    """Return the Jaro-Winkler similarity of the strings ``s0`` and ``s1``.

    Delegates to the module-level ``jaro_winkler`` instance.
    """
    score = jaro_winkler.similarity(s0, s1)
    return score

In [4]:
from strsimpy.jaccard import Jaccard
jaccard = Jaccard(2)

def getJaccard(s0, s1):
    """Return the Jaccard similarity of the strings ``s0`` and ``s1``.

    Delegates to the module-level ``jaccard`` instance, which is constructed
    with ``Jaccard(2)``.

    ``similarity`` is called directly rather than computing ``1 - distance``:
    strsimpy defines the distance as ``1 - similarity``, so the round trip
    only adds floating-point rounding error to the score.
    """
    return jaccard.similarity(s0, s1)

In [5]:
def calcMatrix(a0, a1, f):
    """Apply ``f`` to every pair drawn from ``a0`` and ``a1``.

    Returns a list of rows: one row per element of ``a0`` and, within each
    row, one entry ``f(e0, e1)`` per element ``e1`` of ``a1``.
    """
    matrix = []
    for row_item in a0:
        row = []
        for col_item in a1:
            row.append(f(row_item, col_item))
        matrix.append(row)
    return matrix
            
#def getTable(a0, a1, f):
#    m = calcMatrix(a0, a1, f)
#    return pd.DataFrame(data=m, columns=a1, index=a0)

import seaborn as sns

def getTable(a0, a1, f, title):
    """Build a colour-coded table of pairwise scores between two label lists.

    Parameters
    ----------
    a0 : sequence
        Labels used as the table's row index.
    a1 : sequence
        Labels used as the table's columns.
    f : callable
        Scoring function called as ``f(e0, e1)`` for every row/column pair.
    title : str or None
        Caption shown with the table; no caption is set when it is empty
        or ``None``.

    Returns
    -------
    pandas Styler
        The scores, clipped to ``[0, 1]``, on a light-green background
        gradient.
    """
    m = calcMatrix(a0, a1, f)
    df = pd.DataFrame(data=m, columns=a1, index=a0)
    # Clamp the scores so they always fall inside the fixed colour scale below.
    df = df.clip(0, 1)
    cm = sns.light_palette("green", as_cmap=True)
    # vmin/vmax pin the colour scale to the score range [0, 1], so one shade
    # means the same score in every column and every table. The previous
    # `low=0, high=1` arguments are range-extension multipliers, not value
    # bounds: colours were normalised per column and the column maximum only
    # reached the middle of the palette.
    styled = df.style.background_gradient(cmap=cm, vmin=0, vmax=1)
    # `title` used to be accepted but silently ignored.
    if title:
        styled = styled.set_caption(title)
    return styled

In [6]:
headers = [la_headers, london_headers, baltimore_headers, global_headers]
functions = [getJaccard, getJaroWinkler]

# Every comparison as (row labels, column labels, title). The order matters:
# later cells pick tables out of the result lists by position.
comparisons = [
    (la_headers, london_headers, "la - london"),
    (la_headers, baltimore_headers, "la - baltimore"),
    (baltimore_headers, london_headers, "baltimore - london"),
    (global_headers, london_headers, "global - london"),
    (global_headers, la_headers, "global - la"),
    (global_headers, baltimore_headers, "global - baltimore"),
]

tables_jaccard = []
tables_jaro_winkler = []

# The result lists are paired with `functions` by position (Jaccard first,
# Jaro-Winkler second), so both metrics always cover the same comparisons.
for f, tables in zip(functions, [tables_jaccard, tables_jaro_winkler]):
    for rows, cols, title in comparisons:
        tables.append(getTable(rows, cols, f, title))


In [7]:
# Overview of the four header lists, one row per source. The index labels
# follow the order in which the lists were placed in `headers`.
pd.DataFrame(data=headers, index=['la', 'london', 'baltimore', 'global'])

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,16,17,18,19,20,21,22,23,24,25
la,DR Number,Date Reported,Date Occurred,Time Occurred,Area ID,Area Name,Reporting District,Crime Code,Crime Code Description,MO Codes,...,Weapon Description,Status Code,Status Description,Crime Code 1,Crime Code 2,Crime Code 3,Crime Code 4,Address,Cross Street,Location
london,soa_code,borough,major_category,minor_category,value,year,month,,,,...,,,,,,,,,,
baltimore,CrimeDate,CrimeTime,CrimeCode,Location,Description,Inside/Outside,Weapon,Post,District,Neighborhood,...,,,,,,,,,,
global,id,type,timestamp,city,district,weapon,,,,,...,,,,,,,,,,


# Jaccard

## la - london

In [8]:
# Jaccard scores: la_headers (rows) vs. london_headers (columns).
tables_jaccard[0]

Unnamed: 0,soa_code,borough,major_category,minor_category,value,year,month
DR Number,0.0,0.0,0.0,0.0,0,0.0,0.0
Date Reported,0.0,0.0625,0.15,0.15,0,0.0,0.0
Date Occurred,0.0,0.0,0.0909091,0.0909091,0,0.0,0.0
Time Occurred,0.0,0.0,0.0,0.0,0,0.0,0.0
Area ID,0.0,0.0,0.0,0.0,0,0.125,0.0
Area Name,0.0,0.0,0.0,0.0,0,0.1,0.0
Reporting District,0.0,0.0454545,0.0357143,0.0740741,0,0.0,0.0
Crime Code,0.142857,0.0,0.0,0.0,0,0.0,0.0
Crime Code Description,0.0833333,0.0,0.0,0.0,0,0.0,0.0454545
MO Codes,0.166667,0.0,0.0,0.0,0,0.0,0.0


## la - baltimore

In [9]:
# Jaccard scores: la_headers (rows) vs. baltimore_headers (columns).
tables_jaccard[1]

Unnamed: 0,CrimeDate,CrimeTime,CrimeCode,Location,Description,Inside/Outside,Weapon,Post,District,Neighborhood,Longitude,Latitude,Location 1,Premise,vri_name1,Total Incidents
DR Number,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Date Reported,0.1875,0.0,0.0,0.0588235,0.0,0.0,0.0666667,0.0,0.0,0.047619,0.0,0.0588235,0.0526316,0.0,0.0,0.0
Date Occurred,0.176471,0.0,0.0,0.0555556,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0555556,0.05,0.0588235,0.0,0.0
Time Occurred,0.111111,0.2,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0588235,0.0526316,0.0
Area ID,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0909091,0.0,0.0526316
Area Name,0.0666667,0.0769231,0.0666667,0.0,0.0,0.0,0.0833333,0.0,0.0,0.0,0.0,0.0,0.0,0.0769231,0.142857,0.0
Reporting District,0.0416667,0.0454545,0.0416667,0.0434783,0.08,0.0,0.047619,0.0526316,0.411765,0.037037,0.0416667,0.0434783,0.04,0.0454545,0.0416667,0.0
Crime Code,0.307692,0.363636,0.7,0.0,0.0555556,0.0555556,0.0,0.0,0.0666667,0.0526316,0.0625,0.0666667,0.0,0.0,0.133333,0.0454545
Crime Code Description,0.173913,0.190476,0.35,0.130435,0.526316,0.0357143,0.0434783,0.0,0.04,0.0344828,0.08,0.0833333,0.12,0.0,0.08,0.03125
MO Codes,0.0,0.0,0.25,0.0,0.0625,0.0625,0.0,0.0,0.0,0.0588235,0.0714286,0.0769231,0.0,0.0,0.0,0.05


## baltimore - london

In [10]:
# Jaccard scores: baltimore_headers (rows) vs. london_headers (columns).
tables_jaccard[2]

Unnamed: 0,soa_code,borough,major_category,minor_category,value,year,month
CrimeDate,0.0,0.0,0.111111,0.111111,0.0,0.0,0.0
CrimeTime,0.0,0.0,0.0,0.0,0.0,0.0,0.0
CrimeCode,0.153846,0.0,0.0,0.0,0.0,0.0,0.0
Location,0.0,0.0,0.117647,0.117647,0.0,0.0,0.1
Description,0.0,0.0,0.0,0.0,0.0,0.0,0.0769231
Inside/Outside,0.0625,0.0,0.0,0.0,0.0,0.0,0.0
Weapon,0.0,0.0,0.0,0.0,0.0,0.142857,0.125
Post,0.0,0.0,0.0,0.0,0.0,0.0,0.0
District,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Neighborhood,0.0588235,0.214286,0.0454545,0.0454545,0.0,0.0,0.0


## global - london

In [11]:
# Jaccard scores: global_headers (rows) vs. london_headers (columns).
tables_jaccard[3]

Unnamed: 0,soa_code,borough,major_category,minor_category,value,year,month
id,0,0,0,0,0,0.0,0.0
type,0,0,0,0,0,0.0,0.0
timestamp,0,0,0,0,0,0.0,0.0
city,0,0,0,0,0,0.0,0.0
district,0,0,0,0,0,0.0,0.0
weapon,0,0,0,0,0,0.142857,0.125


## global - la

In [12]:
# Jaccard scores: global_headers (rows) vs. la_headers (columns).
tables_jaccard[4]

Unnamed: 0,DR Number,Date Reported,Date Occurred,Time Occurred,Area ID,Area Name,Reporting District,Crime Code,Crime Code Description,MO Codes,Victim Age,Victim Sex,Victim Descent,Premise Code,Premise Description,Weapon Used Code,Weapon Description,Status Code,Status Description,Crime Code 1,Crime Code 2,Crime Code 3,Crime Code 4,Address,Cross Street,Location
id,0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
type,0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
timestamp,0,0.0,0,0.111111,0.0,0.142857,0.0869565,0.133333,0.173913,0.0714286,0.133333,0.133333,0.166667,0.0,0.0833333,0.0,0.0909091,0.0588235,0.136364,0.125,0.125,0.125,0.125,0.0769231,0.0,0.0714286
city,0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
district,0,0.0,0,0.0,0.0,0.0,0.333333,0.0666667,0.04,0.0,0.142857,0.142857,0.111111,0.0588235,0.0869565,0.0,0.0454545,0.0,0.0434783,0.0625,0.0625,0.0625,0.0625,0.0,0.0588235,0.0
weapon,0,0.0666667,0,0.0,0.1,0.0833333,0.047619,0.0,0.0434783,0.0,0.0,0.0,0.0,0.0,0.0454545,0.25,0.235294,0.0,0.047619,0.0,0.0,0.0,0.0,0.0,0.0,0.0909091


## global - baltimore

In [13]:
# Jaccard scores: global_headers (rows) vs. baltimore_headers (columns).
tables_jaccard[5]

Unnamed: 0,CrimeDate,CrimeTime,CrimeCode,Location,Description,Inside/Outside,Weapon,Post,District,Neighborhood,Longitude,Latitude,Location 1,Premise,vri_name1,Total Incidents
id,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0714286
type,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0
timestamp,0.142857,0.166667,0.142857,0.0714286,0.125,0.0,0.0,0.1,0.0714286,0,0.0,0.0714286,0.0625,0.0,0.142857,0.047619
city,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.1,0.111111,0.0,0.0,0.0,0.0625
district,0.0714286,0.0833333,0.0714286,0.0,0.0625,0.0,0.0,0.111111,0.75,0,0.0,0.0,0.0,0.0833333,0.0714286,0.0
weapon,0.0,0.0,0.0,0.0909091,0.0714286,0.0,0.666667,0.0,0.0,0,0.0833333,0.0,0.0769231,0.0,0.0,0.0


# Jaro-Winkler

## la - london

In [14]:
# Jaro-Winkler scores: la_headers (rows) vs. london_headers (columns).
tables_jaro_winkler[0]

Unnamed: 0,soa_code,borough,major_category,minor_category,value,year,month
DR Number,0.412037,0.417989,0.404762,0.404762,0.540741,0.0,0.0
Date Reported,0.424145,0.406593,0.447253,0.447253,0.517949,0.384615,0.425641
Date Occurred,0.424145,0.406593,0.518926,0.518926,0.499145,0.384615,0.425641
Time Occurred,0.301282,0.406593,0.513919,0.518926,0.351282,0.442308,0.425641
Area ID,0.422619,0.428571,0.309524,0.47619,0.395238,0.615079,0.0
Area Name,0.490741,0.417989,0.493386,0.515873,0.374074,0.583333,0.0
Reporting District,0.287037,0.531746,0.47619,0.546958,0.418519,0.537037,0.477778
Crime Code,0.447222,0.495238,0.395238,0.485714,0.433333,0.4,0.433333
Crime Code Description,0.477273,0.292208,0.478355,0.535835,0.415152,0.363636,0.49697
MO Codes,0.583333,0.422619,0.464286,0.464286,0.441667,0.0,0.441667


## la - baltimore

In [15]:
# Jaro-Winkler scores: la_headers (rows) vs. baltimore_headers (columns).
tables_jaro_winkler[1]

Unnamed: 0,CrimeDate,CrimeTime,CrimeCode,Location,Description,Inside/Outside,Weapon,Post,District,Neighborhood,Longitude,Latitude,Location 1,Premise,vri_name1,Total Incidents
DR Number,0.481481,0.481481,0.481481,0.0,0.468013,0.28836,0.0,0.0,0.412037,0.527778,0.481481,0.490741,0.0,0.502646,0.481481,0.451852
Date Reported,0.51339,0.458689,0.500712,0.535256,0.557887,0.447802,0.574786,0.442308,0.602564,0.547009,0.410256,0.535256,0.485897,0.479853,0.458689,0.509402
Date Occurred,0.51339,0.458689,0.410256,0.424145,0.47972,0.513919,0.32906,0.442308,0.519231,0.49359,0.500712,0.519231,0.402564,0.479853,0.458689,0.509402
Time Occurred,0.584046,0.51339,0.580057,0.301282,0.390443,0.574481,0.41453,0.0,0.424145,0.463675,0.500712,0.424145,0.399145,0.54304,0.521368,0.505983
Area ID,0.588624,0.502646,0.502646,0.422619,0.322511,0.309524,0.539683,0.0,0.422619,0.40873,0.0,0.422619,0.495238,0.52381,0.502646,0.542857
Area Name,0.62963,0.62963,0.555556,0.412037,0.301347,0.455026,0.518519,0.0,0.412037,0.296296,0.407407,0.490741,0.474074,0.587302,0.62963,0.511111
Reporting District,0.477778,0.472222,0.388889,0.574074,0.640572,0.420635,0.62963,0.537037,0.567593,0.431481,0.555556,0.402778,0.592593,0.51455,0.388889,0.575926
Crime Code,0.877778,0.877778,0.983333,0.483333,0.504545,0.478571,0.511111,0.0,0.316667,0.494444,0.433333,0.447222,0.422222,0.671429,0.614815,0.472222
Crime Code Description,0.651034,0.651034,0.847796,0.280303,0.427273,0.510823,0.474747,0.431818,0.392677,0.479798,0.375421,0.392677,0.367677,0.598846,0.542088,0.558586
MO Codes,0.490741,0.490741,0.564815,0.416667,0.0,0.511905,0.430556,0.458333,0.0,0.305556,0.569444,0.5,0.408333,0.345238,0.412037,0.505556


## baltimore - london

In [16]:
# Jaro-Winkler scores: baltimore_headers (rows) vs. london_headers (columns).
tables_jaro_winkler[2]

Unnamed: 0,soa_code,borough,major_category,minor_category,value,year,month
CrimeDate,0.412037,0.417989,0.504233,0.587302,0.437037,0.407407,0.437037
CrimeTime,0.412037,0.417989,0.404762,0.493386,0.437037,0.407407,0.437037
CrimeCode,0.458333,0.502646,0.410053,0.504233,0.437037,0.407407,0.437037
Location,0.583333,0.511905,0.594048,0.64881,0.441667,0.458333,0.55
Description,0.549242,0.411255,0.46645,0.470563,0.430303,0.560606,0.430303
Inside/Outside,0.529762,0.404762,0.261905,0.357143,0.347619,0.440476,0.514286
Weapon,0.361111,0.436508,0.492063,0.460317,0.455556,0.611111,0.0
Post,0.416667,0.464286,0.547619,0.547619,0.0,0.0,0.633333
District,0.5,0.422619,0.418651,0.511905,0.0,0.458333,0.441667
Neighborhood,0.541667,0.576984,0.488095,0.539683,0.427778,0.555556,0.355556


## global - london

In [17]:
# Jaro-Winkler scores: global_headers (rows) vs. london_headers (columns).
tables_jaro_winkler[3]

Unnamed: 0,soa_code,borough,major_category,minor_category,value,year,month
id,0.0,0.0,0.0,0.52381,0.0,0.0,0.0
type,0.0,0.0,0.440476,0.440476,0.483333,0.5,0.0
timestamp,0.0,0.0,0.493386,0.504233,0.437037,0.453704,0.374074
city,0.0,0.0,0.547619,0.543651,0.0,0.0,0.483333
district,0.5,0.422619,0.418651,0.511905,0.0,0.458333,0.441667
weapon,0.361111,0.436508,0.492063,0.460317,0.455556,0.611111,0.0


## global - la

In [18]:
# Jaro-Winkler scores: global_headers (rows) vs. la_headers (columns).
tables_jaro_winkler[4]

Unnamed: 0,DR Number,Date Reported,Date Occurred,Time Occurred,Area ID,Area Name,Reporting District,Crime Code,Crime Code Description,MO Codes,Victim Age,Victim Sex,Victim Descent,Premise Code,Premise Description,Weapon Used Code,Weapon Description,Status Code,Status Description,Crime Code 1,Crime Code 2,Crime Code 3,Crime Code 4,Address,Cross Street,Location
id,0.0,0.0,0.0,0.525641,0.0,0.0,0.518519,0.533333,0.69697,0.0,0.533333,0.533333,0.52381,0.527778,0.517544,0.0,0.0,0.0,0.0,0.527778,0.527778,0.527778,0.527778,0.547619,0.0,0.0
type,0.0,0.549145,0.551282,0.442308,0.464286,0.453704,0.527778,0.45,0.431818,0.458333,0.45,0.45,0.547619,0.444444,0.434211,0.375,0.37037,0.44697,0.537037,0.444444,0.444444,0.444444,0.444444,0.464286,0.0,0.0
timestamp,0.407407,0.51339,0.410256,0.521368,0.502646,0.555556,0.5,0.544444,0.594276,0.490741,0.433333,0.433333,0.570899,0.509259,0.539571,0.481481,0.472222,0.51936,0.555556,0.527778,0.527778,0.527778,0.527778,0.502646,0.462963,0.324074
city,0.0,0.442308,0.442308,0.442308,0.0,0.0,0.37037,0.45,0.431818,0.0,0.572222,0.572222,0.543651,0.444444,0.434211,0.0,0.0,0.44697,0.435185,0.444444,0.444444,0.444444,0.444444,0.0,0.444444,0.583333
district,0.0,0.535256,0.424145,0.424145,0.422619,0.412037,0.567593,0.316667,0.477273,0.0,0.55,0.55,0.594048,0.430556,0.577485,0.395833,0.567593,0.438131,0.583333,0.305556,0.305556,0.305556,0.305556,0.490079,0.527778,0.5
weapon,0.0,0.574786,0.32906,0.41453,0.539683,0.518519,0.62963,0.511111,0.474747,0.430556,0.0,0.0,0.0,0.5,0.406433,0.715278,0.703704,0.505051,0.314815,0.5,0.5,0.5,0.5,0.0,0.416667,0.513889


## global - baltimore

In [19]:
# Jaro-Winkler scores: global_headers (rows) vs. baltimore_headers (columns).
tables_jaro_winkler[5]

Unnamed: 0,CrimeDate,CrimeTime,CrimeCode,Location,Description,Inside/Outside,Weapon,Post,District,Neighborhood,Longitude,Latitude,Location 1,Premise,vri_name1,Total Incidents
id,0.537037,0.537037,0.537037,0.0,0.0,0.714286,0.0,0.0,0.541667,0.527778,0.0,0.541667,0.0,0.0,0.537037,0.0
type,0.453704,0.453704,0.453704,0.0,0.393939,0.440476,0.444444,0.0,0.458333,0.444444,0.0,0.458333,0.45,0.464286,0.0,0.438889
timestamp,0.637037,0.62963,0.555556,0.324074,0.5367,0.493386,0.425926,0.407407,0.564815,0.296296,0.481481,0.569444,0.433333,0.588624,0.555556,0.451852
city,0.453704,0.453704,0.453704,0.583333,0.560606,0.440476,0.0,0.5,0.583333,0.444444,0.574074,0.416667,0.572222,0.0,0.453704,0.438889
district,0.458333,0.458333,0.324074,0.5,0.598485,0.594048,0.0,0.583333,0.916667,0.472222,0.490741,0.472222,0.447222,0.490079,0.324074,0.505556
weapon,0.425926,0.425926,0.518519,0.513889,0.505051,0.325397,0.888889,0.0,0.0,0.5,0.518519,0.430556,0.488889,0.436508,0.351852,0.455556
