In [32]:
#Comparing generated json file when Normalization is True and False for knn_raw

In [45]:
"""
Experiment summary
------------------
Treat each province/state in a country cases over time
as a vector, do a simple K-Nearest Neighbor between 
countries. What country has the most similar trajectory
to a given country?
"""

import sys
sys.path.insert(0, '..')

from utils import data
import os
import sklearn
import numpy as np
from sklearn.neighbors import (
    KNeighborsClassifier,
    DistanceMetric
)
import json

# ------------ HYPERPARAMETERS -------------
BASE_PATH = '../COVID-19/csse_covid_19_data/'
N_NEIGHBORS = 5
MIN_CASES = 1000
NORMALIZE = False
# ------------------------------------------

confirmed = os.path.join(
    BASE_PATH, 
    'csse_covid_19_time_series',
    'time_series_covid19_confirmed_global.csv')
confirmed = data.load_csv_data(confirmed)
features = []
targets = []

for val in np.unique(confirmed["Country/Region"]):
    df = data.filter_by_attribute(
        confirmed, "Country/Region", val)
    cases, labels = data.get_cases_chronologically(df)
    features.append(cases)
    targets.append(labels)

features = np.concatenate(features, axis=0)
targets = np.concatenate(targets, axis=0)
predictions = {}

for _dist in ['minkowski', 'manhattan']:
    for val in np.unique(confirmed["Country/Region"]):
        # test data
        df = data.filter_by_attribute(
            confirmed, "Country/Region", val)
        cases, labels = data.get_cases_chronologically(df)

        # filter the rest of the data to get rid of the country we are
        # trying to predict
        mask = targets[:, 1] != val
        tr_features = features[mask]
        tr_targets = targets[mask][:, 1]

        above_min_cases = tr_features.sum(axis=-1) > MIN_CASES
        tr_features = tr_features[above_min_cases]
        if NORMALIZE:
            tr_features = tr_features / tr_features.sum(axis=-1, keepdims=True)
        tr_targets = tr_targets[above_min_cases]

        # train knn
        knn = KNeighborsClassifier(n_neighbors=N_NEIGHBORS, metric=_dist)
        knn.fit(tr_features, tr_targets)

        # predict
        cases = cases.sum(axis=0, keepdims=True)
        # nearest country to this one based on trajectory
        label = knn.predict(cases)
        
        if val not in predictions:
            predictions[val] = {}
        predictions[val][_dist] = label.tolist()

with open('results/knn_rawNormalizeFalse.json', 'w') as f:
    json.dump(predictions, f, indent=4)

In [46]:
"""
Experiment summary
------------------
Treat each province/state in a country cases over time
as a vector, do a simple K-Nearest Neighbor between 
countries. What country has the most similar trajectory
to a given country?
"""

import sys
sys.path.insert(0, '..')

from utils import data
import os
import sklearn
import numpy as np
from sklearn.neighbors import (
    KNeighborsClassifier,
    DistanceMetric
)
import json

# ------------ HYPERPARAMETERS -------------
BASE_PATH = '../COVID-19/csse_covid_19_data/'
N_NEIGHBORS = 5
MIN_CASES = 1000
NORMALIZE = True
# ------------------------------------------

confirmed = os.path.join(
    BASE_PATH, 
    'csse_covid_19_time_series',
    'time_series_covid19_confirmed_global.csv')
confirmed = data.load_csv_data(confirmed)
features = []
targets = []

for val in np.unique(confirmed["Country/Region"]):
    df = data.filter_by_attribute(
        confirmed, "Country/Region", val)
    cases, labels = data.get_cases_chronologically(df)
    features.append(cases)
    targets.append(labels)

features = np.concatenate(features, axis=0)
targets = np.concatenate(targets, axis=0)
predictions = {}

for _dist in ['minkowski', 'manhattan']:
    for val in np.unique(confirmed["Country/Region"]):
        # test data
        df = data.filter_by_attribute(
            confirmed, "Country/Region", val)
        cases, labels = data.get_cases_chronologically(df)

        # filter the rest of the data to get rid of the country we are
        # trying to predict
        mask = targets[:, 1] != val
        tr_features = features[mask]
        tr_targets = targets[mask][:, 1]

        above_min_cases = tr_features.sum(axis=-1) > MIN_CASES
        tr_features = tr_features[above_min_cases]
        if NORMALIZE:
            tr_features = tr_features / tr_features.sum(axis=-1, keepdims=True)
        tr_targets = tr_targets[above_min_cases]

        # train knn
        knn = KNeighborsClassifier(n_neighbors=N_NEIGHBORS, metric=_dist)
        knn.fit(tr_features, tr_targets)

        # predict
        cases = cases.sum(axis=0, keepdims=True)
        # nearest country to this one based on trajectory
        label = knn.predict(cases)
        
        if val not in predictions:
            predictions[val] = {}
        predictions[val][_dist] = label.tolist()

with open('results/knn_rawNormalizeTrue.json', 'w') as f:
    json.dump(predictions, f, indent=4)

In [47]:
import json

NormTrue = open('results/knn_rawNormalizeTrue.json',)
NormFalse = open('results/knn_rawNormalizeFalse.json',)

NormTrueData = json.load(NormTrue)
NormFalseData = json.load(NormFalse)


MinKowskicompare = {}
Manhattancompare = {}


for country in NormTrueData.keys():
    
    v1 = NormTrueData[country]
    v2 = NormFalseData[country]
    
    MinkowskiCountry1 = v1['minkowski'][0]
    ManhattanCountry1 = v1['manhattan'][0]
    
    MinkowskiCountry2 = v2['minkowski'][0]
    ManhattanCountry2 = v2['manhattan'][0]
    
    if (MinkowskiCountry1, MinkowskiCountry2) not in MinKowskicompare:
        MinKowskicompare[(MinkowskiCountry1, MinkowskiCountry2)] = 1
        
    else:
        MinKowskicompare[(MinkowskiCountry1, MinkowskiCountry2)] += 1
        
    if (ManhattanCountry1, ManhattanCountry2) not in Manhattancompare:
        Manhattancompare[(ManhattanCountry1, ManhattanCountry2)] = 1
        
    else:
        Manhattancompare[(ManhattanCountry1, ManhattanCountry2)] += 1
       
print("Comparing MinKowski Distance")    
print(MinKowskicompare)

print("Comparing Manhattan Distance")
print(Manhattancompare)

import operator
print(max(MinKowskicompare.items(), key=operator.itemgetter(1))[0])
print(max(MinKowskicompare.values()))
print(max(Manhattancompare.items(), key=operator.itemgetter(1))[0])
print(max(Manhattancompare.values()))
    
NumSameMinKowski = 0
NumDiffMinkiwski = 0
NumSameManhattan = 0
NumDiffManhattan = 0

for k in MinKowskicompare.keys():
    if k[0] == k[1]:
        NumSameMinKowski += MinKowskicompare[k]
    else:
        NumDiffMinkiwski += MinKowskicompare[k]
        
print("Number of countries with same result for Minkowski Distance")
print(NumSameMinKowski)

print("Number of countries with different result Minkowski Distance")
print(NumDiffMinkiwski)

for k in Manhattancompare.keys():
    if k[0] == k[1]:
        NumSameManhattan += Manhattancompare[k]
    else:
        NumDiffManhattan += Manhattancompare[k]    
    
print("Number of countries with same result for Manhattan Distance")
print(NumSameManhattan)

print("Number of countries with different result Manhattan Distance")
print(NumDiffManhattan)    




Comparing MinKowski Distance
{('Cambodia', 'Cameroon'): 2, ('Cambodia', 'Canada'): 22, ('Cambodia', 'Kenya'): 1, ('Cambodia', 'Belize'): 1, ('Cambodia', 'Cabo Verde'): 4, ('Cambodia', 'Bhutan'): 1, ('Cambodia', 'Colombia'): 2, ('Cambodia', 'Ethiopia'): 2, ('France', 'Congo (Kinshasa)'): 1, ('Cambodia', 'Japan'): 1, ('Cambodia', 'Costa Rica'): 1, ('Cambodia', 'Andorra'): 4, ('Cambodia', 'Bosnia and Herzegovina'): 2, ('Cambodia', 'Iraq'): 2, ('Cambodia', 'Burundi'): 2, ('Cambodia', 'Bolivia'): 3, ('France', 'Andorra'): 1, ('Cambodia', 'Gambia'): 1, ('Cambodia', 'Antigua and Barbuda'): 1, ('Cambodia', 'Belarus'): 6, ('Cambodia', 'Burma'): 1, ('Cambodia', 'France'): 9, ('Cambodia', 'China'): 5, ('Cambodia', 'Azerbaijan'): 5, ('France', 'Algeria'): 1, ('Cambodia', 'Barbados'): 5, ('Laos', 'Netherlands'): 1, ('Cambodia', "Cote d'Ivoire"): 1, ('Cambodia', 'Belgium'): 8, ('Cambodia', 'Australia'): 7, ('Cambodia', 'United Kingdom'): 5, ('Cambodia', 'Algeria'): 3, ('Cambodia', 'Argentina'): 6, (

In [36]:
#Comparing generated json file when Normalization is True and False for knn_diff

In [37]:
"""
Experiment summary
------------------
Treat each province/state in a country cases over time
as a vector, do a simple K-Nearest Neighbor between 
countries. What country has the most similar trajectory
to a given country?

This one is different in that it uses the difference in cases
from day to day, rather than the raw number of cases.
"""

import sys
sys.path.insert(0, '..')

from utils import data
import os
import sklearn
import numpy as np
from sklearn.neighbors import (
    KNeighborsClassifier,
    DistanceMetric
)
import json

# ------------ HYPERPARAMETERS -------------
BASE_PATH = '../COVID-19/csse_covid_19_data/'
N_NEIGHBORS = 5
MIN_CASES = 1000
NORMALIZE = True
# ------------------------------------------

confirmed = os.path.join(
    BASE_PATH, 
    'csse_covid_19_time_series',
    'time_series_covid19_confirmed_global.csv')
confirmed = data.load_csv_data(confirmed)
features = []
targets = []

for val in np.unique(confirmed["Country/Region"]):
    df = data.filter_by_attribute(
        confirmed, "Country/Region", val)
    cases, labels = data.get_cases_chronologically(df)
    features.append(cases)
    targets.append(labels)

features = np.concatenate(features, axis=0)
targets = np.concatenate(targets, axis=0)
predictions = {}

for _dist in ['minkowski', 'manhattan']:
    for val in np.unique(confirmed["Country/Region"]):
        # test data
        df = data.filter_by_attribute(
            confirmed, "Country/Region", val)
        cases, labels = data.get_cases_chronologically(df)

        # filter the rest of the data to get rid of the country we are
        # trying to predict
        mask = targets[:, 1] != val
        tr_features = features[mask]
        tr_targets = targets[mask][:, 1]

        above_min_cases = tr_features.sum(axis=-1) > MIN_CASES
        tr_features = np.diff(tr_features[above_min_cases], axis=-1)
        if NORMALIZE:
            tr_features = tr_features / tr_features.sum(axis=-1, keepdims=True)

        tr_targets = tr_targets[above_min_cases]

        # train knn
        knn = KNeighborsClassifier(n_neighbors=N_NEIGHBORS, metric=_dist)
        knn.fit(tr_features, tr_targets)

        # predict
        cases = np.diff(cases.sum(axis=0, keepdims=True), axis=-1)
        # nearest country to this one based on trajectory
        label = knn.predict(cases)
        
        if val not in predictions:
            predictions[val] = {}
        predictions[val][_dist] = label.tolist()

with open('results/knnDiffNormalizeTrue.json', 'w') as f:
    json.dump(predictions, f, indent=4)
    


In [38]:
"""
Experiment summary
------------------
Treat each province/state in a country cases over time
as a vector, do a simple K-Nearest Neighbor between 
countries. What country has the most similar trajectory
to a given country?

This one is different in that it uses the difference in cases
from day to day, rather than the raw number of cases.
"""

import sys
sys.path.insert(0, '..')

from utils import data
import os
import sklearn
import numpy as np
from sklearn.neighbors import (
    KNeighborsClassifier,
    DistanceMetric
)
import json

# ------------ HYPERPARAMETERS -------------
BASE_PATH = '../COVID-19/csse_covid_19_data/'
N_NEIGHBORS = 5
MIN_CASES = 1000
NORMALIZE = False
# ------------------------------------------

confirmed = os.path.join(
    BASE_PATH, 
    'csse_covid_19_time_series',
    'time_series_covid19_confirmed_global.csv')
confirmed = data.load_csv_data(confirmed)
features = []
targets = []

for val in np.unique(confirmed["Country/Region"]):
    df = data.filter_by_attribute(
        confirmed, "Country/Region", val)
    cases, labels = data.get_cases_chronologically(df)
    features.append(cases)
    targets.append(labels)

features = np.concatenate(features, axis=0)
targets = np.concatenate(targets, axis=0)
predictions = {}

for _dist in ['minkowski', 'manhattan']:
    for val in np.unique(confirmed["Country/Region"]):
        # test data
        df = data.filter_by_attribute(
            confirmed, "Country/Region", val)
        cases, labels = data.get_cases_chronologically(df)

        # filter the rest of the data to get rid of the country we are
        # trying to predict
        mask = targets[:, 1] != val
        tr_features = features[mask]
        tr_targets = targets[mask][:, 1]

        above_min_cases = tr_features.sum(axis=-1) > MIN_CASES
        tr_features = np.diff(tr_features[above_min_cases], axis=-1)
        if NORMALIZE:
            tr_features = tr_features / tr_features.sum(axis=-1, keepdims=True)

        tr_targets = tr_targets[above_min_cases]

        # train knn
        knn = KNeighborsClassifier(n_neighbors=N_NEIGHBORS, metric=_dist)
        knn.fit(tr_features, tr_targets)

        # predict
        cases = np.diff(cases.sum(axis=0, keepdims=True), axis=-1)
        # nearest country to this one based on trajectory
        label = knn.predict(cases)
        
        if val not in predictions:
            predictions[val] = {}
        predictions[val][_dist] = label.tolist()

with open('results/knnDiffNormalizeFalse.json', 'w') as f:
    json.dump(predictions, f, indent=4)
    
    
    

In [48]:
import json

NormTrue = open('results/knnDiffNormalizeTrue.json',)
NormFalse = open('results/knnDiffNormalizeFalse.json',)

NormTrueData = json.load(NormTrue)
NormFalseData = json.load(NormFalse)


MinKowskicompare = {}
Manhattancompare = {}


for country in NormTrueData.keys():
    
    v1 = NormTrueData[country]
    v2 = NormFalseData[country]
    
    MinkowskiCountry1 = v1['minkowski'][0]
    ManhattanCountry1 = v1['manhattan'][0]
    
    MinkowskiCountry2 = v2['minkowski'][0]
    ManhattanCountry2 = v2['manhattan'][0]
    
    if (MinkowskiCountry1, MinkowskiCountry2) not in MinKowskicompare:
        MinKowskicompare[(MinkowskiCountry1, MinkowskiCountry2)] = 1
        
    else:
        MinKowskicompare[(MinkowskiCountry1, MinkowskiCountry2)] += 1
        
    if (ManhattanCountry1, ManhattanCountry2) not in Manhattancompare:
        Manhattancompare[(ManhattanCountry1, ManhattanCountry2)] = 1
        
    else:
        Manhattancompare[(ManhattanCountry1, ManhattanCountry2)] += 1
       
print("Comparing MinKowski Distance")    
print(MinKowskicompare)

print("Comparing Manhattan Distance")
print(Manhattancompare)

import operator
print(max(MinKowskicompare.items(), key=operator.itemgetter(1))[0])
print(max(MinKowskicompare.values()))
print(max(Manhattancompare.items(), key=operator.itemgetter(1))[0])
print(max(Manhattancompare.values()))


NumSameMinKowski = 0
NumDiffMinkiwski = 0
NumSameManhattan = 0
NumDiffManhattan = 0

for k in MinKowskicompare.keys():
    if k[0] == k[1]:
        NumSameMinKowski += MinKowskicompare[k]
    else:
        NumDiffMinkiwski += MinKowskicompare[k]
        
print("Number of countries with same result for Minkowski Distance")
print(NumSameMinKowski)

print("Number of countries with different result Minkowski Distance")
print(NumDiffMinkiwski)

for k in Manhattancompare.keys():
    if k[0] == k[1]:
        NumSameManhattan += Manhattancompare[k]
    else:
        NumDiffManhattan += Manhattancompare[k]    
    
print("Number of countries with same result for Manhattan Distance")
print(NumSameManhattan)

print("Number of countries with different result Manhattan Distance")
print(NumDiffManhattan)    

Comparing MinKowski Distance
{('Central African Republic', 'Congo (Kinshasa)'): 1, ('Antigua and Barbuda', 'Estonia'): 1, ('Canada', 'Canada'): 5, ('Burma', 'Chad'): 1, ('Fiji', 'Cabo Verde'): 1, ('France', 'United Kingdom'): 2, ('Cambodia', 'Colombia'): 2, ('Belgium', 'Bosnia and Herzegovina'): 1, ('China', 'China'): 3, ('Canada', 'Bulgaria'): 2, ('Canada', 'Croatia'): 1, ('France', 'Netherlands'): 1, ('Fiji', 'Cuba'): 1, ('Djibouti', 'Ethiopia'): 1, ('Antigua and Barbuda', 'Antigua and Barbuda'): 1, ('Armenia', 'Austria'): 1, ('Burma', 'China'): 1, ('France', 'France'): 2, ('Denmark', 'Australia'): 1, ('Fiji', 'Bahrain'): 3, ('Bulgaria', 'Armenia'): 1, ('Antigua and Barbuda', 'Canada'): 1, ('France', 'Argentina'): 1, ('Australia', 'Australia'): 6, ('Canada', 'Bosnia and Herzegovina'): 2, ('China', 'United Kingdom'): 1, ('Belize', 'Algeria'): 1, ('Cambodia', 'Antigua and Barbuda'): 1, ('Cambodia', 'Angola'): 1, ('Fiji', 'Angola'): 3, ('Netherlands', 'Netherlands'): 1, ('United Kingdom

In [40]:
#Comparing result between knn_raw.py and knn_diff.py

In [49]:
import json

knnRaw = open('results/knn_rawNormalizeTrue.json',)
knnDiff = open('results/knnDiffNormalizeTrue.json',)

knnRawData = json.load(knnRaw)
knnDiffData = json.load(knnDiff)


MinKowskicompare = {}
Manhattancompare = {}


for country in knnRawData.keys():
    
    v1 = knnRawData[country]
    v2 = knnDiffData[country]
    
    MinkowskiCountry1 = v1['minkowski'][0]
    ManhattanCountry1 = v1['manhattan'][0]
    
    MinkowskiCountry2 = v2['minkowski'][0]
    ManhattanCountry2 = v2['manhattan'][0]
    
    if (MinkowskiCountry1, MinkowskiCountry2) not in MinKowskicompare:
        MinKowskicompare[(MinkowskiCountry1, MinkowskiCountry2)] = 1
        
    else:
        MinKowskicompare[(MinkowskiCountry1, MinkowskiCountry2)] += 1
        
    if (ManhattanCountry1, ManhattanCountry2) not in Manhattancompare:
        Manhattancompare[(ManhattanCountry1, ManhattanCountry2)] = 1
        
    else:
        Manhattancompare[(ManhattanCountry1, ManhattanCountry2)] += 1
       
print("Comparing MinKowski Distance")    
print(MinKowskicompare)

print("Comparing Manhattan Distance")
print(Manhattancompare)

import operator
print(max(MinKowskicompare.items(), key=operator.itemgetter(1))[0])
print(max(MinKowskicompare.values()))
print(max(Manhattancompare.items(), key=operator.itemgetter(1))[0])
print(max(Manhattancompare.values()))

NumSameMinKowski = 0
NumDiffMinkiwski = 0
NumSameManhattan = 0
NumDiffManhattan = 0

for k in MinKowskicompare.keys():
    if k[0] == k[1]:
        NumSameMinKowski += MinKowskicompare[k]
    else:
        NumDiffMinkiwski += MinKowskicompare[k]
        
print("Number of countries with same result for Minkowski Distance")
print(NumSameMinKowski)

print("Number of countries with different result Minkowski Distance")
print(NumDiffMinkiwski)

for k in Manhattancompare.keys():
    if k[0] == k[1]:
        NumSameManhattan += Manhattancompare[k]
    else:
        NumDiffManhattan += Manhattancompare[k]    
    
print("Number of countries with same result for Manhattan Distance")
print(NumSameManhattan)

print("Number of countries with different result Manhattan Distance")
print(NumDiffManhattan)    

Comparing MinKowski Distance
{('Cambodia', 'Central African Republic'): 3, ('Cambodia', 'Antigua and Barbuda'): 5, ('Cambodia', 'Canada'): 20, ('Cambodia', 'Burma'): 2, ('Cambodia', 'Fiji'): 12, ('Cambodia', 'France'): 24, ('Cambodia', 'Cambodia'): 25, ('Cambodia', 'Belgium'): 1, ('France', 'China'): 3, ('Cambodia', 'Djibouti'): 1, ('Cambodia', 'Armenia'): 1, ('France', 'Burma'): 1, ('Cambodia', 'Denmark'): 1, ('Cambodia', 'Bulgaria'): 3, ('Cambodia', 'Australia'): 12, ('Cambodia', 'China'): 26, ('France', 'Belize'): 2, ('Laos', 'Fiji'): 1, ('Cambodia', 'Netherlands'): 5, ('Cambodia', 'United Kingdom'): 15, ('Cambodia', 'Afghanistan'): 3, ('Cambodia', 'Belize'): 3, ('Central African Republic', 'China'): 1, ('Cambodia', 'Cameroon'): 4, ('Cambodia', 'Barbados'): 2, ('Cambodia', 'Comoros'): 2, ('Cambodia', 'Croatia'): 1, ('Canada', 'Australia'): 2, ('Cambodia', 'Austria'): 1, ('Cambodia', 'Maldives'): 1, ('Central African Republic', 'Australia'): 1, ('Canada', 'Burma'): 1, ('Cambodia', 'G

In [42]:
#Does Doubleing number of nearest neighbors from 5 to 10 affect result for knn_diff.py

In [43]:
"""
Experiment summary
------------------
Treat each province/state in a country cases over time
as a vector, do a simple K-Nearest Neighbor between 
countries. What country has the most similar trajectory
to a given country?

This one is different in that it uses the difference in cases
from day to day, rather than the raw number of cases.
"""

import sys
sys.path.insert(0, '..')

from utils import data
import os
import sklearn
import numpy as np
from sklearn.neighbors import (
    KNeighborsClassifier,
    DistanceMetric
)
import json

# ------------ HYPERPARAMETERS -------------
BASE_PATH = '../COVID-19/csse_covid_19_data/'
N_NEIGHBORS = 10
MIN_CASES = 1000
NORMALIZE = True
# ------------------------------------------

confirmed = os.path.join(
    BASE_PATH, 
    'csse_covid_19_time_series',
    'time_series_covid19_confirmed_global.csv')
confirmed = data.load_csv_data(confirmed)
features = []
targets = []

for val in np.unique(confirmed["Country/Region"]):
    df = data.filter_by_attribute(
        confirmed, "Country/Region", val)
    cases, labels = data.get_cases_chronologically(df)
    features.append(cases)
    targets.append(labels)

features = np.concatenate(features, axis=0)
targets = np.concatenate(targets, axis=0)
predictions = {}

for _dist in ['minkowski', 'manhattan']:
    for val in np.unique(confirmed["Country/Region"]):
        # test data
        df = data.filter_by_attribute(
            confirmed, "Country/Region", val)
        cases, labels = data.get_cases_chronologically(df)

        # filter the rest of the data to get rid of the country we are
        # trying to predict
        mask = targets[:, 1] != val
        tr_features = features[mask]
        tr_targets = targets[mask][:, 1]

        above_min_cases = tr_features.sum(axis=-1) > MIN_CASES
        tr_features = np.diff(tr_features[above_min_cases], axis=-1)
        if NORMALIZE:
            tr_features = tr_features / tr_features.sum(axis=-1, keepdims=True)

        tr_targets = tr_targets[above_min_cases]

        # train knn
        knn = KNeighborsClassifier(n_neighbors=N_NEIGHBORS, metric=_dist)
        knn.fit(tr_features, tr_targets)

        # predict
        cases = np.diff(cases.sum(axis=0, keepdims=True), axis=-1)
        # nearest country to this one based on trajectory
        label = knn.predict(cases)
        
        if val not in predictions:
            predictions[val] = {}
        predictions[val][_dist] = label.tolist()

with open('results/knnDiffNormalizeTrue10.json', 'w') as f:
    json.dump(predictions, f, indent=4)
    

In [50]:
import json

knnDiff = open('results/knnDiffNormalizeTrue.json',)
knnDiff10 = open('results/knnDiffNormalizeTrue10.json',)

knnDiffData = json.load(knnDiff)
knnDiff10Data = json.load(knnDiff10)


MinKowskicompare = {}
Manhattancompare = {}


for country in knnDiffData.keys():
    
    v1 = knnDiffData[country]
    v2 = knnDiff10Data[country]
    
    MinkowskiCountry1 = v1['minkowski'][0]
    ManhattanCountry1 = v1['manhattan'][0]
    
    MinkowskiCountry2 = v2['minkowski'][0]
    ManhattanCountry2 = v2['manhattan'][0]
    
    if (MinkowskiCountry1, MinkowskiCountry2) not in MinKowskicompare:
        MinKowskicompare[(MinkowskiCountry1, MinkowskiCountry2)] = 1
        
    else:
        MinKowskicompare[(MinkowskiCountry1, MinkowskiCountry2)] += 1
        
    if (ManhattanCountry1, ManhattanCountry2) not in Manhattancompare:
        Manhattancompare[(ManhattanCountry1, ManhattanCountry2)] = 1
        
    else:
        Manhattancompare[(ManhattanCountry1, ManhattanCountry2)] += 1
       
print("Comparing MinKowski Distance")    
print(MinKowskicompare)

print("Comparing Manhattan Distance")
print(Manhattancompare)

import operator
print(max(MinKowskicompare.items(), key=operator.itemgetter(1))[0])
print(max(MinKowskicompare.values()))
print(max(Manhattancompare.items(), key=operator.itemgetter(1))[0])
print(max(Manhattancompare.values()))

NumSameMinKowski = 0
NumDiffMinkiwski = 0
NumSameManhattan = 0
NumDiffManhattan = 0

for k in MinKowskicompare.keys():
    if k[0] == k[1]:
        NumSameMinKowski += MinKowskicompare[k]
    else:
        NumDiffMinkiwski += MinKowskicompare[k]
        
print("Number of countries with same result for Minkowski Distance")
print(NumSameMinKowski)

print("Number of countries with different result Minkowski Distance")
print(NumDiffMinkiwski)

for k in Manhattancompare.keys():
    if k[0] == k[1]:
        NumSameManhattan += Manhattancompare[k]
    else:
        NumDiffManhattan += Manhattancompare[k]    
    
print("Number of countries with same result for Manhattan Distance")
print(NumSameManhattan)

print("Number of countries with different result Manhattan Distance")
print(NumDiffManhattan)    

Comparing MinKowski Distance
{('Central African Republic', 'Cambodia'): 1, ('Antigua and Barbuda', 'Antigua and Barbuda'): 2, ('Canada', 'Canada'): 9, ('Burma', 'Belgium'): 1, ('Fiji', 'Cambodia'): 10, ('France', 'France'): 21, ('Cambodia', 'Cambodia'): 17, ('Belgium', 'Austria'): 1, ('China', 'China'): 24, ('Canada', 'Bulgaria'): 1, ('Canada', 'United Kingdom'): 3, ('France', 'Canada'): 2, ('Djibouti', 'Cameroon'): 1, ('Antigua and Barbuda', 'France'): 3, ('Armenia', 'France'): 1, ('Burma', 'Azerbaijan'): 1, ('Denmark', 'Cambodia'): 1, ('Bulgaria', 'Botswana'): 1, ('Australia', 'Australia'): 9, ('Canada', 'France'): 3, ('Belize', 'Canada'): 2, ('Cambodia', 'Canada'): 4, ('Fiji', 'Canada'): 1, ('Netherlands', 'France'): 1, ('United Kingdom', 'United Kingdom'): 11, ('Afghanistan', 'Afghanistan'): 4, ('Australia', 'United Kingdom'): 2, ('United Kingdom', 'Canada'): 1, ('Canada', 'Cambodia'): 2, ('Cameroon', 'Cameroon'): 1, ('Barbados', 'France'): 1, ('Cambodia', 'Netherlands'): 2, ('Aust