In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn import preprocessing
from sklearn.neighbors import KNeighborsClassifier
from sklearn import metrics
from sklearn.model_selection import train_test_split
from sklearn.metrics import plot_confusion_matrix
from matplotlib.colors import ListedColormap
from ipywidgets import interact, interactive, fixed, interact_manual, Layout, GridspecLayout
import ipywidgets as widgets

# Full feature matrix and label vector (indexed by the plotting callbacks below).
# NOTE(review): X.csv is the only file read with numpy's default whitespace
# delimiter; every other array is read as comma-separated - confirm this is intended.
X = np.loadtxt('X.csv')
y = np.loadtxt('y.csv', delimiter=',')

# Train / test arrays, each loaded from its own comma-separated file.
X_train = np.loadtxt('X_train.csv', delimiter=',')
X_test = np.loadtxt('X_test.csv', delimiter=',')
y_train = np.loadtxt('y_train.csv', delimiter=',')
y_test = np.loadtxt('y_test.csv', delimiter=',')

# Tabular data frame read by the per-country time-series callback.
data = pd.read_csv('data.csv')

plt.style.use('ggplot')

In [2]:
# The correlations of Indicators: controls for the scatter plot drawn by `datag`.
label = widgets.HTML(value='Please change indicators and the number of sample below: <br>(All indicators are normalized.)')

No_Sample = widgets.RadioButtons(
    options=[500, 1000, 2000, 3000],
    description='Number of Samples',
    layout={'width': 'max-content'},
    style={'description_width': 'initial'},
)

# Both selectors offer the same indicators; each name is a key of the
# column lookup table inside `datag`.
rate_indicator_options = [
    'Cases Rate', 'Deaths Rate',
    'Retail&Recreation Mobility Rate', 'Grocery&Pharmacy Mobility Rate',
    'Parks Mobility Rate', 'Transit Stations Mobility Rate',
    'Workplace Mobility Rate', 'Residential Mobility Rate',
]

# `placeholder` is deliberately not passed: it is not a trait of
# `widgets.Select`, so it had no effect and only raised a deprecation warning
# about unrecognized constructor arguments.
Indicator_1 = widgets.Select(
    options=rate_indicator_options,
    description='1st Indicator :',
    style={'description_width': 'initial'},
)
Indicator_2 = widgets.Select(
    options=rate_indicator_options,
    description='2nd Indicator :',
    style={'description_width': 'initial'},
)



def datag(No_Sample, Indicator_1,Indicator_2):
    """Scatter-plot two columns of the module-level array ``X`` against each other.

    Parameters
    ----------
    No_Sample : int
        Number of leading rows of ``X`` to include in the plot.
    Indicator_1, Indicator_2 : str
        Indicator names for the x and y axis; each must be a key of the
        column lookup table below (``KeyError`` otherwise).
    """
    # Column position of every indicator inside X.
    dc = {'Retail&Recreation Mobility Rate':0,'Grocery&Pharmacy Mobility Rate':1,'Parks Mobility Rate':2,
          'Transit Stations Mobility Rate':3,'Workplace Mobility Rate':4,'Residential Mobility Rate':5,
          'Cases Rate':6,'Deaths Rate':7}

    xs = X[0:No_Sample, dc[Indicator_1]]
    ys = X[0:No_Sample, dc[Indicator_2]]

    fig, ax = plt.subplots(figsize=(10, 7))
    ax.scatter(xs, ys)

    # Pad the view by 0.1 on every side. The limits are taken straight from
    # the data; building a dense 2-D mesh just to read its extrema is unnecessary.
    ax.set_xlim(xs.min() - .1, xs.max() + .1)
    ax.set_ylim(ys.min() - .1, ys.max() + .1)

    ax.set_xlabel(Indicator_1, fontsize='xx-large')
    ax.set_ylabel(Indicator_2, fontsize='xx-large')
    ax.set_title("Indicator Correlation", fontsize='xx-large')
    plt.show()


# Output area that is refreshed by calling `datag` with the current widget values.
out = widgets.interactive_output(
    datag,
    dict(No_Sample=No_Sample, Indicator_1=Indicator_1, Indicator_2=Indicator_2),
)

# 5 x 8 grid: the controls are stacked in the two left-most columns,
# the plot spans every row of columns 3-7.
DC = GridspecLayout(5, 8)
DC[0, :2] = label
DC[1, :2] = No_Sample
DC[2, :2] = Indicator_1
DC[3, :2] = Indicator_2
DC[:, 3:] = out

In [3]:
# Time Series Indicator Plot based on Countries: controls for `tseries`.
# `placeholder` and `ensure_option` are not passed to these widgets: they are
# not traits of `Dropdown` / `Select`, so they had no effect and only raised
# a deprecation warning about unrecognized constructor arguments.
Country = widgets.Dropdown(
    options=['Afghanistan', 'Angola', 'Argentina', 'Aruba', 'Australia',
       'Austria', 'Bahrain', 'Bangladesh', 'Barbados', 'Belarus',
       'Belgium', 'Belize', 'Benin', 'Bolivia', 'Botswana', 'Brazil',
       'Bulgaria', 'Cambodia', 'Cameroon', 'Canada', 'Chile', 'Colombia',
       'Croatia', 'Czechia', 'Denmark', 'Ecuador', 'Egypt', 'Estonia',
       'Fiji', 'Finland', 'France', 'Gabon', 'Georgia', 'Germany',
       'Ghana', 'Guatemala', 'Haiti', 'Honduras', 'Hungary', 'India',
       'Indonesia', 'Iraq', 'Ireland', 'Israel', 'Italy', 'Jamaica',
       'Japan', 'Jordan', 'Kazakhstan', 'Kenya', 'Kuwait', 'Kyrgyzstan',
       'Laos', 'Latvia', 'Lebanon', 'Libya', 'Lithuania', 'Luxembourg',
       'Malaysia', 'Mali', 'Malta', 'Mauritius', 'Mexico', 'Moldova',
       'Mongolia', 'Mozambique', 'Nepal', 'Netherlands', 'Nicaragua',
       'Niger', 'Nigeria', 'Norway', 'Oman', 'Pakistan', 'Panama',
       'Paraguay', 'Peru', 'Philippines', 'Poland', 'Portugal', 'Qatar',
       'Romania', 'Rwanda', 'Senegal', 'Singapore', 'Slovakia',
       'Slovenia', 'Spain', 'Sweden', 'Switzerland', 'Thailand', 'Togo',
       'Turkey', 'Uganda', 'Uruguay', 'Venezuela', 'Vietnam', 'Yemen',
       'Zambia', 'Zimbabwe'],
    description='Countries:',
    disabled=False
)

# Both selectors offer the same indicators; each name is a key of the
# column lookup table inside `tseries`.
indicator_options = ['Cases', 'Deaths', 'Retail&Recreation', 'Grocery&Pharmacy',
                     'Parks', 'Transit Stations', 'Workplace', 'Residential']

Indicator_1 = widgets.Select(
    options=indicator_options,
    description='1st Indicator :',
    disabled=False,
    style={'description_width': 'initial'},
)
Indicator_2 = widgets.Select(
    options=indicator_options,
    description='2nd Indicator :',
    disabled=False,
    style={'description_width': 'initial'},
)

def tseries(Country,Indicator_1,Indicator_2):
    """Plot two standardised indicators over time for a single country.

    Rows of the module-level frame ``data`` whose ``CountryName`` equals
    ``Country`` are summed per ``Date``; the eight indicator columns are then
    standardised with ``StandardScaler`` and the two requested ones are drawn.

    Parameters
    ----------
    Country : str
        Value matched against ``data.CountryName``.
    Indicator_1, Indicator_2 : str
        Indicator names; each must be a key of the lookup table below
        (``KeyError`` otherwise).
    """
    # Order matters: `dc` maps each display name to a position in this list.
    indicator_columns = ['retail_recreation', 'grocery_pharmacy', 'parks', 'transit_stations',
                         'workplace', 'residential', 'cases', 'deaths']
    dc = {'Retail&Recreation':0,'Grocery&Pharmacy':1,'Parks':2,'Transit Stations':3,'Workplace':4,'Residential':5,'Cases':6,'Deaths':7}

    country_rows = data.loc[data.CountryName == Country, ['Date'] + indicator_columns]

    # Aggregate only the numeric indicator columns. Summing the whole frame
    # would also "sum" the string columns, whose handling differs between
    # pandas versions.
    daily = country_rows.groupby('Date', sort=True)[indicator_columns].sum().sort_index()

    if daily.empty:
        # StandardScaler cannot be fitted on zero rows; report instead of raising.
        print("No data available for %s" % (Country))
        return

    XX = preprocessing.StandardScaler().fit_transform(daily.astype(float))

    fig = plt.figure(figsize=(15,6))
    ax = plt.subplot()
    ax.set_title("COVID-19 Cases in %s" %(Country),fontsize='xx-large')
    ax.set_xlabel('Days',fontsize='xx-large')
    ax.set_ylabel('Change Rate',fontsize='xx-large')
    plt.xticks(rotation=90)

    ax.plot(daily.index, XX[:, dc[Indicator_1]], c='b', marker='o', label=Indicator_1)
    ax.plot(daily.index, XX[:, dc[Indicator_2]], c='r', marker='v', label=Indicator_2)
    plt.legend()
    plt.show()

# Output area that is refreshed by calling `tseries` with the current widget values.
out = widgets.interactive_output(
    tseries,
    dict(Country=Country, Indicator_1=Indicator_1, Indicator_2=Indicator_2),
)

txt = widgets.HTML(value='In most of the countries, all mobility indicators decreases except residential mobility rate.')

# 5 x 7 grid: controls and caption in the two left-most columns, plot in the rest.
TS = GridspecLayout(5, 7)
TS[0, :2] = Country
TS[1, :2] = Indicator_1
TS[2, :2] = Indicator_2
TS[3, :2] = txt
TS[:, 2:] = out

In [4]:
# K Value Selection
# The slider value is the largest K evaluated by `Kcheck` (K = 1 .. value).
# The minimum is 1: a value of 0 would ask for no classifier at all and
# produce an empty plot.
K_Slider = widgets.IntSlider(value = 10, min=1, max= 20, step=1,
                             description="Number of neighbors (K)",orientation='horizontal',
                             style = {'description_width': 'initial'},
                             continuous_update=False)


def Kcheck(K_Slider):
    """Plot test-set accuracy of KNN classifiers for K = 1 .. ``K_Slider``.

    For every K a ``KNeighborsClassifier`` is fitted on the module-level
    ``X_train`` / ``y_train`` and scored on ``X_test`` / ``y_test``. The
    accuracy curve is drawn together with a band of one standard error
    on each side.

    Parameters
    ----------
    K_Slider : int
        Largest number of neighbours to evaluate (inclusive).
    """
    k_values = range(1, K_Slider + 1)
    mean_acc = np.zeros(len(k_values))
    std_acc = np.zeros(len(k_values))

    for pos, n in enumerate(k_values):
        # Train model and predict
        neigh = KNeighborsClassifier(n_neighbors=n).fit(X_train, y_train)
        yhat = neigh.predict(X_test)
        mean_acc[pos] = metrics.accuracy_score(y_test, yhat)
        # Standard deviation of the per-sample hit/miss indicator divided by
        # sqrt(number of test samples).
        std_acc[pos] = np.std(yhat == y_test) / np.sqrt(yhat.shape[0])

    fig = plt.figure(figsize=(7,4))
    ax = fig.add_axes([0,0,1,1])
    ax.plot(k_values, mean_acc, 'g', label='Accuracy ')
    # The band is exactly one std_acc wide on each side, and the legend says so.
    ax.fill_between(k_values, mean_acc - 1 * std_acc, mean_acc + 1 * std_acc,
                    alpha=0.10, label='+/- 1xstd')
    ax.legend()
    plt.ylabel('Accuracy ')
    plt.xlabel('Number of Neighbors (K)')
    plt.show()

    
# Caption shown under the slider.
lbl = widgets.HTML(value = 'Maximum accuracy is 0.84 with <b>the K-value 7</b> it is seen on the graph. Increasing K-value leads to decrease KNN model accuracy.')

# Output area that is refreshed by calling `Kcheck` with the slider value.
out = widgets.interactive_output(Kcheck, dict(K_Slider=K_Slider))

# 3 x 9 grid: slider and caption on the left, plot from column 4 onwards.
KV = GridspecLayout(3, 9)
KV[0, :3] = K_Slider
KV[1, :3] = lbl
KV[:, 4:] = out

In [5]:
# K error graph: the two sliders give the first and last K evaluated by `errgraph`.
# Their ranges (1-8 and 9-20) do not overlap, so the start is always below the end.
kmin = widgets.IntSlider(
    description="Starting",
    value=1, min=1, max=8, step=1,
    orientation='horizontal',
    continuous_update=False,
    style={'description_width': 'initial'},
)

kmax = widgets.IntSlider(
    description="Ending",
    value=9, min=9, max=20, step=1,
    orientation='horizontal',
    continuous_update=False,
    style={'description_width': 'initial'},
)


def errgraph(Kmin, Kmax):
    """Plot the test-set misclassification rate for every K in [Kmin, Kmax].

    A ``KNeighborsClassifier`` is fitted on the module-level ``X_train`` /
    ``y_train`` for each K and its predictions on ``X_test`` are compared
    with ``y_test``.

    Parameters
    ----------
    Kmin, Kmax : int
        First and last number of neighbours to evaluate (both inclusive).
    """
    k_values = range(Kmin, Kmax + 1)

    def _error_rate(k):
        # Fraction of test samples that the k-neighbour model gets wrong.
        model = KNeighborsClassifier(n_neighbors=k)
        model.fit(X_train, y_train)
        return np.mean(model.predict(X_test) != y_test)

    error = [_error_rate(k) for k in k_values]

    plt.figure(figsize=(12, 8))
    plt.plot(
        k_values, error,
        color='red', linestyle='dashed',
        marker='o', markerfacecolor='blue', markersize=10,
    )
    plt.title('Error Rate K Value')
    plt.xlabel('K Value')
    plt.ylabel('Mean Error')


# Caption shown above the two range sliders.
lbl = widgets.HTML(value = 'Minimum mean error is 0.162 with <b>the K-value 7</b>. It decreases by increasing the K value from 0 to 7  and increases with the greater K-value than 7. Please set the K value interval:')

# Output area that is refreshed by calling `errgraph` with both slider values.
out = widgets.interactive_output(errgraph, dict(Kmin=kmin, Kmax=kmax))

# 4 x 9 grid: caption in row 0, sliders in rows 2-3, plot from column 3 onwards.
KE = GridspecLayout(4, 9)
KE[0, :2] = lbl
KE[2, :2] = kmin
KE[3, :2] = kmax
KE[:, 3:] = out



In [6]:
#KNN Model: controls for the decision-region plot drawn by `knn`.

K = widgets.IntSlider(value = 7, min=1, max= 14, step=1,
                             description="K Value",
                             style = {'description_width': 'initial'},
                             continuous_update=False)

No_Sample = widgets.RadioButtons(options=[500,1000,2000,3000], value = 1000,
                                 description='Number of Samples',
                                 style = {'description_width': 'initial'})

# Both selectors offer the same indicators; each name is a key of the
# column lookup table inside `knn`.
mobility_indicator_options = ['Retail&Recreation', 'Grocery&Pharmacy', 'Parks',
                              'Transit Stations', 'Workplace', 'Residential']

# `placeholder` and `ensure_option` are not passed: they are not traits of
# `widgets.Select`, so they had no effect and only raised a deprecation
# warning about unrecognized constructor arguments.
Indicator_1 = widgets.Select(
    options=mobility_indicator_options,
    value='Parks',
    description='1st Indicator :',
    disabled=False,
    style={'description_width': 'initial'},
)
Indicator_2 = widgets.Select(
    options=mobility_indicator_options,
    value='Residential',
    description='2nd Indicator :',
    disabled=False,
    style={'description_width': 'initial'},
)

def knn(K,No_Sample,Indicator_1, Indicator_2):
    """Fit a distance-weighted KNN on two indicators and draw its decision regions.

    The classifier is trained on the first ``No_Sample`` rows of the
    module-level ``X`` (restricted to the two chosen columns) and ``y``. Its
    prediction over a dense mesh is drawn as a coloured background with the
    training points on top.

    Parameters
    ----------
    K : int
        Number of neighbours.
    No_Sample : int
        Number of leading rows of ``X`` / ``y`` to use.
    Indicator_1, Indicator_2 : str
        Indicator names for the x and y axis; each must be a key of the
        column lookup table below (``KeyError`` otherwise).
    """
    h = .02  # step size of the prediction mesh
    dc = {'Retail&Recreation':0,'Grocery&Pharmacy':1,'Parks':2,'Transit Stations':3,'Workplace':4,'Residential':5,'Cases':6,'Deaths':7}

    # Light colours for the regions, bold colours for the training points.
    cmap_light = ListedColormap(['#FFAAAA', '#AAFFAA','#00AAFF','#f8ff7e'])
    cmap_bold = ListedColormap(['#FF0000', '#00FF00','#0045FF','#BCC702'])

    features = X[:No_Sample, [dc[Indicator_1], dc[Indicator_2]]]
    targets = y[:No_Sample]

    clf = KNeighborsClassifier(n_neighbors=K, weights='distance')
    clf.fit(features, targets)

    # Mesh covering the data with a margin of 1 on every side.
    x_min, x_max = features[:, 0].min() - 1, features[:, 0].max() + 1
    y_min, y_max = features[:, 1].min() - 1, features[:, 1].max() + 1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))

    # Predict a class for every mesh node and restore the mesh shape.
    Z = clf.predict(np.c_[xx.ravel(), yy.ravel()]).reshape(xx.shape)

    # Use one colour scale for both layers. Without explicit limits each
    # artist normalises over the values it happens to contain, so a class
    # could get a different hue in the background than in the scatter when
    # the mesh does not contain the smallest or largest class.
    vmin, vmax = targets.min(), targets.max()

    fig = plt.figure(figsize = (12,9))
    plt.pcolormesh(xx, yy, Z, cmap=cmap_light, vmin=vmin, vmax=vmax)

    # Plot also the training points
    plt.scatter(features[:, 0], features[:, 1], c=targets, cmap=cmap_bold,
                vmin=vmin, vmax=vmax)
    plt.xlim(xx.min(), xx.max())
    plt.ylim(yy.min(), yy.max())
    plt.title("4-Class classification (K = %i)" % (K),fontsize='xx-large')
    plt.xlabel(Indicator_1,fontsize='xx-large')
    plt.ylabel(Indicator_2,fontsize='xx-large')
    plt.show()

# Output area that is refreshed by calling `knn` with the current widget values.
out = widgets.interactive_output(
    knn,
    dict(K=K, No_Sample=No_Sample, Indicator_1=Indicator_1, Indicator_2=Indicator_2),
)

txt = widgets.HTML(value='<br>The principle behind nearest neighbor methods is to find a predefined number of training samples closest in distance to the new point, and predict the label from these. The number of samples can be a user-defined constant (k-nearest neighbor learning), or vary based on the local density of points (radius-based neighbor learning.')

# 4 x 6 grid: text and controls on the left, plot in the upper right,
# the two indicator selectors side by side in the bottom row.
KNN = GridspecLayout(4, 6)
KNN[0, :2] = txt
KNN[1, :2] = K
KNN[2, :2] = No_Sample
KNN[3, 1:2] = Indicator_1
KNN[3, 2:4] = Indicator_2
KNN[:3, 2:] = out

# Set the height of every placed widget to 'auto'. None of the cell ranges
# above overlap, so these are exactly the widgets the grid holds at those positions.
for placed in (txt, K, No_Sample, Indicator_1, Indicator_2, out):
    placed.layout.height = 'auto'


In [7]:
# Evaluation: slider for the number of neighbours used by `comatrx`.
K = widgets.IntSlider(
    description="K Value",
    value=7, min=1, max=14, step=1,
    orientation='horizontal',
    continuous_update=False,
    style={'description_width': 'initial'},
)


def comatrx(K):
    """Draw raw and row-normalised confusion matrices of a K-neighbour classifier.

    The module-level ``X`` / ``y`` are split 80/20 inside this function
    (``random_state=42``); the resulting local arrays shadow the module-level
    train/test arrays of the same names. The classifier is fitted on the
    training part and both matrices are computed on the held-out part.

    Parameters
    ----------
    K : int
        Number of neighbours.
    """
    class_names = ['High Income', 'Low Income', 'Low Middle Income', 'Upper Middle Income']

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    neigh = KNeighborsClassifier(K).fit(X_train, y_train)

    np.set_printoptions(precision=2)

    # Two panels next to each other sharing both axes.
    fig, (ax1, ax2) = plt.subplots(1, 2, sharey=True, sharex=True, figsize=(10,10))
    fig.subplots_adjust(
        left=0.125, right=0.9,   # horizontal extent of the subplot area
        bottom=0.1, top=0.6,     # vertical extent of the subplot area
        wspace=0.8,              # horizontal gap, as a fraction of the average axis width
        hspace=0.2,              # vertical gap, as a fraction of the average axis height
    )

    # (target axis, `normalize` argument, panel title) for each matrix.
    panels = (
        (ax1, None, "Confusion matrix,\n without normalization"),
        (ax2, 'true', "Normalized\n confusion matrix"),
    )
    for axis, normalize, title in panels:
        disp = plot_confusion_matrix(neigh, X_test, y_test,
                                     display_labels=class_names,
                                     cmap=plt.cm.Blues,
                                     normalize=normalize,
                                     xticks_rotation='vertical',
                                     ax=axis)
        disp.ax_.set_title(title)
    



# Explanatory text shown above the matrices.
txt = widgets.HTML(value='The confusion_matrix function evaluates classification accuracy by computing the confusion matrix with each row corresponding to the true class. You can see that different K values gives different classification accuracies. Please change K value too see different accuracies based on classes.')

# Output area that is refreshed by calling `comatrx` with the slider value.
out = widgets.interactive_output(comatrx, dict(K=K))

# 8 x 8 grid: text in the first row, plots in rows 1-6, slider in the last row.
CM = GridspecLayout(8, 8)
CM[0, :] = txt
CM[1:7, 1:] = out
CM[7, 3:] = K


In [8]:
# Assemble the dashboard: one tab per stage, the first two tabs holding an
# accordion with two panels each.
# Section titles are assigned with `set_title` only; the former `titles=(...)`
# constructor values were overwritten by those calls straight away.
dataacc = widgets.Accordion(children=[DC, TS])
dataacc.set_title(0,'Indicator Correlation')
dataacc.set_title(1,'Time Series Graph')

kvalueacc = widgets.Accordion(children=[KV, KE])
kvalueacc.set_title(0,'K-Value Accuracy Graph')
kvalueacc.set_title(1,'K-Value Error Graph')

labels = ['Data Exploration','K-Value Selection','KNN Model', 'Model Evaluation']
children = [ dataacc,
            kvalueacc,
            KNN,
            CM]

tab = widgets.Tab()
# Children are attached before the titles and the index is passed as an int:
# titles are stored per child position, so setting them on an empty container
# or with a string index fails on ipywidgets 8.
tab.children = children
for i, title in enumerate(labels):
    tab.set_title(i, title)

tab

Tab(children=(Accordion(children=(GridspecLayout(children=(HTML(value='Please change indicators and the number…