In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.colors as colors
import seaborn as sns
import scipy.sparse as sparse
import networkx as nx
import geopandas
from sklearn.cluster import KMeans
from sklearn.metrics import davies_bouldin_score


def dsog(scbm, K, K_clusters, method="DSoG", row=True, random_state=None):
    """
    Spectral co-clustering based on the DSoG or SoG
    param scbm: a multi-layer directed network, array of shape L*n*n
    param K: embedding dimension (number of leading eigenvectors kept)
    param K_clusters: the number of clusters passed to k-means
    param method: "SoG" uses the plain sum of Gram matrices; any other value
                  (default "DSoG") uses the bias-adjusted sum
    param row: True embeds the rows via A_l A_l^T, False embeds the columns
               via A_l^T A_l
    param random_state: forwarded to KMeans; the default None keeps the
                        previous unseeded behaviour
    return: (vs, labels) where vs is the n*K matrix of leading eigenvectors
            and labels are the k-means labels of its rows
    """
    scbm = np.asarray(scbm)

    # Column clustering is row clustering of the transposed layers. Transposing
    # up front keeps the Gram matrix and the degree correction consistent:
    # the term subtracted below is always the row sum of the layers actually
    # multiplied, i.e. the column sum of the original layers when row=False
    # (this matches what k_eigenvaules does).
    layers = scbm if row else scbm.transpose((0, 2, 1))

    # Accumulate the sum of Gram matrices layer by layer instead of storing
    # an L*n*n intermediate.
    n_nodes = layers.shape[1]
    gram_sum = np.zeros((n_nodes, n_nodes))
    for layer in layers:
        layer_sp = sparse.csr_matrix(layer, dtype=float)
        gram_sum += layer_sp.dot(layer_sp.T).toarray()

    if method != "SoG":
        # the bias-adjusted sum of Gram matrices: remove the summed degrees
        # from the diagonal
        gram_sum -= np.diag(layers.sum(axis=0).sum(axis=1))

    # eigh returns eigenvalues in ascending order; reverse the columns so the
    # eigenvectors of the K largest eigenvalues come first.
    _, v = np.linalg.eigh(gram_sum)
    vs = v[:, ::-1][:, 0:K]

    # k-means on the rows of the spectral embedding
    k_means = KMeans(init="k-means++", n_clusters=K_clusters, n_init=scbm.shape[1],
                     random_state=random_state)
    k_means.fit(vs)
    return vs, k_means.labels_


def k_eigenvaules(adjs, row=True):
    """
    Eigenvalues of the bias-adjusted sum of Gram matrices, largest first.

    param adjs: stacked adjacency matrices, indexed as layer * n * n
    param row: True uses A_l A_l^T; False transposes every layer first,
               which amounts to using A_l^T A_l
    return: 1-D array of eigenvalues in descending order
    """
    gram = np.zeros_like(adjs)
    layers = adjs if row else adjs.transpose((0, 2, 1))

    for idx in range(layers.shape[0]):
        layer_sp = sparse.coo_matrix(layers[idx])
        gram[idx] = layer_sp.dot(layer_sp.T).toarray()

    # Subtract the summed row degrees of the (possibly transposed) layers
    # from the diagonal of the summed Gram matrices.
    gram_total = np.sum(gram, axis=0)
    degree_total = np.sum(layers, axis=0).sum(axis=1)
    spectrum, _ = np.linalg.eigh(gram_total - np.diag(degree_total))

    # eigh sorts ascending; flip to descending.
    return spectrum[::-1]


def rearrangement(labels, K):
    """
    Group node indices by cluster label.

    param labels: array of cluster labels, one per node
    param K: number of clusters; labels 0..K-1 are collected in that order
    return: (order, cluster_position, counts) where order is the node indices
            concatenated cluster by cluster, cluster_position is the list of
            per-cluster index arrays and counts is the list of cluster sizes
    """
    cluster_position = [np.where(labels == cluster)[0] for cluster in range(K)]
    counts = [members.shape[0] for members in cluster_position]
    # Start from an empty array so that K == 0 still produces an empty result.
    labels_r = np.hstack([np.ones(0)] + cluster_position)
    return labels_r.astype(int), cluster_position, counts

## Create directed networks


In [2]:
items_group1 = ["Pastry", "Rice, paddy (rice milled equivalent)", "Rice, milled", "Breakfast cereals", 
                "Mixes and doughs for the preparation of bakers' wares", "Food preparations of flour, meal or malt extract", 
                "Communion wafers, empty cachets of a kind suitable for pharmaceutical use, sealing wafers, rice paper and similar products.", 
                "Sugar and syrups n.e.c.", "Sugar confectionery", "Prepared nuts", "Vegetables preserved (frozen)", 
                "Juice of fruits n.e.c.", "Orange juice", "Fruit prepared n.e.c.", "Other non-alcoholic caloric beverages", 
                "Undenatured ethyl alcohol of an alcoholic strength by volume of less than 80% vol; spirits, liqueurs and other spirituous beverages", 
                "Food wastes", "Coffee, decaffeinated or roasted", "Coffee, green", "Chocolate products nes", "Pepper (Piper spp.), raw", 
                "Dog or cat food, put up for retail sale", "Food preparations n.e.c.", "Crude organic material n.e.c."]

areacodes = pd.read_csv("AreaCodes.csv")
n_areas = areacodes.shape[0]

value_threshold = 100  # an edge is created when the trade value exceeds this
items = items_group1
year = 2020

# One binary adjacency layer per item, indexed layer * area * area.
Adj_ = np.zeros((len(items), n_areas, n_areas))
for j, item in enumerate(items):
    Export_value = pd.read_csv(f"Items Yearly Value Matrix/{item} Export {year}.csv", index_col=0, encoding_errors="ignore")
    Import_value = pd.read_csv(f"Items Yearly Value Matrix/{item} Import {year}.csv", index_col=0, encoding_errors="ignore")
    # The export and import tables can disagree (inconsistent statistical
    # standards between countries); keep the larger of the two values.
    trade_value = np.maximum(Export_value.values, Import_value.values)
    Adj_[j] = np.where(trade_value > value_threshold, 1, 0)

# remove unconnected countries
D = len(items) / 2  # a degree of zero in more than half the layers

# Per-layer degrees, shape (layers, areas): column sums and row sums of each layer.
col_degree_by_layer = Adj_.sum(axis=1)
row_degree_by_layer = Adj_.sum(axis=2)
# Degrees summed over all layers.
col_degree_total = col_degree_by_layer.sum(axis=0)
row_degree_total = row_degree_by_layer.sum(axis=0)

# Nodes whose degree is positive in fewer than D layers.
sparse_col = (col_degree_by_layer > 0).sum(axis=0) < D
sparse_row = (row_degree_by_layer > 0).sum(axis=0) < D

# the mean total in/out-degree of those sparsely connected nodes
mtd = np.concatenate((col_degree_total[sparse_col], row_degree_total[sparse_row])).mean()

# Summation of in/out-degree
degree_threshold = int(mtd) + 1  # If (summation) out/in-degree <= degree_threshold, then delete the node
del_nodes = np.union1d(np.flatnonzero(col_degree_total <= degree_threshold),
                       np.flatnonzero(row_degree_total <= degree_threshold))
Adj = np.delete(Adj_, del_nodes, axis=1)
Adj = np.delete(Adj, del_nodes, axis=2)

# Derive the kept indices from the actual number of areas rather than a
# hard-coded 220, so the area table stays aligned with Adj.
keep_nodes = np.delete(np.arange(n_areas), del_nodes)
areacodes_list = areacodes.iloc[keep_nodes]
country_list = areacodes["Area"].values[keep_nodes]

## Scree plot

In [3]:
font = {"family": "Times New Roman", "weight": "normal", "size": 13}


def plot_scree(evalues, n_leading=4, n_shown=50):
    """
    Scree plot of the first n_shown eigenvalues.

    param evalues: eigenvalues sorted in descending order
    param n_leading: how many leading eigenvalues to highlight
    param n_shown: how many eigenvalues to draw at most
    """
    # Clip to what is available so x and y always have the same length.
    n_shown = min(n_shown, len(evalues))
    n_leading = min(n_leading, n_shown)
    ticks = [1] + list(range(10, n_shown + 1, 10))

    plt.figure(figsize=(4, 10/3))
    plt.scatter(range(1, n_leading + 1), evalues[:n_leading], color="#62B17C", marker='o', s=10)
    plt.scatter(range(n_leading + 1, n_shown + 1), evalues[n_leading:n_shown], color='#d3d3d3', marker='o', s=10)
    plt.xticks(ticks, ticks)
    plt.xlabel('Index', font)
    plt.ylabel('Eigenvalues', font)
    plt.tight_layout()
    plt.show()


# row
evalues = k_eigenvaules(Adj) / Adj.shape[1]
plot_scree(evalues)

# column
evalues = k_eigenvaules(Adj, row=False) / Adj.shape[1]
plot_scree(evalues)

## Davies-Bouldin score

In [4]:
def db_scores_by_cluster_count(adj, row, K=4, cluster_counts=range(2, 11)):
    """
    Davies-Bouldin score of the DSoG clustering for each candidate cluster count.

    param adj: multi-layer adjacency array passed to dsog
    param row: True scores the row clustering, False the column clustering
    param K: embedding dimension passed to dsog
    param cluster_counts: candidate numbers of clusters
    return: list with one score per entry of cluster_counts
    """
    scores = []
    for k in cluster_counts:
        vs_debias, dsog_labels = dsog(adj, K=K, K_clusters=k, method="DSoG", row=row)
        scores.append(davies_bouldin_score(vs_debias, dsog_labels))
    return scores


def plot_db_scores(cluster_counts, scores):
    """Line plot of Davies-Bouldin score against the number of clusters."""
    plt.figure(figsize=(4, 10/3))
    plt.plot(cluster_counts, scores, color="#62B17C", marker='o')
    plt.xlabel('Number of Clusters', font)
    plt.ylabel('Davies-Bouldin Score', font)
    plt.xticks(list(cluster_counts))
    plt.tight_layout()
    plt.show()


cluster_counts = range(2, 11)

# the number of row clusters
db_scores = db_scores_by_cluster_count(Adj, row=True, cluster_counts=cluster_counts)
plot_db_scores(cluster_counts, db_scores)

# the number of column clusters
db_scores = db_scores_by_cluster_count(Adj, row=False, cluster_counts=cluster_counts)
plot_db_scores(cluster_counts, db_scores)

## Clustering

In [5]:
# Row clustering
Ky = 5
_, row_labels = dsog(Adj, K=4, K_clusters=Ky, method="DSoG", row=True)
_, cluster_position_r, _ = rearrangement(row_labels, Ky)

# NOTE(review): geopandas.datasets is deprecated in recent geopandas releases
# and, as far as I know, removed in 1.0; this line needs a local copy of the
# Natural Earth file once the environment is upgraded. TODO confirm.
world = geopandas.read_file(geopandas.datasets.get_path('naturalearth_lowres'))
fao_to_world = pd.read_csv("World and FAO cross-reference table group1.csv")
fao_to_world = fao_to_world.rename(columns={"Area": "FAO Area"})
world = world.merge(fao_to_world)

# Display name of each k-means cluster index. The numbering was picked by
# hand; k-means labels are arbitrary and this call to dsog does not seed
# KMeans, so re-check the mapping after re-running the clustering.
row_community_names = {0: "community 4", 1: "community 2", 2: "community 3",
                       3: "community 1", 4: "community 5"}
row_community_of = {
    area: name
    for cluster, name in row_community_names.items()
    for area in country_list[cluster_position_r[cluster]]
}
# Build the final column in one step (0 = not in any cluster) instead of
# writing strings into an integer column through .loc.
world["row community"] = world["FAO Area"].map(lambda area: row_community_of.get(area, 0))

colorslist = ['#49b99a', '#7994d3', '#ec7f7c', '#dabe94', '#6ec4e7']
my_colormap = colors.LinearSegmentedColormap.from_list('',colorslist, N=5)

fig, ax = plt.subplots(figsize=(17, 13))
# Grey background layer, then the clustered countries coloured on top.
world.plot(ax=ax, alpha=0.4, color="#CCCCCC")
world_ = world.loc[world["row community"] != 0, :]
world_.plot(column="row community", ax=ax, categorical=True, legend=True, 
            legend_kwds={"loc": "center left", "shadow": True, 'frameon':True, "prop":{"family": "Times New Roman", "weight": "normal", "size": 20}}, cmap=my_colormap, edgecolor="white", linewidth=.3)
plt.axis("off")
plt.tight_layout()
plt.show()

In [6]:
# Column clustering
Kz = 5
# Use Kz for the number of clusters so it cannot drift from the value
# passed to rearrangement.
_, column_labels = dsog(Adj, K=4, K_clusters=Kz, method="DSoG", row=False)
_, cluster_position_c, _ = rearrangement(column_labels, Kz)

# Display name of each k-means cluster index. The numbering was picked by
# hand; k-means labels are arbitrary and this call to dsog does not seed
# KMeans, so re-check the mapping after re-running the clustering.
column_community_names = {0: "community 5", 1: "community 2", 2: "community 3",
                          3: "community 4", 4: "community 1"}
column_community_of = {
    area: name
    for cluster, name in column_community_names.items()
    for area in country_list[cluster_position_c[cluster]]
}
# Build the final column in one step (0 = not in any cluster) instead of
# writing strings into an integer column through .loc.
world["column community"] = world["FAO Area"].map(lambda area: column_community_of.get(area, 0))

fig, ax = plt.subplots(figsize=(17, 13))
# Grey background layer, then the clustered countries coloured on top.
world.plot(ax=ax, alpha=0.4, color="#CCCCCC")
world_ = world.loc[world["column community"] != 0, :]
world_.plot(column="column community", ax=ax, categorical=True, legend=True, 
            legend_kwds={"loc": "center left", "shadow": True, "prop":{"family": "Times New Roman", "weight": "normal", "size": 20}}, cmap=my_colormap, edgecolor="white", linewidth=.3)
plt.axis("off")
plt.tight_layout()
plt.show()