In [None]:
# Mount Google Drive (Colab only); later cells open pickle files under /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import networkx as nx
import community.community_louvain as cl
from networkx.drawing.nx_pydot import graphviz_layout

import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib.cm as cm

import numpy as np
import pandas as pd
import copy
import pickle

import time
from tqdm import tqdm

# subgroup_cenInfo.pkl 불러오기 (centrality 연산결과)

In [None]:
# Load the precomputed centrality results; the dict is indexed below as
# subgroup_cenInfo[year][group][measure][country] with year/group as strings.
# NOTE(review): pickle.load can execute arbitrary code from the file; only load pickles from a trusted source.
with open('/content/drive/Shareddrives/22-1 데이터마이닝/TermProject/Louvain_Analysis/pickles/subgroup_cenInfo.pkl', 'rb') as f:
  subgroup_cenInfo = pickle.load(f)

In [None]:
# Inspect the loaded structure (this dumps the whole nested dict, so the output is long).
subgroup_cenInfo

{'2011': {'0': {'wdeg': {'Albania': 29588174.0,
    'Andorra': 27682.0,
    'Austria': 652353808.0,
    'Belarus': 2193886.0,
    'Belgium': 745015556.0,
    'Bosnia Herzegovina': 145963676.0,
    'Bulgaria': 1033937527.0,
    'Bunkers': 63.0,
    'Burkina Faso': 216.0,
    'Croatia': 74297356.0,
    'Cyprus': 33903021.0,
    'Czechia': 541371353.0,
    'Denmark': 468544862.0,
    'Estonia': 37720267.0,
    'Faeroe Isds': 4523814.0,
    'Finland': 33545311.0,
    'Georgia': 63815378.0,
    'Germany': 1804376136.0,
    'Gibraltar': 821.0,
    'Greece': 256145126.0,
    'Greenland': 2090.0,
    'Hungary': 388964103.0,
    'Ireland': 16379980.0,
    'Israel': 382980057.0,
    'Italy': 1522723097.0,
    'Jordan': 39465612.0,
    'Kazakhstan': 173840967.0,
    'Latvia': 265114470.0,
    'Libya': 43065937.0,
    'Lithuania': 354414545.0,
    'Luxembourg': 16985652.0,
    'Malta': 8112434.0,
    'Montenegro': 7093148.0,
    'Netherlands': 1480466891.0,
    'North Macedonia': 37311216.0,
    '

# 저장되어 있는 partial_groups 불러오기

In [None]:
# Year range used by every loop in this notebook (both ends inclusive)
startYear, endYear = 2011, 2021

# NOTE(review): pickle.load can execute arbitrary code; only load pickles from a trusted source.
with open('/content/drive/Shareddrives/22-1 데이터마이닝/TermProject/Louvain_Analysis/pickles/partial_groups.pkl', 'rb') as f:
  partial_groups = pickle.load(f)

# max_iter maps each year (as a string) to the number of groups stored for that year
max_iter = {}
for year in range(startYear, endYear+1):
  year_key = str(year)
  group_count = partial_groups[year_key]['group_number']
  max_iter[year_key] = group_count
  print("{}년 군집 수 :".format(year)+str(group_count))

2011년 군집 수 :5
2012년 군집 수 :6
2013년 군집 수 :5
2014년 군집 수 :6
2015년 군집 수 :5
2016년 군집 수 :5
2017년 군집 수 :5
2018년 군집 수 :6
2019년 군집 수 :4
2020년 군집 수 :4
2021년 군집 수 :6


# 연도 별 각 그룹의 weighted degree centrality & weighted eigenvector centrality 계산

In [None]:
# For every year and every group, print the five highest-scoring countries under each centrality measure
for year in tqdm(range(startYear, endYear+1), desc="특정 년도의 특정 그룹에 있는 centrality를 출력", mininterval=0.01):

  for number in range(max_iter[str(year)]):
    group_scores = subgroup_cenInfo[str(year)][str(number)]

    # (country, score) pairs ordered by descending score, truncated to five
    top_wdeg = sorted(group_scores['wdeg'].items(), key=lambda pair: pair[1], reverse=True)[:5]
    print('weighted degree centrality of group{} in {}:\n'.format(number, year), top_wdeg, '\n')

    top_weig = sorted(group_scores['weig'].items(), key=lambda pair: pair[1], reverse=True)[:5]
    print('weighted eigenvector centrality of group{} in {}:\n'.format(number, year), top_weig, '\n')

특정 년도의 특정 그룹에 있는 centrality를 출력: 100%|██████████| 11/11 [00:00<00:00, 149.07it/s]

weighted degree centrality of group0 in 2011:
 [('Germany', 1804376136.0), ('Spain', 1655551723.0), ('Italy', 1522723097.0), ('Netherlands', 1480466891.0), ('Ukraine', 1238630232.0)] 

weighted eigenvector centrality of group0 in 2011:
 [('Netherlands', 0.6637665851538416), ('Belgium', 0.5892191185292874), ('Italy', 0.29127399697140904), ('Germany', 0.27448896631572167), ('Israel', 0.14698726346716115)] 

weighted degree centrality of group1 in 2011:
 [('Australia', 6089649349.0), ('Viet Nam', 1453185332.0), ('Indonesia', 1165405707.0), ('Bangladesh', 1037494507.0), ('Pakistan', 927311994.0)] 

weighted eigenvector centrality of group1 in 2011:
 [('Bangladesh', 0.697390781037689), ('Viet Nam', 0.3878661677120036), ('Indonesia', 0.30943097319806584), ('Ethiopia', 0.2121595248951553), ('Sri Lanka', 0.20764869635296263)] 

weighted degree centrality of group2 in 2011:
 [('Canada', 4646168527.0), ('USA', 4419766054.0), ('Japan', 2634341522.0), ('Mexico', 1655633433.0), ('Venezuela', 853181




# Centrality 높은 상위 국가 판단

거래 금액을 weight로 지정한 아래의 두 Centrality를 이용해 파악해보았다.

degree centrality

eigenvector centrality

In [None]:
# Build, per year and group, the top-5 country names for each centrality measure
# (centrality) and a two-row ranking table of those names (centrality_df).
centrality = {}
centrality_df = {}

for year in tqdm(range(startYear, endYear+1), desc="degree별 상위 Top5 국가 파악", mininterval=0.01):
  year_key = str(year)
  centrality[year_key] = {}
  centrality_df[year_key] = {}

  for number in range(max_iter[year_key]):
    group_key = str(number)
    group_scores = subgroup_cenInfo[year_key][group_key]

    # Country names ordered by descending score, at most five per measure
    top_names = {}
    for measure in ('wdeg', 'weig'):
      ranked = sorted(group_scores[measure].items(), key=lambda pair: pair[1], reverse=True)
      top_names[measure] = [name for name, _ in ranked[:5]]
    centrality[year_key][group_key] = top_names

    # One row per measure, one column per rank; groups with fewer than five
    # countries simply produce fewer columns.
    ranking = pd.DataFrame([top_names['wdeg'], top_names['weig']])

    # NOTE: the row labels keep the original 'weigthed' spelling so existing
    # lookups and displayed outputs stay unchanged.
    centrality_df[year_key][group_key] = ranking.rename(
        columns={0:'1st', 1:'2nd', 2:'3rd', 3:'4th', 4:'5th'},
        index={0:'weigthed degree centrality',
               1:'weigthed eigenvector centrality'})


degree별 상위 Top5 국가 파악: 100%|██████████| 11/11 [00:00<00:00, 80.74it/s]


In [None]:
year = 2021
number = 0

# Top five (country, score) pairs of the selected group, one measure at a time
group_scores = subgroup_cenInfo[str(year)][str(number)]

top_wdeg = sorted(group_scores['wdeg'].items(), key=lambda pair: pair[1], reverse=True)[:5]
print('weighted degree centrality of group{} in {}:\n'.format(number, year), top_wdeg, '\n')

top_weig = sorted(group_scores['weig'].items(), key=lambda pair: pair[1], reverse=True)[:5]
print('weighted eigenvector centrality of group{} in {}:\n'.format(number, year), top_weig, '\n')

weighted degree centrality of group0 in 2021:
 [('Uzbekistan', 6846694.0), ('Tajikistan', 5794570.0), ('Kyrgyzstan', 786551.0), ('Afghanistan', 265573.0)] 

weighted eigenvector centrality of group0 in 2021:
 [('Tajikistan', 0.9898924823058234), ('Kyrgyzstan', 0.13436740769594036), ('Afghanistan', 0.045368196167368334), ('Uzbekistan', 8.54155180531347e-08)] 



In [None]:
# Show the ranking table for group 0 of 2021 (one row per centrality measure, one column per rank).
centrality_df['2021']['0']

Unnamed: 0,1st,2nd,3rd,4th
weigthed degree centrality,Uzbekistan,Tajikistan,Kyrgyzstan,Afghanistan
weigthed eigenvector centrality,Tajikistan,Kyrgyzstan,Afghanistan,Uzbekistan


In [None]:
year = 2021
number = 1

# Top five (country, score) pairs of the selected group, one measure at a time
group_scores = subgroup_cenInfo[str(year)][str(number)]

top_wdeg = sorted(group_scores['wdeg'].items(), key=lambda pair: pair[1], reverse=True)[:5]
print('weighted degree centrality of group{} in {}:\n'.format(number, year), top_wdeg, '\n')

top_weig = sorted(group_scores['weig'].items(), key=lambda pair: pair[1], reverse=True)[:5]
print('weighted eigenvector centrality of group{} in {}:\n'.format(number, year), top_weig, '\n')

weighted degree centrality of group1 in 2021:
 [('Italy', 2118809249.0), ('France', 1313872154.0), ('Hungary', 981973563.0), ('Spain', 689133089.0), ('Netherlands', 681450829.0)] 

weighted eigenvector centrality of group1 in 2021:
 [('Italy', 0.5824406114401812), ('Netherlands', 0.5675644897329521), ('Spain', 0.5055386105609142), ('Tunisia', 0.23112190182806017), ('France', 0.12320932754744184)] 



In [None]:
# Show the ranking table for group 1 of 2021 (one row per centrality measure, one column per rank).
centrality_df['2021']['1']

Unnamed: 0,1st,2nd,3rd,4th,5th
weigthed degree centrality,Italy,France,Hungary,Spain,Netherlands
weigthed eigenvector centrality,Italy,Netherlands,Spain,Tunisia,France


In [None]:
year = 2021
number = 2

# Top five (country, score) pairs of the selected group, one measure at a time
group_scores = subgroup_cenInfo[str(year)][str(number)]

top_wdeg = sorted(group_scores['wdeg'].items(), key=lambda pair: pair[1], reverse=True)[:5]
print('weighted degree centrality of group{} in {}:\n'.format(number, year), top_wdeg, '\n')

top_weig = sorted(group_scores['weig'].items(), key=lambda pair: pair[1], reverse=True)[:5]
print('weighted eigenvector centrality of group{} in {}:\n'.format(number, year), top_weig, '\n')

weighted degree centrality of group2 in 2021:
 [('Germany', 1765998789.0), ('Poland', 1181924435.0), ('Algeria', 915789660.0), ('Lithuania', 861515056.0), ('Latvia', 720615372.0)] 

weighted eigenvector centrality of group2 in 2021:
 [('Algeria', 0.8286696805349273), ('United Kingdom', 0.39851350619507675), ('Nigeria', 0.2523393460085499), ('Ireland', 0.17163524662305948), ('Portugal', 0.14716028149259555)] 



In [None]:
# Show the ranking table for group 2 of 2021 (one row per centrality measure, one column per rank).
centrality_df['2021']['2']

Unnamed: 0,1st,2nd,3rd,4th,5th
weigthed degree centrality,Germany,Poland,Algeria,Lithuania,Latvia
weigthed eigenvector centrality,Algeria,United Kingdom,Nigeria,Ireland,Portugal


In [None]:
year = 2021
number = 3

# Top five (country, score) pairs of the selected group, one measure at a time
group_scores = subgroup_cenInfo[str(year)][str(number)]

top_wdeg = sorted(group_scores['wdeg'].items(), key=lambda pair: pair[1], reverse=True)[:5]
print('weighted degree centrality of group{} in {}:\n'.format(number, year), top_wdeg, '\n')

top_weig = sorted(group_scores['weig'].items(), key=lambda pair: pair[1], reverse=True)[:5]
print('weighted eigenvector centrality of group{} in {}:\n'.format(number, year), top_weig, '\n')

weighted degree centrality of group3 in 2021:
 [('USA', 6045924929.0), ('Canada', 4511103421.0), ('Mexico', 2828397714.0), ('Japan', 2672442802.0), ('Ecuador', 887027458.0)] 

weighted eigenvector centrality of group3 in 2021:
 [('Mexico', 0.7799174172482352), ('Japan', 0.5775586152453517), ('Ecuador', 0.12889764743876556), ('Guatemala', 0.10864316098351394), ('Colombia', 0.10247002101677398)] 



In [None]:
# Show the ranking table for group 3 of 2021 (one row per centrality measure, one column per rank).
centrality_df['2021']['3']

Unnamed: 0,1st,2nd,3rd,4th,5th
weigthed degree centrality,USA,Canada,Mexico,Japan,Ecuador
weigthed eigenvector centrality,Mexico,Japan,Ecuador,Guatemala,Colombia


In [None]:
year = 2021
number = 4

# Top five (country, score) pairs of the selected group, one measure at a time
group_scores = subgroup_cenInfo[str(year)][str(number)]

top_wdeg = sorted(group_scores['wdeg'].items(), key=lambda pair: pair[1], reverse=True)[:5]
print('weighted degree centrality of group{} in {}:\n'.format(number, year), top_wdeg, '\n')

top_weig = sorted(group_scores['weig'].items(), key=lambda pair: pair[1], reverse=True)[:5]
print('weighted eigenvector centrality of group{} in {}:\n'.format(number, year), top_weig, '\n')

weighted degree centrality of group4 in 2021:
 [('Ukraine', 4317555433.0), ('Egypt', 1396438759.0), ('Romania', 1232118172.0), ('Turkey', 958951841.0), ('Pakistan', 910797367.0)] 

weighted eigenvector centrality of group4 in 2021:
 [('Turkey', 0.6346382105050495), ('Syria', 0.5533081026193795), ('Libya', 0.5067282658889474), ('Sierra Leone', 0.1773776129209625), ('Lebanon', 0.05054235366814237)] 



In [None]:
# Show the ranking table for group 4 of 2021 (one row per centrality measure, one column per rank).
centrality_df['2021']['4']

Unnamed: 0,1st,2nd,3rd,4th,5th
weigthed degree centrality,Ukraine,Egypt,Romania,Turkey,Pakistan
weigthed eigenvector centrality,Turkey,Syria,Libya,Sierra Leone,Lebanon


In [None]:
year = 2021
number = 5

# Top five (country, score) pairs of the selected group, one measure at a time
group_scores = subgroup_cenInfo[str(year)][str(number)]

top_wdeg = sorted(group_scores['wdeg'].items(), key=lambda pair: pair[1], reverse=True)[:5]
print('weighted degree centrality of group{} in {}:\n'.format(number, year), top_wdeg, '\n')

top_weig = sorted(group_scores['weig'].items(), key=lambda pair: pair[1], reverse=True)[:5]
print('weighted eigenvector centrality of group{} in {}:\n'.format(number, year), top_weig, '\n')

weighted degree centrality of group5 in 2021:
 [('Australia', 3895948661.0), ('Philippines', 1323177099.0), ('China', 880677306.0), ('Rep. of Korea', 333961329.0), ('Malaysia', 333769508.0)] 

weighted eigenvector centrality of group5 in 2021:
 [('Philippines', 0.717402363026162), ('China', 0.5559819261260254), ('Malaysia', 0.20604589881647414), ('New Zealand', 0.2009934101595429), ('Rep. of Korea', 0.1965201089374598)] 



In [None]:
# Show the ranking table for group 5 of 2021 (one row per centrality measure, one column per rank).
centrality_df['2021']['5']

Unnamed: 0,1st,2nd,3rd,4th,5th
weigthed degree centrality,Australia,Philippines,China,Rep. of Korea,Malaysia
weigthed eigenvector centrality,Philippines,China,Malaysia,New Zealand,Rep. of Korea


# Sub Group Plotting

나누어진 각 클러스터들의 네트워크 관계를 시각화하고 이미지로 저장해보았다.

In [None]:
# Load "sub_group_wheat", the cluster dataset partitioned with the Louvain algorithm.
# It is indexed below as sub_group_wheat[year][group] with both keys as strings.
# NOTE(review): pickle.load can execute arbitrary code from the file; only load pickles from a trusted source.
with open('/content/drive/Shareddrives/22-1 데이터마이닝/TermProject/Louvain_Analysis/pickles/sub_group_wheat.pkl', 'rb') as f:
  sub_group_wheat = pickle.load(f)

In [None]:
# Build one directed graph per (year, group): subGraph[year][group], both keys as strings.
subGraph = {}

for year in range(startYear, endYear+1):

    subGraph[str(year)] = {}

    for number in range(max_iter[str(year)]):
      # making graph: every row of the group's edge list becomes a From -> To edge
      # that carries the 'Trade Value (US$)' column as an edge attribute
      graph = nx.from_pandas_edgelist(
          sub_group_wheat[str(year)][str(number)], source='From', target='To',
          edge_attr='Trade Value (US$)', create_using=nx.DiGraph()) # Directed Graph
      subGraph[str(year)][str(number)] = graph

      # nx.info() was removed in NetworkX 3.0, so the one-line summary is built
      # from the node/edge counts directly; this works on every NetworkX version.
      summary = f'{type(graph).__name__} with {graph.number_of_nodes()} nodes and {graph.number_of_edges()} edges'
      print(f'SubGraph {number} of {year} : {summary}')

SubGraph 0 of 2011 : DiGraph with 51 nodes and 538 edges
SubGraph 1 of 2011 : DiGraph with 51 nodes and 209 edges
SubGraph 2 of 2011 : DiGraph with 40 nodes and 89 edges
SubGraph 3 of 2011 : DiGraph with 40 nodes and 129 edges
SubGraph 4 of 2011 : DiGraph with 14 nodes and 17 edges
SubGraph 0 of 2012 : DiGraph with 52 nodes and 533 edges
SubGraph 1 of 2012 : DiGraph with 53 nodes and 155 edges
SubGraph 2 of 2012 : DiGraph with 16 nodes and 27 edges
SubGraph 3 of 2012 : DiGraph with 15 nodes and 29 edges
SubGraph 4 of 2012 : DiGraph with 14 nodes and 22 edges
SubGraph 5 of 2012 : DiGraph with 46 nodes and 163 edges
SubGraph 0 of 2013 : DiGraph with 52 nodes and 540 edges
SubGraph 1 of 2013 : DiGraph with 39 nodes and 58 edges
SubGraph 2 of 2013 : DiGraph with 25 nodes and 73 edges
SubGraph 3 of 2013 : DiGraph with 31 nodes and 91 edges
SubGraph 4 of 2013 : DiGraph with 49 nodes and 168 edges
SubGraph 0 of 2014 : DiGraph with 63 nodes and 580 edges
SubGraph 1 of 2014 : DiGraph with 18 no

In [None]:
# List the node names of the group-0 graph for 2021.
list(subGraph[str(2021)][str(0)].nodes())

['Uzbekistan', 'Afghanistan', 'Kyrgyzstan', 'Tajikistan']

In [None]:
# finding independent, moderate, dependent node in Graph
def nodeDependency(G, weight='Trade Value (US$)', low=0.3, high=0.7):
  """Split the nodes of G into three lists by their weighted out-degree share.

  For each node the ratio ``out / (out + in)`` is computed from the weighted
  out- and in-degree.

  Args:
    G: graph exposing ``nodes()``, ``out_degree(node, weight=...)`` and
      ``in_degree(node, weight=...)`` (e.g. a networkx DiGraph).
    weight: edge attribute used as the weight for both degrees.
    low: ratios strictly below this value go to ``dep_nodes``.
    high: ratios strictly above this value go to ``indep_nodes``.

  Returns:
    Tuple ``(indep_nodes, moderate_nodes, dep_nodes)`` of node lists, each in
    the iteration order of ``G.nodes()``. Ratios within ``[low, high]`` are
    moderate. Nodes whose weighted in+out total is 0 have no defined ratio and
    are placed in ``moderate_nodes`` instead of raising ZeroDivisionError.
  """
  indep_nodes = []
  moderate_nodes = []
  dep_nodes = []

  for node in list(G.nodes()):
    outgoing = G.out_degree(node, weight=weight)
    incoming = G.in_degree(node, weight=weight)
    total = outgoing + incoming

    # No weighted flow at all: the ratio is undefined, so keep the node neutral.
    if total == 0:
      moderate_nodes.append(node)
      continue

    ratio = outgoing / total
    if ratio < low:
      dep_nodes.append(node)
    elif ratio > high:
      indep_nodes.append(node)
    else:
      moderate_nodes.append(node)

  return indep_nodes, moderate_nodes, dep_nodes

In [None]:
# https://towardsdatascience.com/customizing-networkx-graphs-f80b4e69bedf

def DiGraphPlot(G, year, number, save_path=None):
    """Draw G on a circular layout, colour nodes by dependency class, save and show it.

    Args:
        G: directed graph to draw; it is also passed to ``nodeDependency``.
        year: year used in the plot title and the default file name.
        number: group number used in the plot title and the default file name.
        save_path: destination image path. When None, the image is written to
            the shared-drive ``subgroupIMG`` folder as
            ``subgroup_{number}_in_{year}.png``.

    Returns:
        None. The figure is saved, shown, and then closed.
    """
    # https://stackoverflow.com/questions/4581504/how-to-set-opacity-of-background-colour-of-graph-with-matplotlib
    # Set before the figure is created: "figure.facecolor" only applies to
    # figures created afterwards, so updating it later left the first
    # displayed figure without the tinted background.
    plt.rcParams.update({
        "figure.facecolor":  (1.0, 229/255, 204/255, 0.3),  # with alpha = 30%
        "savefig.facecolor": (1.0, 229/255, 204/255, 0.3),  # with alpha = 30%
    })

    fig, ax = plt.subplots()
    fig.set_size_inches((48, 48))

    # nodelist by dependency
    indep_nodes, moderate_nodes, dep_nodes = nodeDependency(G)

    # layout settings
    title = "circular layout of group{} in {}".format(number, year)
    pos = nx.circular_layout(G)

    # Base drawing; this call also renders the node labels.
    nx.draw_networkx(G, pos=pos, font_color="white", ax=ax, font_size=25)

    # Large coloured nodes, one pass per dependency class:
    # blue = low import dependency, green = medium, red = high.
    coloured_groups = (
        (indep_nodes, "tab:blue"),
        (moderate_nodes, "tab:green"),
        (dep_nodes, "tab:red"),
    )
    for nodelist, color in coloured_groups:
        nx.draw_networkx_nodes(G, pos,
                               nodelist=nodelist,
                               node_color=color,
                               node_size=30000,
                               ax=ax)

    # arrow style
    nx.draw_networkx_edges(G, pos, edge_color='k', arrowsize=30,
                           connectionstyle='arc3', min_target_margin=100, ax=ax)

    ax.set_title(title, fontsize=40)
    ax.axis('off')

    if save_path is None:
        save_path = "/content/drive/Shareddrives/22-1 데이터마이닝/TermProject/Louvain_Analysis/subgroupIMG/subgroup_{}_in_{}.png".format(number, year)

    fig.tight_layout()
    fig.savefig(save_path,
                # dpi = 200, # resolution
                bbox_inches='tight'
                )
    plt.show()
    # Close this specific figure so repeated calls in a loop do not accumulate open figures.
    plt.close(fig)

In [None]:
# Render and save one image for every (year, group) graph
for year in tqdm(range(startYear, endYear+1), desc = "plotting DiGraph", mininterval=0.01):
  group_count = max_iter[str(year)]

  for number in range(group_count):
    group_graph = subGraph[str(year)][str(number)]
    DiGraphPlot(group_graph, year, number)