In [None]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import networkx as nx
import community.community_louvain as cl
from networkx.drawing.nx_pydot import graphviz_layout

import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib.cm as cm

import numpy as np
import pandas as pd
import copy
import pickle

import time
from tqdm import tqdm

# preprocessed dataset of wheat trading from 2006 to 2021

In [None]:
# Preprocessed wheat-trade table (2006-2021) stored on the shared Google Drive.
wheat_csv_path = "/content/drive/Shareddrives/22-1 데이터마이닝/TermProject/Data/comtradedata/df_2006_2021_1001.csv"
df = pd.read_csv(wheat_csv_path)
df.head()

Unnamed: 0.1,Unnamed: 0,Year,From,To,Trade Value (US$)
0,0,2006,Algeria,Egypt,5452264
1,1,2006,Andorra,Lebanon,1
2,2,2006,"Areas, nes",Aruba,14580
3,3,2006,"Areas, nes",Côte d'Ivoire,339
4,4,2006,"Areas, nes",Ireland,1575677


In [None]:
# "Unnamed: 0" carries no trade information (presumably an index column saved with the CSV),
# so remove it. A non-inplace drop with errors="ignore" keeps the cell re-runnable: the
# original inplace drop raises KeyError the second time the cell is executed.
df = df.drop(columns=["Unnamed: 0"], errors="ignore")
df.head()

Unnamed: 0,Year,From,To,Trade Value (US$)
0,2006,Algeria,Egypt,5452264
1,2006,Andorra,Lebanon,1
2,2006,"Areas, nes",Aruba,14580
3,2006,"Areas, nes",Côte d'Ivoire,339
4,2006,"Areas, nes",Ireland,1575677


# Dataset of Korea

In [None]:
# Rows where Korea is the exporter (df_f) and rows where Korea is the importer (df_t),
# stacked exports-first into a single Korea-centred table.
korea_is_exporter = df['From'].eq('Rep. of Korea')
korea_is_importer = df['To'].eq('Rep. of Korea')
df_f = df.loc[korea_is_exporter].copy()
df_t = df.loc[korea_is_importer].copy()
df_korea = pd.concat([df_f, df_t])

In [None]:
df_korea

Unnamed: 0,Year,From,To,Trade Value (US$)
1190,2006,Rep. of Korea,Bolivia (Plurinational State of),32
1191,2006,Rep. of Korea,Canada,21991
1192,2006,Rep. of Korea,China,500
1193,2006,Rep. of Korea,Dominican Rep.,36
1194,2006,Rep. of Korea,Mexico,160
...,...,...,...,...
30069,2021,Latvia,Rep. of Korea,7295906
30359,2021,Romania,Rep. of Korea,111652191
30601,2021,Turkey,Rep. of Korea,1698480
30680,2021,USA,Rep. of Korea,495796158


### 2022년 예상 데이터셋 구성 (수출 제한 국가 삭제)

In [None]:
# Projected 2022 dataset: start from Korea's 2021 trades and remove the partners
# affected by export restrictions.
wheat2021 = df_korea[df_korea['Year'] == 2021].copy()

# Exporters removed from the projection, and importers removed from the projection.
restricted_exporters = ['Russian Federation', 'Ukraine', 'India']
excluded_importers = ['Russian Federation', 'Ukraine']
keep_row = ~wheat2021['From'].isin(restricted_exporters) & ~wheat2021['To'].isin(excluded_importers)

# .assign() returns a brand-new frame, so relabelling the year no longer writes into a
# slice of wheat2021 (the original `df2022['Year'] = 2022` raised SettingWithCopyWarning).
df2022 = wheat2021.loc[keep_row].assign(Year=2022)

df2022.head(10)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


Unnamed: 0,Year,From,To,Trade Value (US$)
30300,2022,Rep. of Korea,Netherlands,62
30301,2022,Rep. of Korea,Philippines,310
29338,2022,Australia,Rep. of Korea,311250701
29416,2022,Belgium,Rep. of Korea,61113
29513,2022,Canada,Rep. of Korea,61827465
29680,2022,Estonia,Rep. of Korea,7082833
29795,2022,Germany,Rep. of Korea,543188
30069,2022,Latvia,Rep. of Korea,7295906
30359,2022,Romania,Rep. of Korea,111652191
30601,2022,Turkey,Rep. of Korea,1698480


# 2006 ~ 2022 Korea Dataset

In [None]:
# Append the projected 2022 rows to the Korea table. Any 2022 rows already present are
# removed first, so re-running this cell does not stack duplicate copies of the projection
# (on the first run the filter is a no-op and the result is unchanged).
df_korea = pd.concat([df_korea[df_korea['Year'] != 2022], df2022], ignore_index=True)
df_korea

Unnamed: 0,Year,From,To,Trade Value (US$)
0,2006,Rep. of Korea,Bolivia (Plurinational State of),32
1,2006,Rep. of Korea,Canada,21991
2,2006,Rep. of Korea,China,500
3,2006,Rep. of Korea,Dominican Rep.,36
4,2006,Rep. of Korea,Mexico,160
...,...,...,...,...
329,2022,Germany,Rep. of Korea,543188
330,2022,Latvia,Rep. of Korea,7295906
331,2022,Romania,Rep. of Korea,111652191
332,2022,Turkey,Rep. of Korea,1698480


In [None]:
# Year range covered by the analysis (2022 is the projected year).
startYear, endYear = 2006, 2022

# df_kor[year] -> independent copy of the Korea trades recorded for that year.
df_kor = {}
year_column = df_korea['Year']
for year in range(startYear, endYear + 1):
    df_kor[year] = df_korea.loc[year_column == year].copy()
df_kor[2021]

Unnamed: 0,Year,From,To,Trade Value (US$)
81,2021,Rep. of Korea,Netherlands,62
82,2021,Rep. of Korea,Philippines,310
312,2021,Australia,Rep. of Korea,311250701
313,2021,Belgium,Rep. of Korea,61113
314,2021,Canada,Rep. of Korea,61827465
315,2021,Estonia,Rep. of Korea,7082833
316,2021,Germany,Rep. of Korea,543188
317,2021,India,Rep. of Korea,22649205
318,2021,Latvia,Rep. of Korea,7295906
319,2021,Romania,Rep. of Korea,111652191


In [None]:
# Test: each selection below should print an empty DataFrame for the projected 2022 data.
projected_2022 = df_kor[2022]
elimination_checks = [
    ('From', 'Russian Federation', ()),
    ('From', 'Ukraine', ()),
    ('From', 'India', ()),
    ('To', 'Russian Federation', ()),
    ('To', 'Ukraine', ('\n',)),   # extra blank line after this check, as in the original output
    ('To', 'India', ()),
]
for column, country, trailer in elimination_checks:
    print(projected_2022[projected_2022[column] == country], *trailer)
# successfully eliminated

Empty DataFrame
Columns: [Year, From, To, Trade Value (US$)]
Index: []
Empty DataFrame
Columns: [Year, From, To, Trade Value (US$)]
Index: []
Empty DataFrame
Columns: [Year, From, To, Trade Value (US$)]
Index: []
Empty DataFrame
Columns: [Year, From, To, Trade Value (US$)]
Index: []
Empty DataFrame
Columns: [Year, From, To, Trade Value (US$)]
Index: [] 

Empty DataFrame
Columns: [Year, From, To, Trade Value (US$)]
Index: []


## Building the Korea-centered trade graphs

In [None]:
# wheat_korea[year] -> directed trade graph for that year: one edge per (From -> To) row,
# carrying the trade value as the edge attribute 'Trade Value (US$)'.
wheat_korea = {}

for year in range(startYear, endYear + 1):
    yearly_edges = df_kor[year]
    wheat_korea[year] = nx.from_pandas_edgelist(
        yearly_edges,
        source='From',
        target='To',
        edge_attr='Trade Value (US$)',
        create_using=nx.DiGraph(),  # directed graph
    )

In [None]:
# finding independent, moderate, dependent node in Graph
def nodeDependency(G):
    """Split the nodes of ``G`` into three lists by their export share.

    For every node the export share is

        weighted out-degree / (weighted out-degree + weighted in-degree)

    using the edge attribute 'Trade Value (US$)' as the weight.

    Args:
        G: graph exposing ``nodes()``, ``out_degree(node, weight=...)`` and
           ``in_degree(node, weight=...)`` (e.g. a networkx DiGraph).

    Returns:
        ``(indep_nodes, moderate_nodes, dep_nodes)``:
          - indep_nodes:    share > 0.7
          - dep_nodes:      share < 0.3
          - moderate_nodes: everything else, including nodes whose total weighted
            degree is 0 (the share is undefined there; the previous version raised
            ZeroDivisionError on such nodes).
    """
    weight = 'Trade Value (US$)'
    indep_nodes = []
    moderate_nodes = []
    dep_nodes = []

    for node in G.nodes():
        exported = G.out_degree(node, weight=weight)
        total = exported + G.in_degree(node, weight=weight)

        if total == 0:
            # No weighted trade in either direction: nothing to base a ratio on.
            moderate_nodes.append(node)
            continue

        ratio = exported / total
        if ratio < 0.3:
            dep_nodes.append(node)
        elif ratio > 0.7:
            indep_nodes.append(node)
        else:
            moderate_nodes.append(node)

    return indep_nodes, moderate_nodes, dep_nodes

In [None]:
def DiGraphPlot(G, year):
    """Draw ``G`` on a circular layout, colour nodes by dependency, save and show it.

    Nodes are coloured using the three lists returned by ``nodeDependency(G)``:
    blue for nodes with low import dependency, green for nodes whose imports and
    exports are similar, red for nodes with high import dependency.

    Args:
        G: directed trade graph (edges carry 'Trade Value (US$)').
        year: year shown in the title and used in the output file name.

    Side effects:
        Updates the global matplotlib rcParams "figure.facecolor" and
        "savefig.facecolor", writes a PNG to the shared Drive folder, displays the
        figure and closes it.
    """
    # Set the background colours BEFORE creating the figure: "figure.facecolor" is read
    # when the Figure is constructed, so updating it afterwards (as the previous version
    # did) left the first figure drawn without the intended background.
    plt.rcParams.update({
        "figure.facecolor":  (1.0, 229/255, 204/255, 0.3),  # with alpha = 30%
        "savefig.facecolor": (1.0, 229/255, 204/255, 0.3),  # with alpha = 30%
    })

    fig, ax = plt.subplots(figsize=(32, 32))

    # node lists by dependency
    indep_nodes, moderate_nodes, dep_nodes = nodeDependency(G)

    # layout
    title = "circular layout of Countries related to Korea in {}".format(year)
    pos = nx.circular_layout(G)

    # Base drawing: default-sized nodes, edges and white labels.
    nx.draw_networkx(G, pos=pos, font_color="white", ax=ax, font_size=12)

    # Large coloured markers drawn over the base nodes, one colour per dependency class.
    node_groups = (
        (indep_nodes, "tab:blue"),    # low import dependency
        (moderate_nodes, "green"),    # imports and exports are similar
        (dep_nodes, "tab:red"),       # high import dependency
    )
    for nodelist, color in node_groups:
        nx.draw_networkx_nodes(G, pos,
                               nodelist=nodelist,
                               node_color=color,
                               node_size=10000,
                               ax=ax)

    # arrow style
    nx.draw_networkx_edges(G, pos, edge_color='k', arrowsize=25, connectionstyle='arc3',
                           min_target_margin=60, ax=ax)

    ax.set_title(title, fontsize=40)
    ax.axis('off')

    fig.tight_layout()
    fig.savefig(f"/content/drive/Shareddrives/22-1 데이터마이닝/TermProject/Louvain_Analysis/Korea/koreaGraph_in_{year}.png",
                # dpi = 200, # resolution
                bbox_inches='tight'
                )
    plt.show()
    # Close this specific figure so repeated calls in a loop do not accumulate open figures.
    plt.close(fig)

In [None]:
# saving plot image
# The description is updated inside the loop: an f-string passed as `desc=` is evaluated
# once, before iteration starts, so it showed a stale `year` left over from an earlier
# cell (or raised NameError if no such variable existed) instead of the year being plotted.
year_bar = tqdm(range(startYear, endYear+1), mininterval=0.01)
for year in year_bar:
    year_bar.set_description(f"plotting {year}...")
    DiGraphPlot(wheat_korea[year], year)

## Centrality analysis

### return_centralities_as_dict()

In [None]:
from networkx.algorithms.centrality.eigenvector import eigenvector_centrality

def return_centralities_as_dict(input_g):
    """Compute two trade-value-weighted centralities for every node of ``input_g``.

    Returns a dict with two entries, each mapping node -> score:
      'weighted_deg'     : sum of 'Trade Value (US$)' over every edge touching the
                           node (as source or as target), not normalized.
      'eigenvector_cent' : result of nx.eigenvector_centrality weighted by
                           'Trade Value (US$)'.
    """
    trade_value = 'Trade Value (US$)'

    # Weighted degree centrality, returned as a dictionary.
    def return_weighted_degree_centrality(input_g, normalized=False):
        totals = dict.fromkeys(input_g.nodes(), 0.0)  # every node starts at 0.0
        for source, target, attrs in input_g.edges(data=True):
            amount = attrs[trade_value]
            totals[source] += amount
            totals[target] += amount
        if normalized == True:
            grand_total = sum(totals.values())
            return {node: total / grand_total for node, total in totals.items()}
        return totals

    def return_eigenvector_centrality(input_g):
        # max_iter defaults to 100; when it is left unspecified a
        # PowerIterationFailedConvergence error occurs, hence the very large cap.
        return nx.eigenvector_centrality(input_g, weight=trade_value, max_iter=1000000000000)

    weighted_degree = return_weighted_degree_centrality(input_g)
    eigenvector = return_eigenvector_centrality(input_g)
    return {
        'weighted_deg': weighted_degree,
        'eigenvector_cent': eigenvector,
    }

In [None]:
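# # Takes about 2 hours to run, so this cell is left commented out; the cached result is loaded from kor_cenInfo.pkl below.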

# kor_cenInfo = {}
    
# for year in tqdm(range(startYear, endYear+1), desc="{}년도의 centrality 계산".format(year), mininterval=0.01):
#   kor_cenInfo[year] = {}
  
#   kor_cenInfo[year]['wdeg'] = return_centralities_as_dict(wheat_korea[year])['weighted_deg']
  
#   kor_cenInfo[year]['weig'] = return_centralities_as_dict(wheat_korea[year])['eigenvector_cent']
  

In [None]:
# kor_cenInfo.keys()

## korea_cenInfo.pkl 로 저장하기

In [None]:
# # saving
# with open('/content/drive/Shareddrives/22-1 데이터마이닝/TermProject/Louvain_Analysis/pickles/kor_cenInfo.pkl', 'wb') as f:
#   pickle.dump(kor_cenInfo, f)

In [None]:
# loading the cached per-year centrality scores
# NOTE(review): pickle.load can execute arbitrary code - only load this file from a trusted location.
centrality_cache_path = '/content/drive/Shareddrives/22-1 데이터마이닝/TermProject/Louvain_Analysis/pickles/kor_cenInfo.pkl'
with open(centrality_cache_path, 'rb') as f:
    kor_cenInfo = pickle.load(f)

In [None]:
kor_cenInfo.keys()

dict_keys([2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022])

In [None]:
kor_cenInfo.values()

dict_values([{'wdeg': {'Rep. of Korea': 1014223196.0, 'Bolivia (Plurinational State of)': 32.0, 'Canada': 194703851.0, 'China': 108591645.0, 'Dominican Rep.': 36.0, 'Mexico': 160.0, 'Australia': 223428655.0, 'Brazil': 7085139.0, 'Bulgaria': 3474982.0, 'India': 84.0, 'Indonesia': 340.0, 'Italy': 1885.0, 'Japan': 15.0, 'Kyrgyzstan': 694220.0, 'Russian Federation': 9684.0, 'Singapore': 131197.0, 'Switzerland': 12.0, 'USA': 419244929.0, 'Ukraine': 56856330.0}, 'weig': {'Rep. of Korea': 0.9999442967129436, 'Bolivia (Plurinational State of)': 1.535428530056542e-05, 'Canada': 0.01055175275139794, 'China': 0.00023991070782133464, 'Dominican Rep.': 1.7273570963136097e-05, 'Mexico': 7.677142650282709e-05, 'Australia': 0.0, 'Brazil': 0.0, 'Bulgaria': 0.0, 'India': 0.0, 'Indonesia': 0.0, 'Italy': 0.0, 'Japan': 0.0, 'Kyrgyzstan': 0.0, 'Russian Federation': 0.0, 'Singapore': 0.0, 'Switzerland': 0.0, 'USA': 0.0, 'Ukraine': 0.0}}, {'wdeg': {'Rep. of Korea': 1375956319.0, 'Australia': 235001469.0, 'Ban

# Centrality 높은 상위 국가 판단

거래 금액을 weight로 지정한 아래의 두 Centrality를 이용해 파악해보았다.

degree centrality

eigenvector centrality

In [None]:
# For every year, collect the top-ranked countries for each centrality into a DataFrame.
# Korea itself is expected to appear in the ranking, so 6 ranks are kept to still get a top 5.

centrality = {}
centrality_df = {}

# NOTE(review): the row labels below keep the existing "weigthed" spelling byte-for-byte.
rank_labels = {0: '1st', 1: '2nd', 2: '3rd', 3: '4th', 4: '5th', 5: '6th'}
row_labels = {
    0: 'weigthed degree centrality',
    1: 'weigthed eigenvector centrality',
}

for year in tqdm(range(startYear, endYear+1), desc="centrality top 5", mininterval=0.01):
    yearly_scores = kor_cenInfo[year]

    # 'wdeg' = weighted degree centrality, 'weig' = weighted eigenvector centrality;
    # each entry is the list of the 6 highest-scoring countries, best first.
    centrality[year] = {
        metric: [
            country
            for country, _score in sorted(yearly_scores[metric].items(),
                                          key=lambda pair: pair[1],
                                          reverse=True)[:6]
        ]
        for metric in ('wdeg', 'weig')
    }

    ranking_rows = [centrality[year]['wdeg'], centrality[year]['weig']]
    centrality_df[year] = pd.DataFrame(ranking_rows).rename(columns=rank_labels, index=row_labels)


centrality top 5: 100%|██████████| 17/17 [00:00<00:00, 512.57it/s]


In [None]:
# Six highest-scoring countries (with their scores) for each centrality in the chosen year.
year = 2021

for label, metric in (('weighted degree centrality', 'wdeg'),
                      ('weighted eigenvector centrality', 'weig')):
    top_six = sorted(kor_cenInfo[year][metric].items(), key=lambda pair: pair[1], reverse=True)[0:6]
    print(f'{label} in {year}:\n', top_six, '\n')

weighted degree centrality in 2021:
 [('Rep. of Korea', 1082993365.0), ('USA', 495796158.0), ('Australia', 311250701.0), ('Romania', 111652191.0), ('Ukraine', 63135753.0), ('Canada', 61827465.0)] 

weighted eigenvector centrality in 2021:
 [('Philippines', 0.9805806351483545), ('Netherlands', 0.19611612702968018), ('Rep. of Korea', 0.00028756030354996985), ('Australia', 1.1544508573727233e-14), ('Belgium', 1.1544508573727233e-14), ('Canada', 1.1544508573727233e-14)] 



In [None]:
centrality_df[2021]

Unnamed: 0,1st,2nd,3rd,4th,5th,6th
weigthed degree centrality,Rep. of Korea,USA,Australia,Romania,Ukraine,Canada
weigthed eigenvector centrality,Philippines,Netherlands,Rep. of Korea,Australia,Belgium,Canada


In [None]:
# Six highest-scoring countries (with their scores) for each centrality in the projected year.
year = 2022

for label, metric in (('weighted degree centrality', 'wdeg'),
                      ('weighted eigenvector centrality', 'weig')):
    top_six = sorted(kor_cenInfo[year][metric].items(), key=lambda pair: pair[1], reverse=True)[0:6]
    print(f'{label} in {year}:\n', top_six, '\n')

weighted degree centrality in 2022:
 [('Rep. of Korea', 997208407.0), ('USA', 495796158.0), ('Australia', 311250701.0), ('Romania', 111652191.0), ('Canada', 61827465.0), ('Latvia', 7295906.0)] 

weighted eigenvector centrality in 2022:
 [('Philippines', 0.9805806416239025), ('Netherlands', 0.19611612832478895), ('Rep. of Korea', 0.00026359694666163477), ('Australia', 1.0573398424374358e-14), ('Belgium', 1.0573398424374358e-14), ('Canada', 1.0573398424374358e-14)] 



In [None]:
centrality_df[2022]

Unnamed: 0,1st,2nd,3rd,4th,5th,6th
weigthed degree centrality,Rep. of Korea,USA,Australia,Romania,Canada,Latvia
weigthed eigenvector centrality,Philippines,Netherlands,Rep. of Korea,Australia,Belgium,Canada
