# DA lab 6주차 링크프레딕션: networkx 라이브러리의 활용

## Link Prediction

### Contents
* 데이터 준비(두 개의 분석구간)
* 링크 예측
* 성능 검증

### Used Library
* networkx
* pandas
* pickle

In [2]:
import pandas as pd
import networkx as nx
import pickle

### Link Prediction 알고리즘
* resource_allocation_index
* jaccard_coefficient
* adamic_adar_index
* preferential_attachment
* cn_soundarajan_hopcroft
* ra_index_soundarajan_hopcroft
* within_inter_cluster

In [3]:
with open('ipc_nx_list(1).pickle','rb') as f:
    data_first=pickle.load(f)
with open('ipc_nx_list(2).pickle','rb') as d:
    data_second=pickle.load(d)

### 2가지 분석구간의 데이터 준비

In [4]:
data_first

[('A61C', 'A61B', 5),
 ('G01B', 'C08F', 1),
 ('B65D', 'B43M', 1),
 ('B65G', 'A01B', 1),
 ('C12N', 'C11B', 1),
 ('G01L', 'G01K', 1),
 ('H04Q', 'G09B', 1),
 ('G01F', 'F17D', 2),
 ('C22C', 'B32B', 1),
 ('A61K', 'A45D', 1),
 ('F21V', 'A01K', 1),
 ('A61N', 'A61C', 1),
 ('A42B', 'A41D', 1),
 ('G01N', 'A61M', 2),
 ('H04B', 'G01F', 1),
 ('G09F', 'G01N', 1),
 ('H04Q', 'H04B', 1),
 ('C21D', 'B23K', 2),
 ('H04N', 'A63B', 2),
 ('G08B', 'G05D', 1),
 ('F25D', 'A47G', 1),
 ('B23K', 'A61L', 2),
 ('B31B', 'A47G', 1),
 ('H04N', 'G09F', 1),
 ('B27B', 'A61B', 1),
 ('B08B', 'A61M', 1),
 ('B29C', 'A63B', 3),
 ('B29C', 'A61F', 2),
 ('A61M', 'A61J', 1),
 ('B05C', 'A61F', 2),
 ('G01M', 'F17D', 1),
 ('C08F', 'A61L', 1),
 ('A42B', 'A01M', 1),
 ('H02N', 'G08B', 1),
 ('C02F', 'A61L', 1),
 ('C12Q', 'A61B', 1),
 ('A47J', 'A23L', 3),
 ('F02D', 'B01J', 1),
 ('C12N', 'A01G', 1),
 ('G03B', 'A63J', 1),
 ('C23C', 'B23P', 1),
 ('G08B', 'G06F', 3),
 ('H01B', 'B82Y', 1),
 ('G06F', 'A61G', 2),
 ('B29C', 'A61M', 2),
 ('B65B', 

In [5]:
data_second

[('C11B', 'C07D', 1),
 ('C10B', 'B09B', 1),
 ('F16L', 'B62D', 1),
 ('G03F', 'B41C', 1),
 ('G06F', 'C08C', 1),
 ('F02B', 'C01B', 1),
 ('B32B', 'A61F', 1),
 ('F16C', 'B61F', 1),
 ('C03B', 'A61J', 1),
 ('B62K', 'B60Q', 1),
 ('B64D', 'B60R', 2),
 ('C07D', 'C04B', 1),
 ('B60Q', 'A01B', 1),
 ('C09G', 'B24D', 1),
 ('G01N', 'B01J', 2),
 ('G02F', 'B32B', 1),
 ('B62D', 'B60P', 4),
 ('G02B', 'B64F', 1),
 ('G02B', 'C07C', 1),
 ('G02F', 'C08F', 1),
 ('B67B', 'B65D', 1),
 ('F02D', 'C12N', 1),
 ('G06T', 'B29C', 1),
 ('G01N', 'B82Y', 2),
 ('C05C', 'C05B', 1),
 ('B62D', 'B60R', 10),
 ('B60J', 'B32B', 2),
 ('H01B', 'B82Y', 1),
 ('G02F', 'G02B', 2),
 ('G06F', 'B29C', 1),
 ('C07J', 'A61K', 1),
 ('H01L', 'C08G', 2),
 ('B65B', 'A61B', 2),
 ('F16B', 'B30B', 1),
 ('B62L', 'B62J', 1),
 ('C04B', 'B23K', 1),
 ('C08F', 'B05D', 2),
 ('F16H', 'B60W', 1),
 ('B60W', 'B60R', 3),
 ('C10M', 'B32B', 1),
 ('G01V', 'G01S', 1),
 ('B62D', 'B60L', 1),
 ('C10H', 'C07C', 1),
 ('B61L', 'B61C', 2),
 ('F16B', 'B23P', 1),
 ('C22B',

### 쌍대노드 간의 링크 강도 예측

In [6]:
p=nx.Graph()
p.add_weighted_edges_from(data_first)

### adamic-adar 알고리즘의 활용

In [7]:
preds=nx.adamic_adar_index(p)

In [8]:
result_dic={}
for u,v,p in preds:
    result_dic[(u,v)]=p

In [9]:
result_dic

{('H01F', 'E21B'): 0.36067376022224085,
 ('H01F', 'A01J'): 0,
 ('H01F', 'B26B'): 0.30692767643013485,
 ('H01F', 'G08C'): 0,
 ('H01F', 'H04L'): 0,
 ('H01F', 'A61B'): 0,
 ('H01F', 'G21F'): 0,
 ('H01F', 'D04D'): 0,
 ('H01F', 'B02C'): 0,
 ('H01F', 'F16N'): 0,
 ('H01F', 'C01D'): 0,
 ('H01F', 'A47D'): 0,
 ('H01F', 'H05K'): 0,
 ('H01F', 'D06F'): 0,
 ('H01F', 'F21V'): 0,
 ('H01F', 'A63F'): 0,
 ('H01F', 'B64D'): 0,
 ('H01F', 'H01J'): 0,
 ('H01F', 'B22F'): 0,
 ('H01F', 'E03B'): 0,
 ('H01F', 'C08F'): 0,
 ('H01F', 'B01L'): 0,
 ('H01F', 'F16C'): 0,
 ('H01F', 'G21K'): 0,
 ('H01F', 'A43B'): 0,
 ('H01F', 'H04N'): 0,
 ('H01F', 'H02N'): 0,
 ('H01F', 'A63C'): 0,
 ('H01F', 'C12R'): 0,
 ('H01F', 'B29L'): 0,
 ('H01F', 'E01H'): 0,
 ('H01F', 'C25D'): 0,
 ('H01F', 'B60R'): 0.5581106265512472,
 ('H01F', 'A61Q'): 0,
 ('H01F', 'B21F'): 0,
 ('H01F', 'C07K'): 0,
 ('H01F', 'A23D'): 0.30692767643013485,
 ('H01F', 'B60N'): 0,
 ('H01F', 'B23D'): 0,
 ('H01F', 'C05G'): 0,
 ('H01F', 'A61G'): 0,
 ('H01F', 'C12P'): 0,
 ('H0

### 성능 검증

In [10]:
compare_dic={}

In [11]:
for tmp in data_second:
    if(tmp in result_dic ): ##예측 결과와 데이터2링크 겹치는 거 비교
        if(result_dic[tmp]!=0):
            compare_dic[tmp]=1
        else:
            compare_dic[tmp]=0
    else:
        compare_dic[tmp]=0

In [13]:
compare_dic

{('C11B', 'C07D', 1): 0,
 ('C10B', 'B09B', 1): 0,
 ('F16L', 'B62D', 1): 0,
 ('G03F', 'B41C', 1): 0,
 ('G06F', 'C08C', 1): 0,
 ('F02B', 'C01B', 1): 0,
 ('B32B', 'A61F', 1): 0,
 ('F16C', 'B61F', 1): 0,
 ('C03B', 'A61J', 1): 0,
 ('B62K', 'B60Q', 1): 0,
 ('B64D', 'B60R', 2): 0,
 ('C07D', 'C04B', 1): 0,
 ('B60Q', 'A01B', 1): 0,
 ('C09G', 'B24D', 1): 0,
 ('G01N', 'B01J', 2): 0,
 ('G02F', 'B32B', 1): 0,
 ('B62D', 'B60P', 4): 0,
 ('G02B', 'B64F', 1): 0,
 ('G02B', 'C07C', 1): 0,
 ('G02F', 'C08F', 1): 0,
 ('B67B', 'B65D', 1): 0,
 ('F02D', 'C12N', 1): 0,
 ('G06T', 'B29C', 1): 0,
 ('G01N', 'B82Y', 2): 0,
 ('C05C', 'C05B', 1): 0,
 ('B62D', 'B60R', 10): 0,
 ('B60J', 'B32B', 2): 0,
 ('H01B', 'B82Y', 1): 0,
 ('G02F', 'G02B', 2): 0,
 ('G06F', 'B29C', 1): 0,
 ('C07J', 'A61K', 1): 0,
 ('H01L', 'C08G', 2): 0,
 ('B65B', 'A61B', 2): 0,
 ('F16B', 'B30B', 1): 0,
 ('B62L', 'B62J', 1): 0,
 ('C04B', 'B23K', 1): 0,
 ('C08F', 'B05D', 2): 0,
 ('F16H', 'B60W', 1): 0,
 ('B60W', 'B60R', 3): 0,
 ('C10M', 'B32B', 1): 0,