# DA lab 6주차 링크프레딕션: networkx 라이브러리의 활용

## Link Prediction

### Contents
* 데이터 준비(두 개의 분석구간)
* 링크 예측
* 성능 검증

### Used Library
* networkx
* pandas
* pickle

In [1]:
import pandas as pd
import networkx as nx
import pickle

### Link Prediction 알고리즘
* resource_allocation_index
* jaccard_coefficient
* adamic_adar_index
* preferential_attachment
* cn_soundarajan_hopcroft
* ra_index_soundarajan_hopcroft
* within_inter_cluster

In [2]:
with open('ipc_nx_list(1).pickle','rb') as f:
    data_first=pickle.load(f)
with open('ipc_nx_list(2).pickle','rb') as d:
    data_second=pickle.load(d)

### 2가지 분석구간의 데이터 준비

In [3]:
data_first

[('A61C', 'A61B', 5),
 ('G01B', 'C08F', 1),
 ('B65D', 'B43M', 1),
 ('B65G', 'A01B', 1),
 ('C12N', 'C11B', 1),
 ('G01L', 'G01K', 1),
 ('H04Q', 'G09B', 1),
 ('G01F', 'F17D', 2),
 ('C22C', 'B32B', 1),
 ('A61K', 'A45D', 1),
 ('F21V', 'A01K', 1),
 ('A61N', 'A61C', 1),
 ('A42B', 'A41D', 1),
 ('G01N', 'A61M', 2),
 ('H04B', 'G01F', 1),
 ('G09F', 'G01N', 1),
 ('H04Q', 'H04B', 1),
 ('C21D', 'B23K', 2),
 ('H04N', 'A63B', 2),
 ('G08B', 'G05D', 1),
 ('F25D', 'A47G', 1),
 ('B23K', 'A61L', 2),
 ('B31B', 'A47G', 1),
 ('H04N', 'G09F', 1),
 ('B27B', 'A61B', 1),
 ('B08B', 'A61M', 1),
 ('B29C', 'A63B', 3),
 ('B29C', 'A61F', 2),
 ('A61M', 'A61J', 1),
 ('B05C', 'A61F', 2),
 ('G01M', 'F17D', 1),
 ('C08F', 'A61L', 1),
 ('A42B', 'A01M', 1),
 ('H02N', 'G08B', 1),
 ('C02F', 'A61L', 1),
 ('C12Q', 'A61B', 1),
 ('A47J', 'A23L', 3),
 ('F02D', 'B01J', 1),
 ('C12N', 'A01G', 1),
 ('G03B', 'A63J', 1),
 ('C23C', 'B23P', 1),
 ('G08B', 'G06F', 3),
 ('H01B', 'B82Y', 1),
 ('G06F', 'A61G', 2),
 ('B29C', 'A61M', 2),
 ('B65B', 

In [4]:
data_second

[('C11B', 'C07D', 1),
 ('C10B', 'B09B', 1),
 ('F16L', 'B62D', 1),
 ('G03F', 'B41C', 1),
 ('G06F', 'C08C', 1),
 ('F02B', 'C01B', 1),
 ('B32B', 'A61F', 1),
 ('F16C', 'B61F', 1),
 ('C03B', 'A61J', 1),
 ('B62K', 'B60Q', 1),
 ('B64D', 'B60R', 2),
 ('C07D', 'C04B', 1),
 ('B60Q', 'A01B', 1),
 ('C09G', 'B24D', 1),
 ('G01N', 'B01J', 2),
 ('G02F', 'B32B', 1),
 ('B62D', 'B60P', 4),
 ('G02B', 'B64F', 1),
 ('G02B', 'C07C', 1),
 ('G02F', 'C08F', 1),
 ('B67B', 'B65D', 1),
 ('F02D', 'C12N', 1),
 ('G06T', 'B29C', 1),
 ('G01N', 'B82Y', 2),
 ('C05C', 'C05B', 1),
 ('B62D', 'B60R', 10),
 ('B60J', 'B32B', 2),
 ('H01B', 'B82Y', 1),
 ('G02F', 'G02B', 2),
 ('G06F', 'B29C', 1),
 ('C07J', 'A61K', 1),
 ('H01L', 'C08G', 2),
 ('B65B', 'A61B', 2),
 ('F16B', 'B30B', 1),
 ('B62L', 'B62J', 1),
 ('C04B', 'B23K', 1),
 ('C08F', 'B05D', 2),
 ('F16H', 'B60W', 1),
 ('B60W', 'B60R', 3),
 ('C10M', 'B32B', 1),
 ('G01V', 'G01S', 1),
 ('B62D', 'B60L', 1),
 ('C10H', 'C07C', 1),
 ('B61L', 'B61C', 2),
 ('F16B', 'B23P', 1),
 ('C22B',

### 쌍대노드 간의 링크 강도 예측

In [5]:
p=nx.Graph()
p.add_weighted_edges_from(data_first)

### adamic-adar 알고리즘의 활용

In [6]:
preds=nx.adamic_adar_index(p)

In [7]:
result_dic={}
for u,v,p in preds:
    result_dic[(u,v)]=p

In [8]:
result_dic

{('B25J', 'A01J'): 0,
 ('G01C', 'F28F'): 0,
 ('A01D', 'B60L'): 0,
 ('A63F', 'C12M'): 0,
 ('B23B', 'B05B'): 0.7553857282466059,
 ('B04B', 'C12C'): 0,
 ('B60N', 'B23B'): 0,
 ('H04R', 'B60B'): 0,
 ('F16J', 'B25D'): 0,
 ('B32B', 'A47C'): 1.3067276426852328,
 ('F01B', 'B29D'): 0,
 ('A01K', 'B31B'): 0.2835784920513334,
 ('A61H', 'B67C'): 0,
 ('B25B', 'B33Y'): 0,
 ('C11B', 'A47G'): 0,
 ('D06F', 'G09B'): 0,
 ('G01K', 'B25C'): 0.2422988705246811,
 ('G01S', 'E02F'): 0,
 ('B65D', 'B23C'): 0,
 ('F02C', 'G01V'): 0,
 ('B02C', 'F01N'): 0,
 ('H01M', 'C08F'): 0.2769378934088574,
 ('H01J', 'B23Q'): 0,
 ('B65B', 'B22F'): 0,
 ('G01S', 'B23K'): 0,
 ('F17C', 'F21W'): 0,
 ('B23D', 'A01M'): 0,
 ('A61C', 'C08J'): 0.5081714561995331,
 ('B65B', 'B64F'): 0,
 ('G01R', 'F03C'): 0,
 ('A63G', 'G07C'): 0,
 ('G01S', 'B60K'): 0.5282985380273555,
 ('G02C', 'H04W'): 0.6974184838380998,
 ('H02K', 'F04B'): 0,
 ('B32B', 'A21D'): 0.5905061684814683,
 ('E03F', 'C07D'): 0,
 ('A63C', 'B64D'): 0,
 ('B25D', 'A23J'): 0,
 ('G06Q', '

### 성능 검증

In [15]:
compare_dic={}

In [34]:
for row in data_second:
    if(tmp in result_dic ): ##예측 결과와 데이터2링크 겹치는 거 비교
        if(result_dic[tmp]!=0):
            compare_dic[tmp]=1
        else:
            compare_dic[tmp]=0
    else:
        compare_dic[tmp]=0

In [35]:
compare_dic

{('A61N', 'A01K'): 1,
 ('A61N', 'A01N'): 1,
 ('B01L', 'A61B'): 1,
 ('B22F', 'B21D'): 1,
 ('B23P', 'B23C'): 1,
 ('B23P', 'B23K'): 1,
 ('B23Q', 'A47B'): 0,
 ('B23Q', 'B23C'): 1,
 ('B23Q', 'B23K'): 1,
 ('B25B', 'B23B'): 0,
 ('B25B', 'B23Q'): 0,
 ('B25H', 'A47B'): 0,
 ('B25J', 'A62B'): 1,
 ('B26B', 'A61B'): 0,
 ('B26D', 'B25J'): 0,
 ('B27B', 'A01G'): 0,
 ('B29C', 'B05D'): 1,
 ('B29C', 'B23K'): 1,
 ('B29C', 'B23P'): 0,
 ('B29K', 'B05D'): 1,
 ('B29K', 'B23P'): 0,
 ('B29K', 'B29B'): 0,
 ('B29K', 'B29D'): 1,
 ('B29L', 'A43D'): 0,
 ('B29L', 'A61J'): 1,
 ('B29L', 'B05D'): 1,
 ('B29L', 'B23K'): 1,
 ('B29L', 'B23P'): 0,
 ('B32B', 'A61J'): 1,
 ('B32B', 'A61L'): 1,
 ('B32B', 'B05D'): 1,
 ('B32B', 'B22F'): 1,
 ('B32B', 'B23K'): 1,
 ('B32B', 'B26F'): 0,
 ('B32B', 'B29K'): 1,
 ('B60K', 'A01D'): 0,
 ('B60N', 'A47B'): 0,
 ('B60N', 'B29D'): 1,
 ('B60R', 'A47B'): 0,
 ('B60S', 'B60P'): 0,
 ('B62D', 'B25H'): 0,
 ('B62D', 'B60R'): 0,
 ('B64D', 'B33Y'): 1,
 ('B64D', 'B60N'): 1,
 ('B64D', 'B60R'): 1,
 ('B64F', 