In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
%cd /content/drive/MyDrive/N_Graph

/content/drive/MyDrive/N_Graph


In [3]:
import pandas as pd
import json
import urllib
from tqdm import tqdm

# **JSON Loader**

In [None]:
f = open('input.json',)

In [None]:
data = json.load(f)

In [None]:
f.close()

In [None]:
nodes = []
for i in tqdm(data['Graph']):
    px = []
    px.append(i['id'])
    px.append(i['label'])
    nodes.append(px)
  
# Closing file
f.close()

In [None]:
rels = []
for i in tqdm(data['Relationship']):
  px = []
  px.append(i['start'])
  px.append(i['end'])
  rels.append(px)
  
# Closing file
f.close()

In [None]:
xd = pd.DataFrame(
            nodes, columns=['id', 'label'])
xr = pd.DataFrame(
            rels, columns=['start', 'end'])

In [None]:
xd.head()

In [None]:
xr.head()

In [None]:
xr.tail()

In [None]:
orr = xr.groupby(['end'])

# **Split into PRO CON**

In [None]:
xd.id[xd.id == '74356'].index.tolist()

In [None]:
xd.id[xd.id == '77483'].index.tolist()

In [None]:
xd.id[xd.label == 'with discussion and facts on a particular view that they may hold for the'].index.tolist()

In [None]:
split_point = 74356
in_split = 77586

In [None]:
casualty = xd[:split_point]

In [None]:
neutral = xd[split_point:in_split]

In [None]:
pro_con = xd[in_split:]

In [None]:
casualty.tail()

In [None]:
neutral.head()

In [None]:
neutral.tail()

In [None]:
pro_con.head()

# **Fiass Implementation**

In [None]:
!pip install faiss-gpu
!pip install sentence_transformers
!pip install --user -U nltk

In [None]:
import numpy as np
import faiss
import requests
from io import StringIO
import pandas as pd
from nltk.corpus import stopwords 
from nltk.tokenize import word_tokenize
from datetime import datetime
from sentence_transformers import SentenceTransformer
import os
import nltk
nltk.download("punkt")
nltk.download('stopwords')

In [None]:
data = casualty
data.head()

In [None]:
sentences = data['label'].tolist()
sentences[:10]

In [None]:
sentences_id = data['id'].tolist()
sentences_id[:10]

In [None]:
sentences = [
    sentence.replace('\n', '') for sentence in list(set(sentences)) if type(sentence) is str
    ]

In [None]:
with open('backup_sentences.txt', 'w') as fp:
    fp.write('\n'.join(sentences))

In [None]:
import torch

In [None]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [None]:
model = SentenceTransformer('bert-base-nli-mean-tokens', device='cuda').to(device)

sentence_embeddings = model.encode(sentences)
sentence_embeddings.shape

In [None]:
sentence_embeddings.shape[0]

In [None]:
with open(f'./sim_sentences/embeddings_X.npy', 'wb') as fp:
    np.save(fp, sentence_embeddings[0:256])

In [None]:
split = 256
file_count = 0
for i in range(0, sentence_embeddings.shape[0], split):
    end = i + split
    if end > sentence_embeddings.shape[0] + 1:
        end = sentence_embeddings.shape[0] + 1
    file_count = '0' + str(file_count) if file_count < 0 else str(file_count)
    with open(f'./sim_sentences/embeddings_{file_count}.npy', 'wb') as fp:
        np.save(fp, sentence_embeddings[i:end, :])
    print(f"embeddings_{file_count}.npy | {i} -> {end}")
    file_count = int(file_count) + 1

In [None]:
d = sentence_embeddings.shape[1]
d

In [None]:
nlist = 50
quantizer = faiss.IndexFlatL2(d)

In [None]:
res = faiss.StandardGpuResources()

In [None]:
f_index = faiss.IndexIVFFlat(quantizer, d, nlist, faiss.METRIC_L2)

In [None]:
index = faiss.index_cpu_to_gpu(res, 0, f_index)

In [None]:
index.is_trained

In [None]:
assert not index.is_trained

In [None]:
index.train(sentence_embeddings)
index.is_trained

In [None]:
index.add(sentence_embeddings)
index.ntotal

In [None]:
k = 1
xq = model.encode(["O Brasil ser/u00e1 beneficiado com a privatiza"])
index.nprobe = 10

In [None]:
%%time
D, I = index.search(xq, k)  # search
print(I)

In [None]:
[f'{i}: {sentences[i]}' for i in I[0]]

In [None]:
mapp = I[0][0]
print(I[0][0])

In [None]:
sentences[I[0][0]]

In [None]:
data.id[data.label == 'p brasiliensi'].index.tolist()[0]

In [None]:
datad = pro_con
datad.head()

In [None]:
sentence_b = datad['label'].tolist()
len(sentence_b)

In [None]:
sentence_b_id = datad['id'].tolist()
len(sentence_b_id)

# **Cluster Gun**

In [None]:
from IPython.display import clear_output

In [None]:
from tqdm import tqdm

In [None]:
sentence_b[:5]

In [None]:
sentence_b_id[:5]

In [None]:
brels = []

In [None]:
brels

In [None]:
for x in tqdm(range(len(sentence_b))):
  k = 4
  xq = model.encode([sentence_b[x]])
  D, I = index.search(xq, k)
  X = sentences[I[0][0]]
  Y = sentence_b[x]
  px = []
  px.append(casualty.id[casualty.label == X].index.tolist()[0])
  px.append(str(sentence_b_id[x]))
  brels.append(px)

In [None]:
nxr = pd.DataFrame(
            brels, columns=['start', 'end'])
nxr.to_csv('./new_rels.csv', index=False)

In [None]:
len(brels)

# **Strict Work of assigning parents**

In [None]:
gf = pd.read_csv('./new_rels.csv')
gf.head()

In [None]:
nst = gf['start'].tolist()
nen = gf['end'].tolist()

In [None]:
for x in tqdm(range(len(nst))):
  if str(nst[x]) in orr.groups.keys():
    dfd = orr.get_group(str(nst[x]))
    imp = dfd['start'].tolist()
    for z in range(len(imp)):
      px = []
      px.append(imp[z])
      px.append(nen[x])
      rels.append(px)

In [None]:
xnr = pd.DataFrame(
            rels, columns=['start', 'end'])

In [None]:
xnr.to_csv('./new_optic_parent_rels.csv', index=False)

In [None]:
rels[2023920:]

In [None]:
len(rels)

# **Merging Graphs**

In [None]:
xnr = pd.read_csv('./new_optic_parent_rels.csv')

In [None]:
lab = xd['label'].tolist()
xd['concept'] = lab
xd['type'] = 'node'
xd.head()

In [None]:
xnr['type'] = 'relationship'
xnr.head()

In [None]:
len(xnr)

In [None]:
xnr.drop_duplicates(keep='first', inplace=True)
len(xnr)

In [None]:
L1 = [{k: v for k, v in x.items() if pd.notnull(v)} for x in xd.to_dict('r')]
L2 = [{k: v for k, v in x.items() if pd.notnull(v)} for x in xnr.to_dict('r')]

In [None]:
with open('./Graph.json', 'w') as file:
    json.dump({ "Graph": L1, "Relationship": L2}, file)

# **New JSON Loader**

In [None]:
f = open('./Graph.json',)

In [None]:
data = json.load(f)

In [None]:
f.close()

In [None]:
nodes = []
for i in tqdm(data['Graph']):
    px = []
    px.append(int(i['id']))
    px.append(i['label'])
    nodes.append(px)
  
# Closing file
f.close()

In [None]:
rels = []
for i in tqdm(data['Relationship']):
  px = []
  px.append(int(i['start']))
  px.append(int(i['end']))
  rels.append(px)
  
# Closing file
f.close()

In [None]:
xd = pd.DataFrame(
            nodes, columns=['id', 'label'])
xr = pd.DataFrame(
            rels, columns=['start', 'end'])

In [None]:
xd.head()

In [None]:
len(xd)

In [None]:
xd = xd.drop_duplicates(subset=['id'], keep='first', ignore_index=True)

In [None]:
len(xd)

In [None]:
xr.head()

In [None]:
xr.tail()

In [None]:
lab = xd['label'].tolist()
xd['concept'] = lab
xd['type'] = 'node'
xd.tail()

In [None]:
xr['type'] = 'relationship'
xr.head()

In [None]:
chunky = []
for i in range(0, 338000):
  chunky.append(i)

In [None]:
chunky[:5]

In [None]:
abs = xd['id'].tolist()
abs[:5]

In [None]:
a = list(set(abs)-set(chunky))

In [None]:
len(a)

In [None]:
a.sort()

In [None]:
a[:5]

In [None]:
len(xr)

In [None]:
xr = xr[~xr['start'].isin(a)]
xr = xr[~xr['end'].isin(a)]

In [None]:
len(xr)

In [None]:
xr.head()

In [None]:
L1 = [{k: v for k, v in x.items() if pd.notnull(v)} for x in xd.to_dict('r')]
L2 = [{k: v for k, v in x.items() if pd.notnull(v)} for x in xr.to_dict('r')]

In [None]:
with open('./Graph.json', 'w') as file:
    json.dump({ "Graph": L1, "Relationship": L2}, file)

# **Re-Indexing**

In [None]:
ff = pd.read_csv('./Indexing.csv')

In [None]:
actual = ff['a'].tolist()
graph = ff['g'].tolist()

In [None]:
new = []
for i in tqdm(range(len(actual))):
  flag = 0
  for j in range(len(graph)):
    if int(actual[i]) == int(graph[j]):
      graph.remove(graph[j])
      flag = 1
      break
  if flag == 1:
    new.append(int(actual[i]))
  else:
    new.append(-1)

In [None]:
dataf = []
for i in tqdm(range(len(actual))):
  px = []
  px.append(int(actual[i]))
  px.append(new[i])
  dataf.append(px)

In [None]:
xx = pd.DataFrame(
            dataf, columns=['actual', 'original'])

In [None]:
xx.to_csv('./RmeIndex.csv')

# **Applying new indexes**

In [None]:
f = open('./Graph.json',)

In [None]:
data = json.load(f)

In [None]:
f.close()

In [None]:
nodes = []
for i in tqdm(data['Graph']):
    px = []
    px.append(int(i['id']))
    px.append(i['label'])
    nodes.append(px)
  
# Closing file
f.close()

In [None]:
rels = []
for i in tqdm(data['Relationship']):
  px = []
  px.append(int(i['start']))
  px.append(int(i['end']))
  rels.append(px)
  
# Closing file
f.close()

In [None]:
xd = pd.DataFrame(
            nodes, columns=['id', 'label'])
xr = pd.DataFrame(
            rels, columns=['start', 'end'])

In [None]:
xd.head()

In [None]:
xr.head()

In [None]:
nin = xd['id'].tolist()
nsti = xr['start'].tolist()
neni = xr['end'].tolist()

In [None]:
indexes = pd.read_csv('./Rme.csv')
indexes.head()

In [None]:
org = indexes['original'].tolist()
n_in = indexes['new'].tolist()

In [None]:
nid = []
for x in tqdm(nin):
  nid.append(int(n_in[x]))

In [None]:
xd['id'] = nid
xd.tail()

In [None]:
xd = xd.sort_values('id')
xd = xd.reset_index(drop=True)
xd.tail()

In [None]:
d = []
r = []

In [None]:
for x in tqdm(range(len(nsti))):
  d.append(int(n_in[nsti[x]]))
  r.append(int(n_in[neni[x]]))

In [None]:
xr['start'] = d
xr['end'] = r

In [None]:
lab = xd['label'].tolist()
xd['concept'] = lab
xd['type'] = 'node'
xd.tail()

In [None]:
xr['type'] = 'relationship'
xr.head()

In [None]:
L1 = [{k: v for k, v in x.items() if pd.notnull(v)} for x in xd.to_dict('r')]
L2 = [{k: v for k, v in x.items() if pd.notnull(v)} for x in xr.to_dict('r')]

In [None]:
with open('./Re-Graph.json', 'w') as file:
    json.dump({ "Graph": L1, "Relationship": L2}, file)

# **Assigning Effects to the relations**

In [None]:
f = open('./Re-Graph.json',)

In [None]:
data = json.load(f)

In [None]:
f.close()

In [None]:
nodes = []
for i in tqdm(data['Graph']):
    px = []
    px.append(int(i['id']))
    px.append(i['label'])
    px.append(i['concept'])
    px.append(i['type'])
    nodes.append(px)
  
# Closing file
f.close()

100%|██████████| 333075/333075 [00:00<00:00, 461524.57it/s]


In [None]:
rels = []
for i in tqdm(data['Relationship']):
  px = []
  px.append(int(i['start']))
  px.append(int(i['end']))
  px.append(i['effect'])
  px.append(i['type'])
  rels.append(px)
  
# Closing file
f.close()

100%|██████████| 2023904/2023904 [00:03<00:00, 638027.85it/s]


In [None]:
xd = pd.DataFrame(
            nodes, columns=['id', 'label', 'concept', 'type'])
xr = pd.DataFrame(
            rels, columns=['start', 'end', 'effect', 'type'])

In [None]:
lab = xd['label'].tolist()
xd['concept'] = lab
xd['type'] = 'node'
xd.tail()

Unnamed: 0,id,label,concept,type
333070,333070,valuable neighborhood school,valuable neighborhood school,node
333071,333071,teacher independence,teacher independence,node
333072,333072,teacher motivation,teacher motivation,node
333073,333073,innovation making the profession attractive,innovation making the profession attractive,node
333074,333074,charter school choice,charter school choice,node


In [None]:
xr['type'] = 'relationship'
xr.head()

Unnamed: 0,start,end,type
0,0,1,relationship
1,0,10,relationship
2,0,89,relationship
3,0,53,relationship
4,0,55,relationship


In [None]:
xd.head()

Unnamed: 0,id,label,concept,type
0,0,accident,accident,node
1,1,death,death,node
2,2,disease,disease,node
3,3,pneumonium,pneumonium,node
4,4,cancer,cancer,node


In [None]:
xr.head()

Unnamed: 0,start,end,type
0,0,1,relationship
1,0,10,relationship
2,0,89,relationship
3,0,53,relationship
4,0,55,relationship


**Implementing VADER**

In [None]:
!pip install vaderSentiment

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting vaderSentiment
  Downloading vaderSentiment-3.3.2-py2.py3-none-any.whl (125 kB)
[K     |████████████████████████████████| 125 kB 15.1 MB/s 
Installing collected packages: vaderSentiment
Successfully installed vaderSentiment-3.3.2


In [None]:
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

In [None]:
su = SentimentIntensityAnalyzer()

In [None]:
nin = xd['id'].tolist()
nlab = xd['label'].tolist()
nsti = xr['start'].tolist()
neni = xr['end'].tolist()

In [None]:
effect = []
for x in tqdm(range(len(nsti))):
  premise = nlab[nsti[x]]
  hypothesis = nlab[neni[x]]
  data = su.polarity_scores(f'{premise} but {hypothesis}')
  if data['compound'] <= 0:
    effect.append('negative')
  else:
    effect.append('positive')

  1%|          | 15673/2023904 [1:30:00<185:12:07,  3.01it/s]

In [None]:
xr['effect'] = effect
xr.head()

Unnamed: 0,start,end,type,effect
0,0,1,relationship,negative
1,0,10,relationship,negative
2,0,89,relationship,negative
3,0,53,relationship,negative
4,0,55,relationship,negative


In [None]:
L1 = [{k: v for k, v in x.items() if pd.notnull(v)} for x in xd.to_dict('r')]
L2 = [{k: v for k, v in x.items() if pd.notnull(v)} for x in xr.to_dict('r')]

  """Entry point for launching an IPython kernel.
  


In [None]:
with open('./ReNewEf-Graph.json', 'w') as file:
    json.dump({ "Graph": L1, "Relationship": L2}, file)

# **Adding Arg Graph**

In [None]:
xd.tail()

Unnamed: 0,id,label,concept,type
333070,333070,valuable neighborhood school,valuable neighborhood school,node
333071,333071,teacher independence,teacher independence,node
333072,333072,teacher motivation,teacher motivation,node
333073,333073,innovation making the profession attractive,innovation making the profession attractive,node
333074,333074,charter school choice,charter school choice,node


In [None]:
thr = len(xd)
nds = pd.read_csv('./arg_G.csv')
rsd = pd.read_csv('./arg_graph.csv') 

In [None]:
nds.head()

Unnamed: 0,id,label,concept
0,0,difficulty in defining fair use,difficulty in defining fair use
1,1,court battle,court battle
2,2,court,court
3,3,company,company
4,4,user,user


In [None]:
idf = nds['id'].tolist()
noid = []
for x in idf:
  noid.append(x + thr)
nds['id'] = noid

In [None]:
rsd.head()

Unnamed: 0,start,end,effect
0,0,1,positive
1,1,2,negative
2,1,3,negative
3,1,4,negative
4,5,6,positive


In [None]:
sti = rsd['start'].tolist()
eni = rsd['end'].tolist()
sd = []
se = []
for x in range(len(sti)):
  sd.append(sti[x] + thr)
  se.append(eni[x] + thr)
rsd['start'] = sd
rsd['end'] = se

In [None]:
rsd['type'] = 'relationship'
nds['type'] = 'node'

In [None]:
rsd.head()

Unnamed: 0,start,end,effect,type
0,333075,333076,positive,relationship
1,333076,333077,negative,relationship
2,333076,333078,negative,relationship
3,333076,333079,negative,relationship
4,333080,333081,positive,relationship


In [None]:
nds.head()

Unnamed: 0,id,label,concept,type
0,333075,difficulty in defining fair use,difficulty in defining fair use,node
1,333076,court battle,court battle,node
2,333077,court,court,node
3,333078,company,company,node
4,333079,user,user,node


In [None]:
xd = xd.append(nds)
xr = xr.append(rsd)

In [None]:
xd = xd.reset_index(drop=True)
xr = xr.reset_index(drop=True)

In [None]:
xd.tail()

Unnamed: 0,id,label,concept,type
337677,337677,employer to campaign year round,employer to campaign year round,node
337678,337678,collapsing of the skull of the partially born ...,collapsing of the skull of the partially born ...,node
337679,337679,risk of damage to the woman,risk of damage to the woman,node
337680,337680,random sobriety stop,random sobriety stop,node
337681,337681,unjust detection of other crime,unjust detection of other crime,node


In [None]:
xr.tail()

Unnamed: 0,start,end,type,effect
2032999,337668,337669,relationship,negative
2033000,337674,337675,relationship,positive
2033001,337678,335675,relationship,negative
2033002,337678,337679,relationship,negative
2033003,337680,337681,relationship,positive


In [None]:
L1 = [{k: v for k, v in x.items() if pd.notnull(v)} for x in xd.to_dict('r')]
L2 = [{k: v for k, v in x.items() if pd.notnull(v)} for x in xr.to_dict('r')]

  """Entry point for launching an IPython kernel.
  


In [None]:
with open('./ReNewEfArg-Graph.json', 'w') as file:
    json.dump({ "Graph": L1, "Relationship": L2}, file)

# **Assigning & Calculating Ranks**

In [None]:
f = open('./ReNewEfArg-Graph.json',)

In [None]:
data = json.load(f)

In [None]:
f.close()

In [None]:
nodes = []
for i in tqdm(data['Graph']):
    px = []
    px.append(int(i['id']))
    px.append(i['label'])
    px.append(i['concept'])
    px.append(i['type'])
    nodes.append(px)
  
# Closing file
f.close()

100%|██████████| 337682/337682 [00:00<00:00, 581402.38it/s]


In [None]:
rels = []
for i in tqdm(data['Relationship']):
  px = []
  px.append(int(i['start']))
  px.append(int(i['end']))
  px.append(i['effect'])
  px.append(i['type'])
  rels.append(px)
  
# Closing file
f.close()

100%|██████████| 2033004/2033004 [00:04<00:00, 501845.93it/s]


In [None]:
xd = pd.DataFrame(
            nodes, columns=['id', 'label', 'concept', 'type'])
xr = pd.DataFrame(
            rels, columns=['start', 'end', 'effect', 'type'])

In [None]:
xd.head()

Unnamed: 0,id,label,concept,type
0,0,accident,accident,node
1,1,death,death,node
2,2,disease,disease,node
3,3,pneumonium,pneumonium,node
4,4,cancer,cancer,node


In [None]:
xr.head()

Unnamed: 0,start,end,effect,type
0,0,1,negative,relationship
1,0,10,negative,relationship
2,0,89,negative,relationship
3,0,53,negative,relationship
4,0,55,negative,relationship


In [None]:
ndt = xd['id'].tolist()
eng = xr.groupby(['end'])
sng = xr.groupby(['start'])

In [None]:
k = 10000
n = len(xd)

In [None]:
im = k/n

In [None]:
im

0.02961366018917206

In [None]:
ranks = []

In [None]:
for x in tqdm(ndt):
  r = 0
  if x in eng.groups.keys():
    df = eng.get_group(x)
    ic = df['start'].tolist()
    for y in ic:
      if y in sng.groups.keys():
        dm = sng.get_group(y)
        l = len(dm)
        p = (k/l)*im
        r+=p
  ranks.append(r)

100%|██████████| 337682/337682 [08:19<00:00, 675.41it/s] 


In [None]:
xd['ranks'] = ranks

In [None]:
xd.head()

Unnamed: 0,id,label,concept,type,ranks
0,0,accident,accident,node,17399.134735
1,1,death,death,node,23440.78633
2,2,disease,disease,node,26235.653549
3,3,pneumonium,pneumonium,node,6641.914576
4,4,cancer,cancer,node,20368.378862


In [None]:
xd.tail()

Unnamed: 0,id,label,concept,type,ranks
337677,337677,employer to campaign year round,employer to campaign year round,node,49.3561
337678,337678,collapsing of the skull of the partially born ...,collapsing of the skull of the partially born ...,node,0.0
337679,337679,risk of damage to the woman,risk of damage to the woman,node,148.068301
337680,337680,random sobriety stop,random sobriety stop,node,0.0
337681,337681,unjust detection of other crime,unjust detection of other crime,node,296.136602


In [None]:
L1 = [{k: v for k, v in x.items() if pd.notnull(v)} for x in xd.to_dict('r')]
L2 = [{k: v for k, v in x.items() if pd.notnull(v)} for x in xr.to_dict('r')]

  """Entry point for launching an IPython kernel.
  


In [None]:
with open('./ReNewEfArgRanks-Graph.json', 'w') as file:
    json.dump({ "Graph": L1, "Relationship": L2}, file)

# **Assigning Pos Rank**

In [4]:
f = open('./ReNewEfArgRanks-Graph.json',)

In [5]:
data = json.load(f)

In [6]:
f.close()

In [7]:
nodes = []
for i in tqdm(data['Graph']):
    px = []
    px.append(int(i['id']))
    px.append(i['label'])
    px.append(i['concept'])
    px.append(i['type'])
    px.append(i['ranks'])
    nodes.append(px)
  
# Closing file
f.close()

100%|██████████| 337682/337682 [00:00<00:00, 374481.86it/s]


In [8]:
rels = []
for i in tqdm(data['Relationship']):
  px = []
  px.append(int(i['start']))
  px.append(int(i['end']))
  px.append(i['effect'])
  px.append(i['type'])
  rels.append(px)
  
# Closing file
f.close()

100%|██████████| 2033004/2033004 [00:03<00:00, 530249.98it/s]


In [10]:
xd = pd.DataFrame(
            nodes, columns=['id', 'label', 'concept', 'type', 'ranks'])
xr = pd.DataFrame(
            rels, columns=['start', 'end', 'effect', 'type'])

In [11]:
xd.head()

Unnamed: 0,id,label,concept,type,ranks
0,0,accident,accident,node,17399.134735
1,1,death,death,node,23440.78633
2,2,disease,disease,node,26235.653549
3,3,pneumonium,pneumonium,node,6641.914576
4,4,cancer,cancer,node,20368.378862


In [12]:
xr.head()

Unnamed: 0,start,end,effect,type
0,0,1,negative,relationship
1,0,10,negative,relationship
2,0,89,negative,relationship
3,0,53,negative,relationship
4,0,55,negative,relationship


In [13]:
orr = xr.groupby(['effect'])

In [14]:
pos = orr.get_group('positive')
neg = orr.get_group('negative')

In [35]:
len(pos)

331787

In [36]:
len(neg)

1701217

In [15]:
ndt = xd['id'].tolist()
eng = pos.groupby(['end'])
sng = pos.groupby(['start'])

In [16]:
k = 10000
n = len(xd)

In [17]:
im = k/n

In [18]:
im

0.02961366018917206

In [None]:
ranks = []

In [20]:
for x in tqdm(ndt):
  r = 0
  if x in eng.groups.keys():
    df = eng.get_group(x)
    ic = df['start'].tolist()
    for y in ic:
      if y in sng.groups.keys():
        dm = sng.get_group(y)
        l = len(dm)
        p = (k/l)*im
        r+=p
  ranks.append(r)

100%|██████████| 337682/337682 [01:27<00:00, 3878.09it/s]


In [21]:
xd['Pos'] = ranks

In [22]:
xd.head()

Unnamed: 0,id,label,concept,type,ranks,Pos
0,0,accident,accident,node,17399.134735,0.0
1,1,death,death,node,23440.78633,13.32465
2,2,disease,disease,node,26235.653549,2072.677657
3,3,pneumonium,pneumonium,node,6641.914576,273.826409
4,4,cancer,cancer,node,20368.378862,0.0


In [23]:
xd.tail()

Unnamed: 0,id,label,concept,type,ranks,Pos
337677,337677,employer to campaign year round,employer to campaign year round,node,49.3561,49.3561
337678,337678,collapsing of the skull of the partially born ...,collapsing of the skull of the partially born ...,node,0.0,0.0
337679,337679,risk of damage to the woman,risk of damage to the woman,node,148.068301,0.0
337680,337680,random sobriety stop,random sobriety stop,node,0.0,0.0
337681,337681,unjust detection of other crime,unjust detection of other crime,node,296.136602,296.136602


In [24]:
ndt = xd['id'].tolist()
eng = neg.groupby(['end'])
sng = neg.groupby(['start'])

In [25]:
k = 10000
n = len(xd)

In [26]:
im = k/n

In [27]:
im

0.02961366018917206

In [28]:
ranks = []

In [29]:
for x in tqdm(ndt):
  r = 0
  if x in eng.groups.keys():
    df = eng.get_group(x)
    ic = df['start'].tolist()
    for y in ic:
      if y in sng.groups.keys():
        dm = sng.get_group(y)
        l = len(dm)
        p = (k/l)*im
        r+=p
  ranks.append(r)

100%|██████████| 337682/337682 [07:07<00:00, 789.73it/s] 


In [30]:
xd['Neg'] = ranks

In [31]:
xd.head()

Unnamed: 0,id,label,concept,type,ranks,Pos,Neg
0,0,accident,accident,node,17399.134735,0.0,17508.337566
1,1,death,death,node,23440.78633,13.32465,23751.858104
2,2,disease,disease,node,26235.653549,2072.677657,24777.582845
3,3,pneumonium,pneumonium,node,6641.914576,273.826409,6487.896418
4,4,cancer,cancer,node,20368.378862,0.0,20963.63106


In [32]:
xd.tail()

Unnamed: 0,id,label,concept,type,ranks,Pos,Neg
337677,337677,employer to campaign year round,employer to campaign year round,node,49.3561,49.3561,0.0
337678,337678,collapsing of the skull of the partially born ...,collapsing of the skull of the partially born ...,node,0.0,0.0,0.0
337679,337679,risk of damage to the woman,risk of damage to the woman,node,148.068301,0.0,148.068301
337680,337680,random sobriety stop,random sobriety stop,node,0.0,0.0,0.0
337681,337681,unjust detection of other crime,unjust detection of other crime,node,296.136602,296.136602,0.0


In [33]:
L1 = [{k: v for k, v in x.items() if pd.notnull(v)} for x in xd.to_dict('r')]
L2 = [{k: v for k, v in x.items() if pd.notnull(v)} for x in xr.to_dict('r')]

  """Entry point for launching an IPython kernel.
  


In [34]:
with open('./ReNewEfArgRanksPN-Graph.json', 'w') as file:
    json.dump({ "Graph": L1, "Relationship": L2}, file)

# **Real Page Rank algorithm**

In [43]:
f = open('./ReNewEfArgRanks-Graph.json',)

In [44]:
data = json.load(f)

In [45]:
f.close()

In [46]:
nodes = []
for i in tqdm(data['Graph']):
    px = []
    px.append(int(i['id']))
    px.append(i['label'])
    px.append(i['concept'])
    px.append(i['type'])
    px.append(i['ranks'])
    nodes.append(px)
  
# Closing file
f.close()

100%|██████████| 337682/337682 [00:00<00:00, 476766.74it/s]


In [None]:
rels = []
for i in tqdm(data['Relationship']):
  px = []
  px.append(int(i['start']))
  px.append(int(i['end']))
  px.append(i['effect'])
  px.append(i['type'])
  rels.append(px)
  
# Closing file
f.close()

100%|██████████| 2033004/2033004 [00:04<00:00, 449614.77it/s]


In [None]:
xd = pd.DataFrame(
            nodes, columns=['id', 'label', 'concept', 'type', 'ranks'])
xr = pd.DataFrame(
            rels, columns=['start', 'end', 'effect', 'type'])

In [None]:
xd.head()

Unnamed: 0,id,label,concept,type,ranks
0,0,accident,accident,node,17399.134735
1,1,death,death,node,23440.78633
2,2,disease,disease,node,26235.653549
3,3,pneumonium,pneumonium,node,6641.914576
4,4,cancer,cancer,node,20368.378862


In [None]:
xr.head()

Unnamed: 0,start,end,effect,type
0,0,1,negative,relationship
1,0,10,negative,relationship
2,0,89,negative,relationship
3,0,53,negative,relationship
4,0,55,negative,relationship


In [None]:
ndt = xd['id'].tolist()
eng = xr.groupby(['end'])
sng = xr.groupby(['start'])

In [None]:
rank = xd['ranks'].tolist()

In [None]:
d = 0.85
n = len(xd)

In [None]:
im = (1-d)/n

In [None]:
ranks = []

In [None]:
for x in tqdm(ndt):
  r = 0
  sum = 0
  if x in eng.groups.keys():
    df = eng.get_group(x)
    ic = df['start'].tolist()
    for y in ic:
      if y in sng.groups.keys():
        dm = sng.get_group(y)
        l = len(dm)
        p = rank[y]/abs(l)
        sum+=p
  r = im + d * sum
  ranks.append(r)

100%|██████████| 337682/337682 [09:17<00:00, 605.35it/s] 


In [None]:
xd['final_ranks'] = ranks

In [None]:
xd.head()

Unnamed: 0,id,label,concept,type,ranks,final_ranks
0,0,accident,accident,node,17399.134735,1552.897128
1,1,death,death,node,23440.78633,11702.12012
2,2,disease,disease,node,26235.653549,5132.34662
3,3,pneumonium,pneumonium,node,6641.914576,1967.091368
4,4,cancer,cancer,node,20368.378862,3051.568576


In [None]:
xd.tail()

Unnamed: 0,id,label,concept,type,ranks,final_ranks
337677,337677,employer to campaign year round,employer to campaign year round,node,49.3561,4.442049e-07
337678,337678,collapsing of the skull of the partially born ...,collapsing of the skull of the partially born ...,node,0.0,4.442049e-07
337679,337679,risk of damage to the woman,risk of damage to the woman,node,148.068301,4.442049e-07
337680,337680,random sobriety stop,random sobriety stop,node,0.0,4.442049e-07
337681,337681,unjust detection of other crime,unjust detection of other crime,node,296.136602,4.442049e-07


In [None]:
L1 = [{k: v for k, v in x.items() if pd.notnull(v)} for x in xd.to_dict('r')]
L2 = [{k: v for k, v in x.items() if pd.notnull(v)} for x in xr.to_dict('r')]

  """Entry point for launching an IPython kernel.
  


In [None]:
with open('./ReNewEfArgRanksFinal-Graph.json', 'w') as file:
    json.dump({ "Graph": L1, "Relationship": L2}, file)

In [None]:
xd.to_csv('./FinalRankedGraph.csv')

# **Real Page Rank algorithm with Pos & Neg**

In [47]:
f = open('./ReNewEfArgRanksFinal-Graph.json',)

In [48]:
data = json.load(f)

In [49]:
f.close()

In [50]:
nodes = []
for i in tqdm(data['Graph']):
    px = []
    px.append(int(i['id']))
    px.append(i['label'])
    px.append(i['concept'])
    px.append(i['type'])
    px.append(i['ranks'])
    px.append(i['final_ranks'])
    nodes.append(px)
  
# Closing file
f.close()

100%|██████████| 337682/337682 [00:01<00:00, 177180.02it/s]


In [51]:
rels = []
for i in tqdm(data['Relationship']):
  px = []
  px.append(int(i['start']))
  px.append(int(i['end']))
  px.append(i['effect'])
  px.append(i['type'])
  rels.append(px)
  
# Closing file
f.close()

100%|██████████| 2033004/2033004 [00:04<00:00, 486151.15it/s]


In [53]:
xd = pd.DataFrame(
            nodes, columns=['id', 'label', 'concept', 'type', 'ranks', 'final_ranks'])
xr = pd.DataFrame(
            rels, columns=['start', 'end', 'effect', 'type'])

In [54]:
rnk = xd['final_ranks'].tolist()

In [55]:
f = open('./ReNewEfArgRanksPN-Graph.json',)

In [56]:
data = json.load(f)

In [57]:
f.close()

In [58]:
nodes = []
for i in tqdm(data['Graph']):
    px = []
    px.append(int(i['id']))
    px.append(i['label'])
    px.append(i['concept'])
    px.append(i['type'])
    px.append(i['ranks'])
    px.append(i['Pos'])
    px.append(i['Neg'])
    nodes.append(px)
  
# Closing file
f.close()

100%|██████████| 337682/337682 [00:00<00:00, 463553.52it/s]


In [59]:
rels = []
for i in tqdm(data['Relationship']):
  px = []
  px.append(int(i['start']))
  px.append(int(i['end']))
  px.append(i['effect'])
  px.append(i['type'])
  rels.append(px)
  
# Closing file
f.close()

100%|██████████| 2033004/2033004 [00:05<00:00, 362873.49it/s]


In [60]:
xd = pd.DataFrame(
            nodes, columns=['id', 'label', 'concept', 'type', 'ranks', 'Pos', 'Neg'])
xr = pd.DataFrame(
            rels, columns=['start', 'end', 'effect', 'type'])

In [61]:
xd['final_ranks'] = rnk

In [62]:
xd.head()

Unnamed: 0,id,label,concept,type,ranks,Pos,Neg,final_ranks
0,0,accident,accident,node,17399.134735,0.0,17508.337566,1552.897128
1,1,death,death,node,23440.78633,13.32465,23751.858104,11702.12012
2,2,disease,disease,node,26235.653549,2072.677657,24777.582845,5132.34662
3,3,pneumonium,pneumonium,node,6641.914576,273.826409,6487.896418,1967.091368
4,4,cancer,cancer,node,20368.378862,0.0,20963.63106,3051.568576


In [63]:
xr.head()

Unnamed: 0,start,end,effect,type
0,0,1,negative,relationship
1,0,10,negative,relationship
2,0,89,negative,relationship
3,0,53,negative,relationship
4,0,55,negative,relationship


In [64]:
orr = xr.groupby(['effect'])

In [65]:
pos = orr.get_group('positive')
neg = orr.get_group('negative')

In [66]:
len(pos)

331787

In [67]:
len(neg)

1701217

In [68]:
ndt = xd['id'].tolist()
eng = pos.groupby(['end'])
sng = pos.groupby(['start'])

In [69]:
rank = xd['Pos'].tolist()

In [70]:
d = 0.85
n = len(xd)

In [71]:
im = (1-d)/n

In [72]:
ranks = []

In [73]:
for x in tqdm(ndt):
  r = 0
  sum = 0
  if x in eng.groups.keys():
    df = eng.get_group(x)
    ic = df['start'].tolist()
    for y in ic:
      if y in sng.groups.keys():
        dm = sng.get_group(y)
        l = len(dm)
        p = rank[y]/abs(l)
        sum+=p
  r = im + d * sum
  ranks.append(r)

100%|██████████| 337682/337682 [01:20<00:00, 4204.66it/s]


In [74]:
xd['pRank'] = ranks

In [75]:
xd.head()

Unnamed: 0,id,label,concept,type,ranks,Pos,Neg,final_ranks,pRank
0,0,accident,accident,node,17399.134735,0.0,17508.337566,1552.897128,4.442049e-07
1,1,death,death,node,23440.78633,13.32465,23751.858104,11702.12012,4.442049e-07
2,2,disease,disease,node,26235.653549,2072.677657,24777.582845,5132.34662,731.1114
3,3,pneumonium,pneumonium,node,6641.914576,273.826409,6487.896418,1967.091368,967.2191
4,4,cancer,cancer,node,20368.378862,0.0,20963.63106,3051.568576,4.442049e-07


In [76]:
xd.tail()

Unnamed: 0,id,label,concept,type,ranks,Pos,Neg,final_ranks,pRank
337677,337677,employer to campaign year round,employer to campaign year round,node,49.3561,49.3561,0.0,4.442049e-07,4.442049e-07
337678,337678,collapsing of the skull of the partially born ...,collapsing of the skull of the partially born ...,node,0.0,0.0,0.0,4.442049e-07,4.442049e-07
337679,337679,risk of damage to the woman,risk of damage to the woman,node,148.068301,0.0,148.068301,4.442049e-07,4.442049e-07
337680,337680,random sobriety stop,random sobriety stop,node,0.0,0.0,0.0,4.442049e-07,4.442049e-07
337681,337681,unjust detection of other crime,unjust detection of other crime,node,296.136602,296.136602,0.0,4.442049e-07,4.442049e-07


In [77]:
ndt = xd['id'].tolist()
eng = neg.groupby(['end'])
sng = neg.groupby(['start'])

In [78]:
rank = xd['Neg'].tolist()

In [79]:
d = 0.85
n = len(xd)

In [80]:
im = (1-d)/n

In [81]:
ranks = []

In [82]:
for x in tqdm(ndt):
  r = 0
  sum = 0
  if x in eng.groups.keys():
    df = eng.get_group(x)
    ic = df['start'].tolist()
    for y in ic:
      if y in sng.groups.keys():
        dm = sng.get_group(y)
        l = len(dm)
        p = rank[y]/abs(l)
        sum+=p
  r = im + d * sum
  ranks.append(r)

100%|██████████| 337682/337682 [06:05<00:00, 923.30it/s] 


In [83]:
xd['nRank'] = ranks

In [84]:
xd.head()

Unnamed: 0,id,label,concept,type,ranks,Pos,Neg,final_ranks,pRank,nRank
0,0,accident,accident,node,17399.134735,0.0,17508.337566,1552.897128,4.442049e-07,1584.633381
1,1,death,death,node,23440.78633,13.32465,23751.858104,11702.12012,4.442049e-07,11357.284706
2,2,disease,disease,node,26235.653549,2072.677657,24777.582845,5132.34662,731.1114,5160.15546
3,3,pneumonium,pneumonium,node,6641.914576,273.826409,6487.896418,1967.091368,967.2191,1484.111504
4,4,cancer,cancer,node,20368.378862,0.0,20963.63106,3051.568576,4.442049e-07,3088.935625


In [85]:
xd.tail()

Unnamed: 0,id,label,concept,type,ranks,Pos,Neg,final_ranks,pRank,nRank
337677,337677,employer to campaign year round,employer to campaign year round,node,49.3561,49.3561,0.0,4.442049e-07,4.442049e-07,4.442049e-07
337678,337678,collapsing of the skull of the partially born ...,collapsing of the skull of the partially born ...,node,0.0,0.0,0.0,4.442049e-07,4.442049e-07,4.442049e-07
337679,337679,risk of damage to the woman,risk of damage to the woman,node,148.068301,0.0,148.068301,4.442049e-07,4.442049e-07,4.442049e-07
337680,337680,random sobriety stop,random sobriety stop,node,0.0,0.0,0.0,4.442049e-07,4.442049e-07,4.442049e-07
337681,337681,unjust detection of other crime,unjust detection of other crime,node,296.136602,296.136602,0.0,4.442049e-07,4.442049e-07,4.442049e-07


In [86]:
L1 = [{k: v for k, v in x.items() if pd.notnull(v)} for x in xd.to_dict('r')]
L2 = [{k: v for k, v in x.items() if pd.notnull(v)} for x in xr.to_dict('r')]

  """Entry point for launching an IPython kernel.
  


In [87]:
with open('./ReNewEfArgRanksFinalPN-Graph.json', 'w') as file:
    json.dump({ "Graph": L1, "Relationship": L2}, file)

In [88]:
xd.to_csv('./FinalRankedGraphPN.csv')