In [1]:
'''
methods:
- select papers from valid journals
- select papers citing papers from valid journals
- calculates IF
- count number of papers per journal
'''

import glob
import datetime
import dbgz, json
import WOSRaw as wos
import pandas as pd
import igraph as ig
import xnetwork as xnet

from pathlib import Path
from tqdm.auto import tqdm
from functools import partial
from multiprocessing import Pool
from collections import defaultdict
from timeit import default_timer as timer

In [None]:

# Path to the existing dbgz file
# NOTE(review): absolute, machine-specific location; it is not referenced by
# any of the cells visible in this part of the notebook.
WOSArchivePath = Path("/mnt/e/WoS/WoS_2022_DBGZ/WoS_2022_All.dbgz")

# Not referenced by any of the visible cells.
items_journal = []

# Journal whose papers are selected below; it is compared for exact equality
# against the 'journal' column and is also embedded in the output file name.
journal = 'chem. mat.'

# Date stamp embedded in the names of the files written by this notebook.
today = datetime.date.today()

def get_papers_from_journal(journal, papers_path='all_valid_papers_2023-05-09.txt'):
    """Return the rows of the valid-papers table that belong to ``journal``.

    Parameters
    ----------
    journal : str
        Journal name; matched by exact equality against the ``journal`` column.
    papers_path : str or path-like, optional
        Tab-separated file without a header row whose columns are read as
        ``wos_id``, ``refs``, ``year``, ``journal`` and ``title``. Defaults to
        the snapshot file this notebook has always used.

    Returns
    -------
    pandas.DataFrame
        The matching rows with the five columns above (original row index kept).
    """
    columns = ['wos_id', 'refs', 'year', 'journal', 'title']
    df = pd.read_csv(papers_path, sep='\t', names=columns)
    return df[df['journal'] == journal]
    
    
def get_papers_citing_papers(papers, citations_path='citing_all_valid_2023-05-09.txt'):
    """Return the citation pairs whose two endpoints are both in ``papers``.

    Despite the name, a row is kept only when the citing paper AND the cited
    paper are members of ``papers``; citations coming from outside the set
    are dropped.

    Parameters
    ----------
    papers : iterable
        Paper identifiers (e.g. the ``wos_id`` column of the node table).
    citations_path : str or path-like, optional
        Tab-separated file without a header row whose two columns are read as
        ``citing`` and ``cited``. Defaults to the snapshot file this notebook
        has always used.

    Returns
    -------
    pandas.DataFrame
        The retained rows with columns ``citing`` and ``cited``. The number of
        retained rows is also printed.
    """
    # Build the lookup set once; it is used for both membership tests.
    paper_ids = set(papers)
    df = pd.read_csv(citations_path, sep='\t', names=['citing', 'cited'])
    both_inside = df['cited'].isin(paper_ids) & df['citing'].isin(paper_ids)
    refs = df[both_inside]
    print(len(refs))
    return refs
 
def get_network(nodes, edges, journal):
    """Build a directed citation graph and write it to an ``.xnet`` file.

    Parameters
    ----------
    nodes : pandas.DataFrame
        One row per paper; the columns ``wos_id``, ``year``, ``journal`` and
        ``title`` are read and stored as the vertex attributes ``name``,
        ``year``, ``journal`` and ``title``.
    edges : pandas.DataFrame
        One row per citation; the columns ``citing`` and ``cited`` are read
        and each row becomes the edge ``citing -> cited``.
    journal : str
        Used only to build the output file name.

    Returns
    -------
    None
        The graph is written to
        ``citing_net_<journal>_journals_<today>.xnet``, where ``today`` is the
        module-level date defined alongside this function. Progress and small
        previews are printed along the way.
    """
    print(nodes.head())

    print('nodes...')
    # Pull whole columns at once instead of walking the frame row by row.
    names = nodes['wos_id'].tolist()
    years = nodes['year'].tolist()
    journals = nodes['journal'].tolist()
    titles = nodes['title'].tolist()

    print('edges...')
    edges2 = list(zip(edges['citing'].tolist(), edges['cited'].tolist()))

    g = ig.Graph(directed=True)
    g.add_vertices(len(names))

    g.vs['name'] = names
    g.vs['journal'] = journals
    print(set(journals))
    g.vs['year'] = years
    g.vs['title'] = titles
    print(edges2[:10])
    # NOTE(review): the edge endpoints are paper ids, not vertex indices; this
    # presumably relies on igraph resolving them through the 'name' attribute
    # set above - confirm the ids are strings.
    g.add_edges(edges2)

    xnet.igraph2xnet(g, 'citing_net_{}_journals_{}.xnet'.format(journal, today))
    

# Select the rows of the paper table that belong to the configured journal.
nodes = get_papers_from_journal(journal)
# Keep only the citation pairs whose citing and cited papers are both in that selection.
edges = get_papers_citing_papers(nodes['wos_id'])  
# Build the directed citation graph and write it to an .xnet file.
get_network(nodes, edges, journal)


# TODO: for 5-year time windows

In [28]:
# Split the journal citation network into consecutive fixed-length year
# windows and write one .xnet file per window.
today = datetime.date.today()
begin_year = 2023-35  # evaluates to 1988, the first year of the first window
end_year = 2022
time_window = 5

# Network file written by an earlier run of the network-building cell; note
# that the date stamp in the name is fixed, not today's date.
source_net_path = 'citing_net_chem. mat._journals_2023-05-25.xnet'
window_net_pattern = 'citing_net_chem. mat._time_windows_{}_{}_{}.xnet'

# Parse the source file once; every window starts from a fresh copy because
# delete_vertices() modifies the graph in place.
full_net = xnet.xnet2igraph(source_net_path)

# range() excludes end_year itself: the last window starts at 2018 and covers
# 2018..2022.
for year in range(begin_year, end_year, time_window):
    last_year = year + time_window - 1
    print(year, last_year)

    cit_net = full_net.copy()
    # Indices of the vertices published before the window or after it.
    vs = [v.index for v in cit_net.vs.select(year_le=year-1)]
    vs.extend(v.index for v in cit_net.vs.select(year_ge=year+time_window))
    cit_net.delete_vertices(vs)

    print(cit_net.vcount())
    xnet.igraph2xnet(cit_net, window_net_pattern.format(year, last_year, today))

# Vertex attributes carried by the (last) window network.
print(cit_net.vs.attributes())

1988 1992
771
1993 1997
1894
1998 2002
3144
2003 2007
4451
2008 2012
3963
2013 2017
4716
2018 2022
3946
['name', 'journal', 'title', 'year']
