In [20]:
# Python 2.7.12

In [21]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime
%matplotlib inline

In [22]:
# Field-of-study table: tab-separated with no header row, so the column
# names are supplied explicitly.
topics = pd.read_csv(
    '../../All data/advanced/FieldsOfStudy.txt',
    sep = '\t',
    header = None,
    names = [
        'topic', 'Rank', 'NormalizedName', 'name', 'MainType',
        'Level', 'PaperCount', 'CitationCount', 'CreatedDate',
    ]
)

In [23]:
# Paper / field-of-study rows; the first CSV column becomes the index.
papers = pd.read_csv(
    '../../Factorization_Machine/paper_citation_lowlevel_cs.csv',
    index_col = 0
)

In [24]:
# Distinct ids of the topics whose 'Level' is exactly 3.
level3_topics = topics.loc[topics['Level'] == 3, 'topic'].unique()

In [25]:
# Keep only the paper rows tagged with one of the level-3 topics.
paper_with_level3 = papers.loc[papers['FieldOfStudyId'].isin(level3_topics)]

In [26]:
def find_link_all(paper_year):
    """Count how often each pair of fields of study appears on the same paper.

    Parameters
    ----------
    paper_year : pandas.DataFrame
        One row per (paper, field of study) assignment. Must contain the
        columns "PaperId" and "FieldOfStudyId"; other columns are ignored.

    Returns
    -------
    dict
        Nested plain dicts ``{fId: {other_fId: count}}``. The mapping is
        symmetric (``result[a][b] == result[b][a]``). A field that never
        shares a paper with a different field is still present, mapped to
        an empty dict. A field is never paired with itself.

    Notes
    -----
    Rows are read with ``itertuples`` on just the two id columns instead of
    ``iterrows``. ``iterrows`` packs every row into a single Series, which
    upcasts integer ids to float as soon as the frame holds any float
    column, and is also much slower.
    Repeated (paper, field) rows are not de-duplicated: each repetition
    contributes to the counts again.
    """
    # Gather the fields of study attached to every paper.
    pId_fIds = {}  # key: pId, value: list of fId (keywords)
    id_columns = paper_year[["PaperId", "FieldOfStudyId"]]
    for pId, fId in id_columns.itertuples(index=False):
        pId_fIds.setdefault(pId, []).append(fId)

    # Every ordered pair of distinct fields on the same paper adds one count.
    share_count = {}
    for fIds in pId_fIds.values():
        for fId in fIds:
            # setdefault also registers fields that end up with no partner.
            partners = share_count.setdefault(fId, {})
            for fId2 in fIds:
                if fId2 == fId:
                    continue
                partners[fId2] = partners.get(fId2, 0) + 1

    return share_count

In [27]:
# Topic co-occurrence counts, computed separately for each year 1990..2017.
share_count_year = {}
for year in range(1990, 2018):
    paper_year = paper_with_level3.loc[paper_with_level3['Year'] == year]
    share_count_year[year] = find_link_all(paper_year)

In [28]:
from copy import deepcopy
# Work on an independent deep copy: the pruning cell that follows deletes
# entries from share_count_year_copy in place, so the original per-year
# counts in share_count_year stay intact.
share_count_year_copy = deepcopy(share_count_year)

In [29]:
# Drop every topic pair that co-occurs fewer than 10 times in a year, then
# drop topics left without any partner. Entries are deleted in place from
# the copied dicts; the key lists are snapshotted first so that deleting
# while looping is safe.

for year in xrange(1990, 2018):
    links_of_year = share_count_year_copy[year]
    for fId1 in list(links_of_year):
        partners = links_of_year[fId1]
        for fId2 in list(partners):
            if partners[fId2] < 10:
                del partners[fId2]

        if not partners:
            del links_of_year[fId1]

In [30]:
# Give every surviving keyword a consecutive integer id. Years are visited
# in chronological order, so a keyword's id is fixed by the first year in
# which it appears.

keywords_index = {}
i = 0
for year in xrange(1990, 2018):
    for fId in list(share_count_year_copy[year]):
        if fId in keywords_index:
            continue
        keywords_index[fId] = i
        i += 1

In [31]:
# Two-column lookup table: topic id -> node index used in the graphs.
# The frame is built in one step from the (topic, index) pairs, so each
# topic is guaranteed to sit on the same row as its own index, and the code
# does not rely on dict.keys() / dict.values() returning lists (they are
# views on Python 3). Row order is the dict's iteration order, as before.
keywords_index_table = pd.DataFrame(list(keywords_index.items()),
                                    columns = ['topic', 'graph_index'])

In [32]:
# Save the topic -> graph_index lookup table in the working directory so the
# mapping can be reloaded later without recomputing it.
keywords_index_table.to_csv('keywords_index_multics.csv')

In [33]:
# keywords_index_table = pd.read_csv('keywords_index_multics.csv', index_col = 0)

In [34]:
import networkx as nx

In [35]:
# Build graph for every year
#
# One nx.MultiGraph per year 1990..2017, appended to full_link in
# chronological order.
#   * Nodes are the integer ids from keywords_index. Each graph contains all
#     nodes 0..last_node, where last_node is the largest id present in that
#     year (node 0 always exists, even if the year has no keywords left).
#   * A topic pair that co-occurs n times in the year gets n parallel edges.
#   * The counts are stored in both directions (a -> b and b -> a), so each
#     unordered pair is tracked in a set and added only once. A set keeps the
#     membership test constant-time; a list would make it quadratic.

full_link = []
for year in range(1990, 2018):
    links_of_year = share_count_year_copy[year]

    # Largest node id in use this year.
    last_node = 0
    for fId1 in links_of_year:
        last_node = max(last_node, keywords_index[fId1])

    graph = nx.MultiGraph()
    graph.add_nodes_from(range(last_node + 1))

    edges = set()  # unordered topic pairs already added for this year
    for fId1, partners in links_of_year.items():
        for fId2, count in partners.items():
            pair = (min(fId1, fId2), max(fId1, fId2))
            if pair in edges:
                continue
            edges.add(pair)
            # One parallel edge per co-occurrence.
            graph.add_edges_from([(keywords_index[fId1], keywords_index[fId2])] * count)

    full_link.append(graph)

In [None]:
# Store the list of yearly graphs in an .npz archive under the key 'graph'.
# NOTE(review): the data/Multi_Computer_Science/ directory presumably has to
# exist before this runs - confirm.
np.savez('data/Multi_Computer_Science/graphs.npz', graph = full_link)

In [None]:
%%bash

# Run run_script.py on the Multi_Computer_Science dataset.
# NOTE(review): --max_time 28 presumably corresponds to the 28 yearly graphs
# (1990-2017) saved in graphs.npz - confirm against run_script.py.
python run_script.py --max_time 28 --dataset Multi_Computer_Science

In [39]:
# For each of the 27 embedding files (0..26): attach topic ids and names to
# the embedding rows and write one CSV labelled with the year num + 1991.
for num in range(0, 27):
    emb = pd.read_csv(
        'logs/DySAT_default/output/default_embs_Multi_Computer_Science_{}.csv'.format(str(num)),
        header = None
    )

    # Join the actual keywords back: graph_index is matched against the row
    # number of the embedding file, then each topic id is given its name.
    temporal_emb = (
        keywords_index_table
        .merge(emb, left_on = 'graph_index', right_index = True)
        .drop('graph_index', axis = 1)
        .merge(topics[['topic', 'name']], how = 'left', on = 'topic')
    )

    # Identifying columns first, embedding dimensions after.
    column_name = [col for col in temporal_emb.columns.values
                   if col not in ('topic', 'name')]
    temporal_emb = temporal_emb[['topic', 'name'] + column_name]

    temporal_emb.to_csv('temporal_embedding_multics_{}.csv'.format(str(num+1991)))