Time profiling for the generation of a single flower

In [3]:
# Extensions
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [98]:
# Import list
from webapp.utils                  import get_all_paper_ids, get_all_normalised_names
from core.search.query_info        import paper_info_mag_check_multiquery
from core.score.agg_utils          import get_coauthor_mapping
from core.score.agg_paper_info     import score_paper_info_list
from core.score.agg_paper_info     import score_leaves
from core.utils.entity_type        import Entity_type
from core.score.agg_filter         import filter_year
from core.score.agg_score          import agg_score_df
from core.flower.flower_bloom_data import score_df_to_graph
from webapp.graph                  import processdata

In [14]:
# Inputs
# Entity selection for the flower being profiled: a single conference id.
# The query log printed by the next cell shows this id being used in the
# expression `Composite(C.CId=1135237122)`.
# NOTE(review): the `sigmm` prefix suggests this is the ACM SIGMM venue - confirm.
sigmm_dict = {'ConferenceIds': [1135237122]}

In [15]:
%%time

# Get the selected papers
# Both helpers receive the same entity dict. `selected_papers` feeds the
# paper-information lookup, and `entity_names` is used further down to drop
# the ego's own rows from the scores.
# NOTE(review): `entity_names` is later matched against lower-cased
# `entity_name` values, so the names are presumably already lower-case - confirm.
selected_papers = get_all_paper_ids(sigmm_dict)
entity_names = get_all_normalised_names(sigmm_dict)

{'expr': 'Composite(C.CId=1135237122)', 'count': 1000, 'offset': 0, 'attributes': 'Id'}
{'expr': 'Composite(C.CId=1135237122)', 'count': 1000, 'offset': 1000, 'attributes': 'Id'}
{'expr': 'Composite(C.CId=1135237122)', 'count': 1000, 'offset': 2000, 'attributes': 'Id'}
{'expr': 'Composite(C.CId=1135237122)', 'count': 1000, 'offset': 3000, 'attributes': 'Id'}
{'expr': 'Composite(C.CId=1135237122)', 'count': 1000, 'offset': 4000, 'attributes': 'Id'}
{'expr': 'Composite(C.CId=1135237122)', 'count': 1000, 'offset': 5000, 'attributes': 'Id'}
[2155893237, 2066941820, 1963882359, 2091503252, 2085662862, 2108333036, 2138079527, 2078132377, 2153677638, 2090777335, 2106277773, 2116790095, 2025449213, 2075456404, 1965301399, 2168102414, 2108666265, 2003994100, 2105568170, 2006180404, 1989085630, 2027518030, 2003856922, 2143800062, 2122629722, 2162134050, 2065619953, 1978920452, 2018318499, 2084584807, 2022582203, 2143854982, 1980252284, 2008824967, 2123229215, 2167993212, 2161369719, 1980852932, 

In [72]:
%%time

# Get paper information
# NOTE(review): the recorded output reports 5797 complete cache entries and no
# partial or missing ones, so this timing appears to measure the cached path
# rather than a cold fetch - confirm before comparing against other runs.
paper_information = paper_info_mag_check_multiquery(selected_papers)

Complete cache entries found: 5797
Partial cache entries found: 0
No cache entries found: 0
Total ids to query: 5797
CPU times: user 17.2 s, sys: 2.03 s, total: 19.2 s
Wall time: 28.7 s


In [73]:
%%time

# Get coauthor map
# NOTE(review): `coauthors` is not referenced by any later cell in this
# notebook, and the 'icoauthor' config flag is never read either - confirm
# whether a coauthor-filtering step is missing from this profile.
coauthors = get_coauthor_mapping(paper_information)

CPU times: user 72 ms, sys: 4 ms, total: 76 ms
Wall time: 74.1 ms


In [74]:
%%time

# Initial score df
# Built once from the paper information; `score_leaves` consumes it in a later
# cell. In the recorded run this is one of the two most expensive steps
# (about 17.5 s wall time).
score_df = score_paper_info_list(paper_information)

CPU times: user 17.2 s, sys: 1.19 s, total: 18.4 s
Wall time: 17.5 s


In [84]:
# Flower properties
# Kind of flower being generated and the entity types scored as its leaves.
flower_type = 'author'
leaves = [Entity_type.AUTH]

# Options read by the filtering cell below:
#   self_cite   - when falsy, rows flagged in the 'self_cite' column are dropped.
#   pub_*/cit_* - bounds handed to `filter_year`; None is passed through as-is.
#   icoauthor   - not read by any cell in this notebook.
config = dict(
    self_cite=True,
    icoauthor=True,
    pub_lower=None,
    pub_upper=None,
    cit_lower=None,
    cit_upper=None,
)

In [83]:
%%time

# Type score
# Scores the entity types listed in `leaves` (set in the "Flower properties" cell).
# NOTE(review): this cell's execution count (83) is lower than the properties
# cell above it (84), so it was last run before that cell's latest run -
# re-run top to bottom to confirm the timing matches the displayed inputs.
entity_score = score_leaves(score_df, leaves)

CPU times: user 5.1 s, sys: 92 ms, total: 5.2 s
Wall time: 3.84 s


In [87]:
%%time

# Ego name removal
if (flower_type != 'conf'):
    entity_score = entity_score[~entity_score['entity_name'].str.lower()\
           .isin(entity_names)]

# Self citation filter
if not config['self_cite']:
    entity_score = entity_score[~entity_score['self_cite']]

# Filter publication year for ego's paper
filter_score = filter_year(entity_score, config['pub_lower'],
                                         config['pub_upper'])

# Filter Citaiton year for reference links
filter_score = filter_year(filter_score, config['cit_lower'],
                                         config['cit_upper'],
                                         index = 'influence_year')

CPU times: user 1.58 s, sys: 0 ns, total: 1.58 s
Wall time: 1.58 s


In [90]:
%%time

# Aggregate
# Collapses the filtered scores into the frame that is trimmed to the top
# entries in the next cell. The helper prints its own start/finish timestamps
# in addition to the %%time report.
agg_score = agg_score_df(filter_score)


---
2018-08-14 06:35:38.433157 start score generation
2018-08-14 06:35:40.347705 finish score generation
---
CPU times: user 2.84 s, sys: 24 ms, total: 2.86 s
Wall time: 1.91 s


In [93]:
%%time

# Get top scores for graph
if (flower_type != 'conf'):
    agg_score = agg_score[ ~agg_score['entity_name'].isin(entity_names) ]
agg_score = agg_score.head(n=25)
agg_score.ego = ''

CPU times: user 56 ms, sys: 4 ms, total: 60 ms
Wall time: 59.5 ms


In [96]:
%%time

# Graph score
# Converts the trimmed top-score frame into the graph passed to `processdata`.
# The helper prints its own start/finish timestamps in addition to %%time.
graph_score = score_df_to_graph(agg_score)


---
2018-08-14 06:37:24.902221 start graph generation
2018-08-14 06:37:26.118474 finish graph generation
---
CPU times: user 1.22 s, sys: 24 ms, total: 1.24 s
Wall time: 1.22 s


In [99]:
%%time

# D3 format
# Final step of the profiled pipeline: turn the graph into the structure for
# the front end. Per the recorded timing this step is negligible (a few ms).
data = processdata(flower_type, graph_score)

CPU times: user 4 ms, sys: 0 ns, total: 4 ms
Wall time: 3.45 ms
