In [3]:
from pyalex import (
    Works, Authors, Sources,
    Institutions, Concepts, Publishers, Funders
)
import pyalex
import pandas as pd
import numpy as np
pyalex.config.email = "david@rs21.io"

from flair.embeddings import DocumentPoolEmbeddings
from flair.data import Sentence
from flair.embeddings import SentenceTransformerDocumentEmbeddings

EMBEDDING_MODEL_1 = "all-mpnet-base-v2" 

# this one is also good: all-MiniLM-L6-v2
EMBEDDING_MODEL_2 = "all-MiniLM-L6-v2"
SENT_EMBEDDINGS_1 = SentenceTransformerDocumentEmbeddings(EMBEDDING_MODEL_1)
SENT_EMBEDDINGS_2 = SentenceTransformerDocumentEmbeddings(EMBEDDING_MODEL_2)
DOC_EMBEDDINGS= DocumentPoolEmbeddings([SENT_EMBEDDINGS_2])

import torch
from tqdm import tqdm
import yake
import umap.umap_ as umap
from sklearn import metrics
import matplotlib.pyplot as plt
from sklearn.mixture import GaussianMixture as GMM
import altair as alt
import math
import plotly.express as px
import textwrap

In [13]:
#search_term = 'satellite'
#robot_concepts = Concepts().search_filter(display_name=search_term).get()
#len(robot_concepts)
#for i in range(len(robot_concepts)):
#    id_, display_name = robot_concepts[i]['id'], robot_concepts[i]['display_name']
#    print(id_, display_name)
places_search_term = ('Zhengzhou University of Aeronautics|Nanjing University of Aeronautics and Astronautics|' +  
              'North China Institute of Science and Technology|' + 
              'North China Institute of Aerospace Engineering|'  + 
              'North China Research Institute of Electro-optics|' + 
              'Shenyang Aerospace University|' + 
              'Space Engineering University|' + 
              'China Aerospace Science and Industry Corporation (China)|' +  
              'China Aerospace Science and Technology Corporation|' +
              'PLA Army Engineering University') 
    

In [14]:
search_term = 'jamming'
search_term = 'radar jamming and deception|electronic warfare|Network-centric warfare|Air-to-air missile'
search_term = 'radar jamming and deception|electronic warfare|Network-centric warfare|missile guidance'
jamming_concepts = Concepts().\
search_filter(display_name=search_term).get()

In [15]:
concepts = []
for i in range(len(jamming_concepts)):
    id_, display_name = jamming_concepts[i]['id'], jamming_concepts[i]['display_name']
    concepts.append((id_, display_name))
concepts

[('https://openalex.org/C522053795', 'Missile guidance'),
 ('https://openalex.org/C176381164', 'Radar jamming and deception'),
 ('https://openalex.org/C133082901', 'Electronic warfare'),
 ('https://openalex.org/C2781187084', 'Network-centric warfare')]

In [16]:
def process_works_list(worklist:list):
    """
    transforms the 
    works list into a dataframe.
    """
    abstracts_dict = {h["id"]:h["abstract"] for h in worklist}
    df = pd.DataFrame.from_records(worklist)
    del df['abstract_inverted_index']
    df['abstract'] = df['id'].map(abstracts_dict)
   # df['author_affils'] = df['authorships'].apply(get_authors_and_affils)
    return df

In [17]:
for i in range(len(jamming_concepts)):
    print(jamming_concepts[i]['id'], jamming_concepts[i]['works_count'])

https://openalex.org/C522053795 6157
https://openalex.org/C176381164 2571
https://openalex.org/C133082901 3218
https://openalex.org/C2781187084 1794


In [18]:
len(Works().filter(publication_year='>2015').search("space based laser").\
filter(authorships={"institutions":{"country_code":"CN"}}).get())

25

In [19]:
def get_hpm_frame():
    #hpm_pager = Works().filter(publication_year='>2020').search("high power microwave").\
#filter(authorships={"institutions":{"country_code":"CN"}}).paginate(per_page=200,
#                                                                    n_max=None)
    hpm_pager = Works().filter(publication_year='>2020').search("high power microwave").\
        paginate(per_page=200,  n_max=None)
    df = pd.DataFrame()
    for page in tqdm(hpm_pager):
        dfpage = process_works_list(page)
        df = pd.concat([df, dfpage], ignore_index=True)
        df.drop_duplicates(subset='id', keep='first',inplace=True)
    return df                                                               
    
#print(de)

In [20]:
def get_sbl_frame():
    #sbl_pager = Works().filter(publication_year='>2020').search("space based laser").\
#filter(authorships={"institutions":{"country_code":"CN"}}).paginate(per_page=200,
#                                                                    n_max=None)
    sbl_pager = Works().filter(publication_year='>2020').search("space based laser").\
        paginate(per_page=200,
                                                                    n_max=None)
    df = pd.DataFrame()
    for page in tqdm(sbl_pager):
        dfpage = process_works_list(page)
        df = pd.concat([df, dfpage], ignore_index=True)
        df.drop_duplicates(subset='id', keep='first',inplace=True)
    return df                                                               
    
#print(de)

In [21]:
def get_kkv_frame():
    #kkv_pager = Works().filter(publication_year='>2020').search("kinetic kill vehicle").\
#filter(authorships={"institutions":{"country_code":"CN"}}).paginate(per_page=200,
#                                                                    n_max=None)
    kkv_pager = Works().filter(publication_year='>2020').search("kinetic kill vehicle").\
        paginate(per_page=200,
                                                                    n_max=None)
    df = pd.DataFrame()
    for page in tqdm(kkv_pager):
        dfpage = process_works_list(page)
        df = pd.concat([df, dfpage], ignore_index=True)
        df.drop_duplicates(subset='id', keep='first',inplace=True)
    return df                                                               
    

In [22]:
def get_rka_frame():
   # rka_pager = Works().filter(publication_year='>2020').search("relativistic klystron amplifier").\
#filter(authorships={"institutions":{"country_code":"CN"}}).paginate(per_page=200,
#                                                                    n_max=None)
    rka_pager = Works().filter(publication_year='>2020').search("relativistic klystron amplifier").\
        paginate(per_page=200,
                                                                    n_max=None)
    df = pd.DataFrame()
    for page in tqdm(rka_pager):
        dfpage = process_works_list(page)
        df = pd.concat([df, dfpage], ignore_index=True)
        df.drop_duplicates(subset='id', keep='first',inplace=True)
    return df 

In [23]:
def get_concept_frame(concepts_list:list, i:int):
    """
    takes a list of Concepts() results and an index
    and forms the pagination object to retrive the 
    records
    """
    pager = Works().filter(publication_year='>2015',
    #concepts={"id":f"{concepts_list[i]['id']}"}).filter(authorships={"institutions":{"country_code":"CN"}}).\
    #paginate(per_page=200,n_max=None)
    concepts={"id":f"{concepts_list[i]['id']}"}).\
    paginate(per_page=200,n_max=None)
    df = pd.DataFrame()
    for page in tqdm(pager):
        dfpage = process_works_list(page)
        df = pd.concat([df, dfpage], ignore_index=True)
        df.drop_duplicates(subset='id', keep='first',inplace=True)
    return df

In [24]:
frames_list = []
for i in range(len(jamming_concepts)):
    df = get_concept_frame(jamming_concepts, i)
    frames_list.append(df)

6it [00:10,  1.67s/it]
4it [00:06,  1.69s/it]
5it [00:07,  1.48s/it]
2it [00:02,  1.40s/it]


In [26]:
asat_places = Institutions().search_filter(display_name=places_search_term).get()
places = []
for i in range(len(asat_places)):
    id_, display_name, works_count = (asat_places[i]['id'],
    asat_places[i]['display_name'], asat_places[i]['works_count'])
    places.append((id_, display_name, works_count))

def get_place_frame(inst_list:list, i:int):
    """
    takes a list of Concepts() results and an index
    and forms the pagination object to retrive the 
    records
    """
    pager = Works().filter(publication_year='>2018',
    authorships={"institutions": {"id":f"{inst_list[i]['id']}"}}).\
    paginate(per_page=200,n_max=inst_list[i]['works_count'])
    df = pd.DataFrame()
    for page in tqdm(pager):
        dfpage = process_works_list(page)
        df = pd.concat([df, dfpage], ignore_index=True)
        df.drop_duplicates(subset='id', keep='first',inplace=True)
    return df

In [29]:
len(frames_list)

4

In [30]:
#for i in range(len(asat_places)):
#    df = get_place_frame(asat_places, i)
#    frames_list.append(df)

In [31]:
dfrka = get_rka_frame()

1it [00:01,  1.47s/it]


In [32]:
#dfhpm = get_hpm_frame()
dfsbl = get_sbl_frame()

19it [03:06,  9.81s/it]


In [33]:
#frames_list.append(dfsbl)
#frames_list.append(dfrka)

In [34]:
dftop = pd.concat(frames_list,
                  ignore_index=True)
dftop.drop_duplicates(subset='id', keep='first', 
                      inplace=True)

dftop.set_index('id', inplace=True, drop=False)

dfall = dftop
dfall.shape

dfall['content'] = dfall['title'] + ". " + dfall['abstract']

dfrecords = dfall[~dfall['content'].isna()].copy()

In [35]:
def get_keywords(text:str, top:int=7, stopwords=None):
    """
    takes a blob of text and 
    returns the top **top** 
    keywords as a list
    """
    kw_extractor = yake.KeywordExtractor(top=top, stopwords=stopwords)
    keywords = kw_extractor.extract_keywords(text)
    return [p[0] for p in keywords]

In [36]:
def get_top_concepts(concept_list:list,score:float=.6):
    """
    takes a list of concept dictionaries 
    returns the top **top** display_names;
    concepts whose score is >= score
    """
    return [c['display_name'] for c in concept_list if c['score'] >= score]

In [37]:
dfrecords['keywords'] = dfrecords['content'].apply(get_keywords)
dfrecords['top_concepts'] = dfrecords['concepts'].apply(get_top_concepts)

In [38]:
texts = dfrecords['content'].str.lower().values.tolist()
dfrecords.to_csv('jamming.csv')

In [39]:
def get_content_embeddings(dfrecords:pd.DataFrame) -> pd.DataFrame:
    """
    passes the preprocessed mitigation strings
    data through the embedding model to produce the vector
    space representation of each pet mitigation.
    """
    sent = Sentence("The grass is green.")
    DOC_EMBEDDINGS.embed(sent)
    texts = dfrecords["content"].str.lower().values.tolist()
    all_descriptions = np.empty((len(texts), len(sent.embedding)))
    for i in tqdm(range(len(texts))):
        sent = Sentence(texts[i])
        DOC_EMBEDDINGS.embed(sent)
        all_descriptions[i, :] = sent.embedding.cpu().numpy()
        # gc.collect()
        torch.cuda.empty_cache()
    dfcontentvectors = pd.DataFrame.from_records(all_descriptions, index=dfrecords.index)
    return dfcontentvectors

In [40]:
dfcontentvectors = get_content_embeddings(dfrecords)

100%|█████████████████████████████████████████████████████████████████████| 2562/2562 [00:31<00:00, 81.18it/s]


In [41]:
#umap.UMAP?
N_COMPONENTS = 2 # can visualize this way
umap_reducer = umap.UMAP(n_components=N_COMPONENTS,
                       #  metric='euclidean')
                         random_state=1234,
                         metric='cosine')  # can experiment with this metric as well as the other 
# parameters
# to see what other literature is in the same information space, we need to keep this umap_reducer 
# object as well as the gmm model below.

# Apply UMAP to the vectorized strings
reduced_vectors = umap_reducer.fit_transform(dfcontentvectors.to_numpy())
dfreduced = pd.DataFrame.from_records(reduced_vectors, 
                index=dfcontentvectors.index)
dfreduced.columns = ['x','y']

## use hdbscan to cluster

In [42]:
import hdbscan

hdbscan_args = {'min_cluster_size': 10,
                            'metric': 'euclidean',
                            'cluster_selection_method': 'eom',
               }

cluster = hdbscan.HDBSCAN(**hdbscan_args).fit(dfreduced[['x','y']].to_numpy())

dfreduced['cluster'] = cluster.labels_
dfreduced['probability'] = cluster.probabilities_

dfpapers = dfrecords.merge(dfreduced, left_index=True,
                           right_index=True)

In [43]:
#help(dfpapers.explode)
del dfpapers['id']
dfstart = dfpapers.reset_index()
dfstart.head()

Unnamed: 0,id,doi,title,display_name,publication_year,publication_date,ids,language,primary_location,type,...,updated_date,created_date,abstract,content,keywords,top_concepts,x,y,cluster,probability
0,https://openalex.org/W2509257507,https://doi.org/10.2514/1.g000323,Closed-Loop Optimization of Guidance Gain for ...,Closed-Loop Optimization of Guidance Gain for ...,2017,2017-02-01,{'openalex': 'https://openalex.org/W2509257507...,en,"{'is_oa': False, 'landing_page_url': 'https://...",article,...,2023-08-15T23:17:26.008794,2016-09-16,No AccessComputational Guidance and ControlClo...,Closed-Loop Optimization of Guidance Gain for ...,"[0731-5090 LinkGoogle Scholar, JGCODS 0731-509...",[Astronautics],13.499785,5.620109,0,1.0
1,https://openalex.org/W2481447397,https://doi.org/10.1109/taes.2016.150415,Range-to-go weighted optimal guidance with imp...,Range-to-go weighted optimal guidance with imp...,2016,2016-06-01,{'openalex': 'https://openalex.org/W2481447397...,en,"{'is_oa': False, 'landing_page_url': 'https://...",article,...,2023-08-17T13:37:20.019138,2016-08-23,"In this paper, an impact angle control guidanc...",Range-to-go weighted optimal guidance with imp...,"[impact angle, impact angle constraint, angle,...","[Missile, Weighting, Control theory (sociology...",13.649143,5.818709,0,1.0
2,https://openalex.org/W2773666956,https://doi.org/10.1073/pnas.1714532114,Terminal attack trajectories of peregrine falc...,Terminal attack trajectories of peregrine falc...,2017,2017-12-04,{'openalex': 'https://openalex.org/W2773666956...,en,"{'is_oa': True, 'landing_page_url': 'https://d...",article,...,2023-08-24T12:22:47.573615,2017-12-22,Significance Renowned as nature’s fastest pred...,Terminal attack trajectories of peregrine falc...,"[proportional navigation guidance, proportiona...","[Drone, Terminal (telecommunication), Intercep...",13.279628,5.113225,0,1.0
3,https://openalex.org/W2609622286,https://doi.org/10.1109/taes.2017.2698837,Impact-Time-Control Guidance Law With Constrai...,Impact-Time-Control Guidance Law With Constrai...,2017,2017-10-01,{'openalex': 'https://openalex.org/W2609622286...,en,"{'is_oa': False, 'landing_page_url': 'https://...",article,...,2023-08-24T09:33:53.147493,2017-05-05,An impact-time-control guidance (ITCG) law is ...,Impact-Time-Control Guidance Law With Constrai...,"[Seeker Look Angle, Guidance Law, Seeker, Angl...",[],13.946743,5.966068,0,1.0
4,https://openalex.org/W2795099673,https://doi.org/10.1016/j.ast.2018.03.042,A new sliding mode control design for integrat...,A new sliding mode control design for integrat...,2018,2018-07-01,{'openalex': 'https://openalex.org/W2795099673...,en,"{'is_oa': False, 'landing_page_url': 'https://...",article,...,2023-08-20T20:48:57.542014,2018-04-06,Abstract A new sliding mode control algorithm ...,A new sliding mode control design for integrat...,"[sliding mode control, integrated missile guid...","[Missile, Sliding mode control]",14.511933,7.262155,0,1.0


In [44]:
dfstart.shape

(2562, 45)

In [45]:
dfbig = dfstart.explode(column='authorships')
dfbig.shape, dfstart.shape

((8970, 45), (2562, 45))

In [46]:
dfbig.columns

Index(['id', 'doi', 'title', 'display_name', 'publication_year',
       'publication_date', 'ids', 'language', 'primary_location', 'type',
       'type_crossref', 'open_access', 'authorships',
       'institutions_distinct_count', 'corresponding_author_ids',
       'corresponding_institution_ids', 'apc_list', 'apc_paid',
       'cited_by_count', 'biblio', 'is_retracted', 'is_paratext', 'concepts',
       'mesh', 'locations_count', 'locations', 'best_oa_location',
       'sustainable_development_goals', 'grants', 'referenced_works_count',
       'referenced_works', 'related_works', 'ngrams_url', 'cited_by_api_url',
       'counts_by_year', 'updated_date', 'created_date', 'abstract', 'content',
       'keywords', 'top_concepts', 'x', 'y', 'cluster', 'probability'],
      dtype='object')

In [47]:
dfbig.locations.iloc[68]

[{'is_oa': False,
  'landing_page_url': 'https://doi.org/10.1016/j.ast.2019.01.016',
  'pdf_url': None,
  'source': {'id': 'https://openalex.org/S61564791',
   'display_name': 'Aerospace Science and Technology',
   'issn_l': '1270-9638',
   'issn': ['1626-3219', '1270-9638'],
   'is_oa': False,
   'is_in_doaj': False,
   'host_organization': 'https://openalex.org/P4310320990',
   'host_organization_name': 'Elsevier BV',
   'host_organization_lineage': ['https://openalex.org/P4310320990'],
   'host_organization_lineage_names': ['Elsevier BV'],
   'type': 'journal'},
  'license': None,
  'version': None,
  'is_accepted': False,
  'is_published': False}]

In [48]:
def add_extra_to_authorships(row: pd.DataFrame):
    """
    row[authorships] is a dictionary;
    add in the id key to that dictionary
    whose value is row[id]
    """
    complete_dict = row["authorships"]
   # assert type(complete_dict) == dict
    #print(type(complete_dict))
    if type(complete_dict) == dict:
        complete_dict["id"] = row["id"]
        complete_dict["x"] = row["x"]
        complete_dict["y"] = row["y"]
        complete_dict["cluster"] = row["cluster"]
        complete_dict["cluster_score"] = row["probability"]
        complete_dict["title"] = row["title"]
        complete_dict["abstract"] = row["abstract"]
        complete_dict["doi"] = row["doi"]
        complete_dict["publication_date"] = row["publication_date"]
        complete_dict["publication_year"] = row["publication_year"]
        complete_dict["grants"] = row["grants"]
        complete_dict["locations"] = row["locations"]
        return complete_dict
    else:
        return row["authorships"]

In [49]:
dfbig['big_authorships'] = dfbig.apply(add_extra_to_authorships, axis=1)

In [50]:
#dfbig['authorships'].tolist()
bigvals = dfbig['authorships'].tolist()

In [51]:
dictvals = [c for c in bigvals if type(c) != float]

In [52]:
dftriple = pd.json_normalize(dictvals,
                  record_path=['institutions'],
                  meta=['id','raw_affiliation_string','author_position', 'doi',
                        'title','abstract','publication_date', 'publication_year',
                        'grants','locations',
                        'is_corrresponding','x','y','cluster','cluster_score',
                       ['author','id'], ['author', 'display_name'],
                       ['author','orcid']],
                  errors='ignore',
                  sep='_',
                  meta_prefix='paper_',
                #  record_prefix='author_'
                 )

In [53]:
dftopics = dfcontentvectors.copy()
dftopics['cluster'] = dfpapers['cluster']
dfmeantopics = dftopics.groupby('cluster').mean().copy()
reduced_topics = umap_reducer.transform(dfmeantopics.to_numpy())
df_reduced_topics = pd.DataFrame.from_records(reduced_topics, 
                index=dfmeantopics.index)
df_reduced_topics.columns = ['x','y']
df_reduced_topics['topic'] = df_reduced_topics.index
df_reduced_topics.head()

def get_cluster_concepts(topic_num:int, n:int=20):
    """
    takes an integer topic_num corresponding to a 
    given topic number and
    returns the list of top n occuring concepts
    from the top_concept field
    """
    top_concepts = dfpapers[dfpapers['cluster'] == topic_num]['top_concepts'].tolist()
    flat_concepts = [item for sublist in top_concepts for item in sublist]
    concepts_dict = {c:flat_concepts.count(c) for c in flat_concepts}
    sorted_concepts = sorted(concepts_dict.items(), key=lambda x:x[1], reverse=True)
    return [c[0] for c in sorted_concepts][:n]

def get_yake_cluster_phrases(topic_num:int, n:int=20):
    """
    takes in an integer n corresponding
    to a given topic number and
    returns the list of keyphrases (TopicRank method)
    """
    documents = dfpapers[dfpapers['cluster'] == topic_num]['content'].tolist()
    topic_input = ". ".join(documents)
    #extractor = pke.unsupervised.TextRank()
    kw_extractor = yake.KeywordExtractor(top=n, stopwords=None)
    keywords = kw_extractor.extract_keywords(topic_input)
    #extractor.load_document(input=topic_input,
    #                    language='en',
    #                    normalization=None)

    #extractor.candidate_selection()

    #window = 2
    #use_stems = False
    #extractor.candidate_weighting(window=window,
    #                          use_stems=use_stems)
    #extractor.candidate_weighting()
    #threshold = 0.8
   # keyphrases = extractor.get_n_best(n=20, threshold=threshold)
    #keyphrases = extractor.get_n_best(n=n)
    return [p[0] for p in keywords]

wikiconcepts = df_reduced_topics['topic'].apply(get_cluster_concepts)

wikikeywords = df_reduced_topics['topic'].apply(get_yake_cluster_phrases)

dfpapers['id'] = dfpapers.index
dfinfo = dfpapers[['x','y','id','title','doi','cluster','grants',
                   'locations',
                 'publication_date','keywords','top_concepts']].copy()

centroids = dfinfo.groupby('cluster')[['x','y']].mean().copy()
centroids['concepts'] = wikiconcepts
centroids['cluster'] = centroids.index
centroids['keywords'] = wikikeywords

In [54]:
def wrap_it(x):
    return "<br>".join(textwrap.wrap(x, width=40))
   # return "<br>".join(textwrap.wrap(x.replace(r'\s+', ' '), width=40))


In [55]:
centroids['wrapped_keywords'] = centroids['keywords'].apply(str).apply(wrap_it)
centroids['wrapped_concepts'] = centroids['concepts'].apply(str).apply(wrap_it)

In [56]:
centroids.to_pickle('jammingcentroids2d.pkl')

In [57]:
dftriple.to_pickle('jammingdftriple2d.pkl')

In [58]:
def get_affils_cluster_sort(dc:pd.DataFrame, cl:int):
    """
    restricts the dataframe dc to cluster value cl
    and returns the results grouped by id, ror sorted
    by the some of probablity descending
    """
    dg = dc[dc['paper_cluster'] == cl].copy()
    print(cl)
    dv = dg.groupby(['id','display_name','country_code',
                     'type'])['paper_cluster_score'].sum().to_frame()
    dv.sort_values('paper_cluster_score', ascending=False, inplace=True)
    kw = centroids[centroids.cluster == cl]['keywords'].iloc[0]
    return dv, kw

In [59]:
dv84, kw84 = get_affils_cluster_sort(dftriple, 1)
print(kw84)
dv84.head(10)

1
['Small Hydraulic Actuation', 'Hydraulic Actuation System', 'Tactical Missile', 'Small Hydraulic', 'Hydraulic Actuation', 'Sliding PRI', 'DRFM', 'Network Centric Warfare', 'electronic warfare', 'Modeling and Simulation', 'NCW', 'PRI', 'warfare', 'Radar', 'Sliding', 'Centric Warfare', 'Actuation System', 'Network Centric', '네트워크', '논문에서는']


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,paper_cluster_score
id,display_name,country_code,type,Unnamed: 4_level_1


In [60]:
dv84, kw84 = get_affils_cluster_sort(dftriple, 0)
print(kw84)
dv84.head(10)

0
['guidance law', 'Missile Guidance Law', 'cooperative guidance law', 'proposed guidance law', 'Guidance Law Based', 'missile guidance system', 'control guidance law', 'angle control guidance', 'guidance law design', 'Missile Guidance', 'navigation guidance law', 'proportional navigation guidance', 'missile guidance control', 'missile control system', 'mode guidance law', 'Guidance', 'optimal guidance law', 'Impact Angle Control', 'Missile', 'Based Guidance Law']


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,paper_cluster_score
id,display_name,country_code,type,Unnamed: 4_level_1
https://openalex.org/I125839683,Beijing Institute of Technology,CN,education,255.0
https://openalex.org/I17145004,Northwestern Polytechnical University,CN,education,240.0
https://openalex.org/I82880672,Beihang University,CN,education,202.907999
https://openalex.org/I170215575,National University of Defense Technology,CN,education,154.0
https://openalex.org/I204983213,Harbin Institute of Technology,CN,education,116.0
https://openalex.org/I36399199,Nanjing University of Science and Technology,CN,education,79.0
https://openalex.org/I4210104252,Air Force Engineering University,CN,education,68.0
https://openalex.org/I139264467,Seoul National University,KR,education,53.0
https://openalex.org/I9842412,Nanjing University of Aeronautics and Astronautics,CN,education,50.0
https://openalex.org/I174306211,Technion – Israel Institute of Technology,IL,education,49.0


In [61]:
dv84, kw84 = get_affils_cluster_sort(dftriple, 16)
print(kw84)
dv84.head(20)

16
['electronic warfare', 'electronic warfare systems', 'microwave photonic', 'frequency', 'electronic', 'photonic', 'Eye Jamming System', 'Microwave', 'frequency measurement', 'systems', 'warfare', 'signals', 'instantaneous frequency measurement', 'radar', 'warfare systems', 'Photonics', 'System', 'microwave photonic technology', 'microwave frequency measurement', 'signal']


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,paper_cluster_score
id,display_name,country_code,type,Unnamed: 4_level_1
https://openalex.org/I2800372957,China Electronics Technology Group Corporation,CN,company,10.0
https://openalex.org/I9842412,Nanjing University of Aeronautics and Astronautics,CN,education,8.647606
https://openalex.org/I19820366,Chinese Academy of Sciences,CN,government,7.775964
https://openalex.org/I4210165038,University of Chinese Academy of Sciences,CN,education,7.775964
https://openalex.org/I4210149211,Institute of Semiconductors,CN,facility,7.775964
https://openalex.org/I4210139481,Science and Technology Department of Sichuan Province,CN,government,7.0
https://openalex.org/I80143920,Shandong University of Science and Technology,CN,education,5.52398
https://openalex.org/I4210099310,Consorzio Nazionale Interuniversitario per le Telecomunicazioni,IT,nonprofit,4.722374
https://openalex.org/I20089843,Princeton University,US,education,3.153348
https://openalex.org/I4210086621,Electronics and Radar Development Establishment,IN,facility,3.0


In [62]:
#dv84, kw84 = get_affils_cluster_sort(dftriple, 3)
#print(kw84)
#dv84.head(20)

In [63]:
#dv84, kw84 = get_affils_cluster_sort(dftriple, 3)
#print(kw84)
#dv84.head(20)

In [64]:
#dv84, kw84 = get_affils_cluster_sort(dftriple, 6)
#print(kw84)
#dv84.head(20)

In [65]:
#dv84, kw84 = get_affils_cluster_sort(dftriple, 12)
#print(kw84)
#dv84.head(20)

In [66]:
dfinfo = dfpapers[['x','y','id','title','doi','cluster','probability',
                 'publication_date','grants','locations',
                   'keywords','top_concepts']].copy()

In [67]:
dfpapers['primary_location'].iloc[58]

{'is_oa': False,
 'landing_page_url': 'https://doi.org/10.1109/taes.2019.2948722',
 'pdf_url': None,
 'source': {'id': 'https://openalex.org/S193624734',
  'display_name': 'IEEE Transactions on Aerospace and Electronic Systems',
  'issn_l': '0018-9251',
  'issn': ['1557-9603', '0018-9251', '2371-9877'],
  'is_oa': False,
  'is_in_doaj': False,
  'host_organization': 'https://openalex.org/P4310319808',
  'host_organization_name': 'Institute of Electrical and Electronics Engineers',
  'host_organization_lineage': ['https://openalex.org/P4310319808'],
  'host_organization_lineage_names': ['Institute of Electrical and Electronics Engineers'],
  'type': 'journal'},
 'license': None,
 'version': None,
 'is_accepted': False,
 'is_published': False}

In [68]:
dfpapers['locations'].iloc[58]

[{'is_oa': False,
  'landing_page_url': 'https://doi.org/10.1109/taes.2019.2948722',
  'pdf_url': None,
  'source': {'id': 'https://openalex.org/S193624734',
   'display_name': 'IEEE Transactions on Aerospace and Electronic Systems',
   'issn_l': '0018-9251',
   'issn': ['1557-9603', '0018-9251', '2371-9877'],
   'is_oa': False,
   'is_in_doaj': False,
   'host_organization': 'https://openalex.org/P4310319808',
   'host_organization_name': 'Institute of Electrical and Electronics Engineers',
   'host_organization_lineage': ['https://openalex.org/P4310319808'],
   'host_organization_lineage_names': ['Institute of Electrical and Electronics Engineers'],
   'type': 'journal'},
  'license': None,
  'version': None,
  'is_accepted': False,
  'is_published': False}]

In [69]:
pap_affils_dict = dftriple.groupby('paper_id')['paper_raw_affiliation_string'].\
apply(lambda x: ' | '.join(x.tolist()))

#pap_authors_dict = dftriple.groupby('paper_id')['paper_author_display_name'].\
#apply(lambda x: ' | '.join(x.tolist()))

In [70]:
pap_authors_dict = dftriple.groupby('paper_id')['paper_author_display_name'].apply(lambda x: x.values)


In [71]:
dfinfo['affil_list'] = pap_affils_dict
dfinfo['author_list'] = pap_authors_dict

In [72]:
dfinfo['affil_list'] = dftriple.groupby('paper_id')['paper_raw_affiliation_string'].\
apply(lambda x: x.tolist())

In [73]:
dfinfo['author_list'] =  dftriple.groupby('paper_id')['paper_author_display_name'].\
apply(lambda x: x.tolist())


In [74]:
dfinfo['wrapped_affil_list'] = dfinfo['affil_list'].apply(str).apply(wrap_it)
dfinfo['wrapped_author_list'] = dfinfo['author_list'].apply(str).apply(wrap_it)

In [75]:
dfinfo['wrapped_keywords'] = dfinfo['keywords'].apply(str).apply(wrap_it)

In [76]:
dfinfo['locations'].iloc[69]

[{'is_oa': False,
  'landing_page_url': 'https://doi.org/10.2514/1.g004139',
  'pdf_url': None,
  'source': {'id': 'https://openalex.org/S25157213',
   'display_name': 'Journal of Guidance Control and Dynamics',
   'issn_l': '0731-5090',
   'issn': ['1533-3884', '0731-5090'],
   'is_oa': False,
   'is_in_doaj': False,
   'host_organization': 'https://openalex.org/P4310315709',
   'host_organization_name': 'American Institute of Aeronautics and Astronautics',
   'host_organization_lineage': ['https://openalex.org/P4310315709'],
   'host_organization_lineage_names': ['American Institute of Aeronautics and Astronautics'],
   'type': 'journal'},
  'license': None,
  'version': None,
  'is_accepted': False,
  'is_published': False}]

In [77]:
def get_source_name(loc_list):
    """
    grab the first item in the list;
    retturn the display name
    """
    try:
        primary = loc_list[0]
        return primary["source"]["display_name"]
    except:
        return None

def get_source_type(loc_list):
    """
    grab the first item in the list;
    return the source type
    """
    try:
        primary = loc_list[0]
        return primary["source"]["type"]
    except:
        return None
    
    

In [78]:
dfinfo["source"] = dfinfo["locations"].apply(get_source_name)
dfinfo["source_type"] = dfinfo["locations"].apply(get_source_type)

In [79]:
dfinfo["source"].value_counts()

source
IEEE Transactions on Aerospace and Electronic Systems                85
IEEE Access                                                          62
Journal of physics                                                   58
Lecture notes in electrical engineering                              44
Aerospace Science and Technology                                     33
                                                                     ..
Journal Of The Chinese Institute Of Engineers                         1
Frontiers in Physics                                                  1
Measurement                                                           1
Journal of Electromagnetic Waves and Applications                     1
Lecture notes on data engineering and communications technologies     1
Name: count, Length: 522, dtype: int64

In [80]:
dfinfo["source_type"].value_counts()

source_type
journal           1180
conference         248
book series        105
ebook platform      62
repository          46
Name: count, dtype: int64

In [81]:
dfinfo[dfinfo["source_type"] == "conference"]

Unnamed: 0_level_0,x,y,id,title,doi,cluster,probability,publication_date,grants,locations,keywords,top_concepts,affil_list,author_list,wrapped_affil_list,wrapped_author_list,wrapped_keywords,source,source_type
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
https://openalex.org/W3010604679,11.085700,4.058947,https://openalex.org/W3010604679,Ballistic Missile Maneuver Penetration Based o...,https://doi.org/10.1109/gncc42960.2018.9018872,0,1.0,2018-08-01,[],"[{'is_oa': False, 'landing_page_url': 'https:/...","[Missile Maneuver Penetration, Ballistic Missi...","[Reinforcement learning, Missile, Ballistic mi...","[Beihang University,State Key Laboratory of In...","[Chaojie Yang, Jiang Wu, Guoqing Liu, Yuncan Z...","['Beihang University,State Key<br>Laboratory o...","['Chaojie Yang', 'Jiang Wu', 'Guoqing<br>Liu',...","['Missile Maneuver Penetration',<br>'Ballistic...","2018 IEEE CSAA Guidance, Navigation and Contro...",conference
https://openalex.org/W3112857541,11.172089,4.645629,https://openalex.org/W3112857541,Deep Learning Based Missile Trajectory Prediction,https://doi.org/10.1109/icus50048.2020.9274953,0,1.0,2020-11-27,[{'funder': 'https://openalex.org/F4320321001'...,"[{'is_oa': False, 'landing_page_url': 'https:/...","[Learning Based Missile, Deep Learning Based, ...","[Missile, Trajectory, Computer science, Artifi...","[Harbin Institute of Technology,School of Astr...","[Zijian Wang, Wei Wei]","['Harbin Institute of Technology,School<br>of ...","['Zijian Wang', 'Wei Wei']","['Learning Based Missile', 'Deep<br>Learning B...",2020 3rd International Conference on Unmanned ...,conference
https://openalex.org/W2585412663,12.940574,6.210493,https://openalex.org/W2585412663,Missile guidance systems for UAS landing appli...,,0,1.0,2016-12-01,[],"[{'is_oa': False, 'landing_page_url': 'http://...","[UAS landing application, Missile guidance sys...","[Guidance system, Missile guidance]","[Faculty of Military Technology, University of...","[Vadim Stary, Radek Doskocil, Vaclav Krivanek,...","['Faculty of Military Technology,<br>Universit...","['Vadim Stary', 'Radek Doskocil',<br>'Vaclav K...","['UAS landing application', 'Missile<br>guidan...",International Conference on Mechatronics - Mec...,conference
https://openalex.org/W2894959321,13.017600,5.683993,https://openalex.org/W2894959321,The Ballistic Design of Intercepting the Ultra...,https://doi.org/10.1088/1757-899x/408/1/012029,0,1.0,2018-10-01,[],"[{'is_oa': True, 'landing_page_url': 'https://...","[Ultra-low Altitude Target, Intercepting the U...","[Trajectory, Missile, Missile guidance]","[Air Force Engineering University, Xi’an, 7100...","[Xinpeng Ma, Ruikang Xing, Xinghao Ran, Hairui...","['Air Force Engineering University,<br>Xi’an, ...","['Xinpeng Ma', 'Ruikang Xing', 'Xinghao<br>Ran...","['Ultra-low Altitude Target',<br>'Intercepting...",IOP conference series,conference
https://openalex.org/W2995205143,13.155504,6.865627,https://openalex.org/W2995205143,Research on Maneuvering Trajectory Tracking Ac...,https://doi.org/10.1088/1757-899x/677/2/022053,0,1.0,2019-12-01,[],"[{'is_oa': True, 'landing_page_url': 'https://...","[trajectory tracking guidance, tracking guidan...","[Trajectory, Linearization, Control theory (so...",[Shijiazhuang Campus of Army Engineering Unive...,"[Jifeng Yu, Suochang Yang, Yunwei Zhang]",['Shijiazhuang Campus of Army<br>Engineering U...,"['Jifeng Yu', 'Suochang Yang', 'Yunwei<br>Zhang']","['trajectory tracking guidance',<br>'tracking ...",IOP conference series,conference
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
https://openalex.org/W4205186662,2.916523,3.715200,https://openalex.org/W4205186662,System of systems lessons to be learned in the...,https://doi.org/10.2514/6.2022-1471,21,1.0,2022-01-03,[],"[{'is_oa': False, 'landing_page_url': 'https:/...","[systems, systems engineering, systems of syst...",[],"[Saab AB Linkoping, Forsvarshogskolan, Saab AB...","[Karl Kindström Andersson, Kent Andersson, Chr...","['Saab AB Linkoping',<br>'Forsvarshogskolan', ...","['Karl Kindström Andersson', 'Kent<br>Andersso...","['systems', 'systems engineering',<br>'systems...",AIAA SCITECH 2022 Forum,conference
https://openalex.org/W4250871769,3.416761,3.549632,https://openalex.org/W4250871769,Research on the Application of Network Command...,https://doi.org/10.1145/3495018.3495505,21,1.0,2021-10-23,[],"[{'is_oa': False, 'landing_page_url': 'https:/...","[Computer Network Engineering, Network Command...","[Computer science, Network engineering]","[Jiangxi University of Engineering, China, Jia...","[Xujun Liu, Jiang Hu]","['Jiangxi University of Engineering,<br>China'...","['Xujun Liu', 'Jiang Hu']","['Computer Network Engineering',<br>'Network C...",2021 3rd International Conference on Artificia...,conference
https://openalex.org/W4312371406,3.414328,3.368807,https://openalex.org/W4312371406,A Literature Review of Resiliency Technologies...,https://doi.org/10.1109/dsit55514.2022.9943919,21,1.0,2022-07-22,[],"[{'is_oa': False, 'landing_page_url': 'https:/...","[Literature Review, Software Defined Networks,...","[Network-centric warfare, Computer science]","[Academy of Military Sciences,Network informat...","[Guozhu YAN, Qiongyu Wu, Rongbing Chen, Lan Du...","['Academy of Military Sciences,Network<br>info...","['Guozhu YAN', 'Qiongyu Wu', 'Rongbing<br>Chen...","['Literature Review', 'Software Defined<br>Net...",2022 5th International Conference on Data Scie...,conference
https://openalex.org/W4317928099,3.422061,3.386949,https://openalex.org/W4317928099,Tactical Topology Optimization Methodology for...,https://doi.org/10.1109/milcom55135.2022.10017781,21,1.0,2022-11-28,[],"[{'is_oa': False, 'landing_page_url': 'https:/...","[Slice Aware, Aware and Reconfigurable, Reconf...",[Computer science],"[US Army Futures Command, DEVCOM Analysis Cent...","[Anthony Castanares, Deepak K. Tosh]","['US Army Futures Command, DEVCOM<br>Analysis ...","['Anthony Castanares', 'Deepak K. Tosh']","['Slice Aware', 'Aware and<br>Reconfigurable',...",MILCOM 2022 - 2022 IEEE Military Communication...,conference


In [82]:
dfinfo.columns

Index(['x', 'y', 'id', 'title', 'doi', 'cluster', 'probability',
       'publication_date', 'grants', 'locations', 'keywords', 'top_concepts',
       'affil_list', 'author_list', 'wrapped_affil_list',
       'wrapped_author_list', 'wrapped_keywords', 'source', 'source_type'],
      dtype='object')

In [83]:
dftriple.columns

Index(['id', 'display_name', 'ror', 'country_code', 'type', 'paper_id',
       'paper_raw_affiliation_string', 'paper_author_position', 'paper_doi',
       'paper_title', 'paper_abstract', 'paper_publication_date',
       'paper_publication_year', 'paper_grants', 'paper_locations',
       'paper_is_corrresponding', 'paper_x', 'paper_y', 'paper_cluster',
       'paper_cluster_score', 'paper_author_id', 'paper_author_display_name',
       'paper_author_orcid'],
      dtype='object')

In [84]:
dfinfo.to_pickle('jammingdfinfo2d.pkl')

In [85]:
dftriple.columns

Index(['id', 'display_name', 'ror', 'country_code', 'type', 'paper_id',
       'paper_raw_affiliation_string', 'paper_author_position', 'paper_doi',
       'paper_title', 'paper_abstract', 'paper_publication_date',
       'paper_publication_year', 'paper_grants', 'paper_locations',
       'paper_is_corrresponding', 'paper_x', 'paper_y', 'paper_cluster',
       'paper_cluster_score', 'paper_author_id', 'paper_author_display_name',
       'paper_author_orcid'],
      dtype='object')

In [86]:
dftriple['paper_grants'].iloc[67]

[{'funder': 'https://openalex.org/F4320321001',
  'funder_display_name': 'National Natural Science Foundation of China',
  'award_id': '61573161'},
 {'funder': 'https://openalex.org/F4320321001',
  'funder_display_name': 'National Natural Science Foundation of China',
  'award_id': '61473130'},
 {'funder': 'https://openalex.org/F4320321001',
  'funder_display_name': 'National Natural Science Foundation of China',
  'award_id': '61473124'},
 {'funder': 'https://openalex.org/F4320321106',
  'funder_display_name': "Ministry of Education of the People's Republic of China",
  'award_id': '20120142120091'}]

grab the list of **funder_display_names** for each work; each funder display name will be a node, lineked to the other node types in that row of dftriple. ok. 

In [87]:
def get_funder_names(funder_list):
    """
    funder_list is a list of dictionaries
    with three keys; return the list of 
    unique **funder_display_name**
    values
    """
    try:
        funder_names = list(set([f['funder_display_name'] for f in funder_list]))
        return funder_names    
    except:
        return []
        

In [88]:
dftriple["source"] = dftriple["paper_locations"].apply(get_source_name)
dftriple["source_type"] = dftriple["paper_locations"].apply(get_source_type)
dftriple["funder_list"] = dftriple["paper_grants"].apply(get_funder_names)

In [89]:
#dftriple[dftriple['paper_grants']][['paper_grants','funder_list']]

In [90]:
dftriple.to_pickle('jammingdftriple2d.pkl')

make another function with the top journals for that cluster. OK.
Coutries, Affiliations, Atuthors amd sources/journals. distinguish between journals and conferences.

add those two functions to the bottom. yes. nice. 

In [91]:
dftriple['source'].value_counts()

source
IEEE Transactions on Aerospace and Electronic Systems                                  302
IEEE Access                                                                            249
Journal of physics                                                                     161
Remote Sensing                                                                         144
Lecture notes in electrical engineering                                                125
                                                                                      ... 
Meždunarodnaâ analitika                                                                  1
Discrete Dynamics in Nature and Society                                                  1
2022 International Conference on Big Data, Information and Computer Network (BDICN)      1
Prace Instytutu Lotnictwa                                                                1
Journal of Science and Technique                                                   

In [92]:
dftriple['source_type'].value_counts()

source_type
journal           3502
conference         820
book series        302
ebook platform      89
repository          65
Name: count, dtype: int64

In [93]:
def get_journals_cluster_sort(dc:pd.DataFrame, cl:int):
    """
    restricts the dataframe dc to cluster value cl
    and returns the results grouped by source (where
    source_type == 'journal') sorted
    by the some of probablity descending
    """
    dg = dc[dc['paper_cluster'] == cl].copy()
    print(cl)
    dv = dg[dg['source_type'] == 'journal'].groupby(['source'])['paper_cluster_score'].sum().to_frame()
    dv.sort_values('paper_cluster_score', ascending=False, inplace=True)
    kw = centroids[centroids.cluster == cl]['keywords'].iloc[0]
    return dv, kw

In [94]:
def get_conferences_cluster_sort(dc:pd.DataFrame, cl:int):
    """
    restricts the dataframe dc to cluster value cl
    and returns the results grouped by source (where
    source_type == 'journal') sorted
    by the some of probablity descending
    """
    dg = dc[dc['paper_cluster'] == cl].copy()
    print(cl)
    dv = dg[dg['source_type'] == 'conference'].groupby(['source'])['paper_cluster_score'].sum().to_frame()
    dv.sort_values('paper_cluster_score', ascending=False, inplace=True)
    kw = centroids[centroids.cluster == cl]['keywords'].iloc[0]
    return dv, kw

In [95]:
dv84, kw84 = get_journals_cluster_sort(dftriple, 1)
print(kw84)
dv84.head(10)

1
['Small Hydraulic Actuation', 'Hydraulic Actuation System', 'Tactical Missile', 'Small Hydraulic', 'Hydraulic Actuation', 'Sliding PRI', 'DRFM', 'Network Centric Warfare', 'electronic warfare', 'Modeling and Simulation', 'NCW', 'PRI', 'warfare', 'Radar', 'Sliding', 'Centric Warfare', 'Actuation System', 'Network Centric', '네트워크', '논문에서는']


Unnamed: 0_level_0,paper_cluster_score
source,Unnamed: 1_level_1


In [96]:
dv84, kw84 = get_conferences_cluster_sort(dftriple, 1)
print(kw84)
dv84.head(10)

1
['Small Hydraulic Actuation', 'Hydraulic Actuation System', 'Tactical Missile', 'Small Hydraulic', 'Hydraulic Actuation', 'Sliding PRI', 'DRFM', 'Network Centric Warfare', 'electronic warfare', 'Modeling and Simulation', 'NCW', 'PRI', 'warfare', 'Radar', 'Sliding', 'Centric Warfare', 'Actuation System', 'Network Centric', '네트워크', '논문에서는']


Unnamed: 0_level_0,paper_cluster_score
source,Unnamed: 1_level_1


# Country - Country Collaborations

want to report back though which countries are involved as well. ok.

In [97]:
def get_country_collaborations_sort(dc:pd.DataFrame, cl:int):
    """
    resticts the dataframe dc to cluster value cl
    and returns the results of paper_id s where there is 
    more than one country_code
    """
    dg = dc[dc['paper_cluster'] == cl].copy()
    dv = dg.groupby('paper_id')['country_code'].apply(lambda x: len(set(x.values))).to_frame()
    dc = dg.groupby('paper_id')['country_code'].apply(lambda x: list(set(x.values))).to_frame()
    dc.columns = ['collab_countries']
    dv.columns = ['country_count']
    dv['collab_countries'] = dc['collab_countries']
    dv.sort_values('country_count',ascending=False, inplace=True)
    di = dfinfo.loc[dv.index].copy()
    di['country_count'] = dv['country_count']
    di['collab_countries'] = dv['collab_countries']
    return di[di['country_count'] > 1]

In [98]:
dv = get_country_collaborations_sort(dftriple, 0)
dv

Unnamed: 0_level_0,x,y,id,title,doi,cluster,probability,publication_date,grants,locations,...,top_concepts,affil_list,author_list,wrapped_affil_list,wrapped_author_list,wrapped_keywords,source,source_type,country_count,collab_countries
paper_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
https://openalex.org/W2809355594,13.247190,4.968714,https://openalex.org/W2809355594,Application of Sub-Optimal MPSC guidance for T...,https://doi.org/10.1016/j.ifacol.2018.05.011,0,1.0,2018-01-01,[],"[{'is_oa': True, 'landing_page_url': 'https://...",...,"[Trajectory optimization, Aerospace, Trajectory]","[CAE Engineer, Zeus Numerix Pvt. Ltd, Pune, IN...","[Akshay Pal, Abhishek Pancholy, Abhishek Panch...","['CAE Engineer, Zeus Numerix Pvt. Ltd,<br>Pune...","['Akshay Pal', 'Abhishek Pancholy',<br>'Abhish...","['Terminal angle constraint', 'Terminal<br>ang...",IFAC-PapersOnLine,journal,4,"[GR, ES, IN, US]"
https://openalex.org/W2914786265,13.662374,6.376733,https://openalex.org/W2914786265,Smooth Interpolation-Based Fixed-Final-Time Co...,https://doi.org/10.1109/taes.2019.2897038,0,1.0,2019-12-01,[{'funder': 'https://openalex.org/F4320321001'...,"[{'is_oa': False, 'landing_page_url': 'https:/...",...,"[Interpolation (computer graphics), Missile, B...","[[Beijing Institute of Technology, Beijing, Ch...","[Haichao Hong, Arnab Maity, Florian Holzapfel,...","['[Beijing Institute of Technology,<br>Beijing...","['Haichao Hong', 'Arnab Maity', 'Florian<br>Ho...","['command generation approach', 'Command<br>Ge...",IEEE Transactions on Aerospace and Electronic ...,journal,3,"[CN, IN, DE]"
https://openalex.org/W4313009818,11.160408,4.024714,https://openalex.org/W4313009818,Online intelligent maneuvering penetration met...,https://doi.org/10.3934/era.2022221,0,1.0,2022-01-01,[],"[{'is_oa': True, 'landing_page_url': 'https://...",...,"[Missile, Penetration (warfare), Reinforcement...","[Division of Dynamics and Control, School of M...","[Yao-Kun Wang, Kun Zhao, Juan Luis García Guir...","['Division of Dynamics and Control,<br>School ...","['Yao-Kun Wang', 'Kun Zhao', 'Juan Luis<br>Gar...","['maneuvering penetration methods',<br>'intell...",Electronic research archive,journal,3,"[CN, ES, SA]"
https://openalex.org/W3147730670,12.697542,5.724387,https://openalex.org/W3147730670,Application of methods of conditional multidim...,https://doi.org/10.38013/2542-0542-2017-3-59-62,0,1.0,2017-09-30,[],"[{'is_oa': False, 'landing_page_url': 'https:/...",...,"[Ballistic missile, Missile, Trajectory, Minif...",[Joint Stock Company “Concern “Radio Technical...,"[Anastasia Dubrovina, Anastasia Dubrovina, Ana...",['Joint Stock Company “Concern “Radio<br>Techn...,"['Anastasia Dubrovina', 'Anastasia<br>Dubrovin...","['trajectory calculation problem',<br>'ballist...",Вестник Концерна ВКО «Алмаз – Антей»,journal,3,"[GB, RU, US]"
https://openalex.org/W4224084481,14.149807,5.521064,https://openalex.org/W4224084481,Unified Method for Field-of-View-Limited Homin...,https://doi.org/10.2514/1.g006710,0,1.0,2022-08-01,[{'funder': 'https://openalex.org/F4320321001'...,"[{'is_oa': False, 'landing_page_url': 'https:/...",...,"[Autopilot, Computer science]","[Beijing Institute of Technology, 100081 Beiji...","[Wei Dong, Chunyan Wang, Jianan Wang, Hungsun ...","['Beijing Institute of Technology,<br>100081 B...","['Wei Dong', 'Chunyan Wang', 'Jianan<br>Wang',...","['Homing Guidance', 'Guidance',<br>'baseline g...",Journal of Guidance Control and Dynamics,journal,3,"[CN, US, KR]"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
https://openalex.org/W2461374934,12.414166,4.942739,https://openalex.org/W2461374934,3-D Trajectory Planning of Aerial Vehicles Usi...,https://doi.org/10.1109/tcst.2016.2582144,0,1.0,2017-05-01,[],"[{'is_oa': False, 'landing_page_url': 'https:/...",...,"[Trajectory, Random tree, Motion planning]",[The Geo-Informatics and Space Technology Deve...,"[Pawit Pharpatara, Bruno Hérissé, Yasmina Best...",['The Geo-Informatics and Space<br>Technology ...,"['Pawit Pharpatara', 'Bruno Hérissé',<br>'Yasm...","['Trajectory Planning', 'trajectory<br>plannin...",IEEE Transactions on Control Systems and Techn...,journal,2,"[TH, FR]"
https://openalex.org/W4381198693,13.028254,4.750542,https://openalex.org/W4381198693,Missile Threat Detection and Evasion Maneuvers...,https://doi.org/10.1109/taes.2023.3287153,0,1.0,2023-01-01,[],"[{'is_oa': False, 'landing_page_url': 'https:/...",...,"[Missile, Survivability, Proportional navigati...",[Department of Electrical and Computer Enginee...,"[Zijiao Tian, Meir Danino, Yaakov Bar-Shalom, ...",['Department of Electrical and Computer<br>Eng...,"['Zijiao Tian', 'Meir Danino', 'Yaakov<br>Bar-...","['Missile Threat Detection', 'Air<br>Defense S...",IEEE Transactions on Aerospace and Electronic ...,journal,2,"[US, IL]"
https://openalex.org/W4381838779,11.976180,5.826131,https://openalex.org/W4381838779,Design of Missile Guidance Law Using Takagi-Su...,https://doi.org/10.1109/access.2023.3277537,0,1.0,2023-01-01,[],"[{'is_oa': True, 'landing_page_url': 'https://...",...,"[Missile, Missile guidance, Control theory (so...",[Faculty of Electrical and Electronic Engineer...,"[Duc Hung Pham, Duc Hung Pham, Chih-Min Lin, V...",['Faculty of Electrical and Electronic<br>Engi...,"['Duc Hung Pham', 'Duc Hung Pham',<br>'Chih-Mi...","['Missile Guidance Law', 'Missile<br>Guidance'...",IEEE Access,journal,2,"[TW, VN]"
https://openalex.org/W2509257507,13.499785,5.620109,https://openalex.org/W2509257507,Closed-Loop Optimization of Guidance Gain for ...,https://doi.org/10.2514/1.g000323,0,1.0,2017-02-01,[],"[{'is_oa': False, 'landing_page_url': 'https:/...",...,[Astronautics],"[Beijing Institute of Technology, 100081, Beij...","[Xinfu Liu, Zuojun Shen, Ping Lu]","['Beijing Institute of Technology,<br>100081, ...","['Xinfu Liu', 'Zuojun Shen', 'Ping Lu']","['0731-5090 LinkGoogle Scholar', 'JGCODS<br>07...",Journal of Guidance Control and Dynamics,journal,2,"[CN, US]"


In [99]:
dfinfo.head()

Unnamed: 0_level_0,x,y,id,title,doi,cluster,probability,publication_date,grants,locations,keywords,top_concepts,affil_list,author_list,wrapped_affil_list,wrapped_author_list,wrapped_keywords,source,source_type
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
https://openalex.org/W2509257507,13.499785,5.620109,https://openalex.org/W2509257507,Closed-Loop Optimization of Guidance Gain for ...,https://doi.org/10.2514/1.g000323,0,1.0,2017-02-01,[],"[{'is_oa': False, 'landing_page_url': 'https:/...","[0731-5090 LinkGoogle Scholar, JGCODS 0731-509...",[Astronautics],"[Beijing Institute of Technology, 100081, Beij...","[Xinfu Liu, Zuojun Shen, Ping Lu]","['Beijing Institute of Technology,<br>100081, ...","['Xinfu Liu', 'Zuojun Shen', 'Ping Lu']","['0731-5090 LinkGoogle Scholar', 'JGCODS<br>07...",Journal of Guidance Control and Dynamics,journal
https://openalex.org/W2481447397,13.649143,5.818709,https://openalex.org/W2481447397,Range-to-go weighted optimal guidance with imp...,https://doi.org/10.1109/taes.2016.150415,0,1.0,2016-06-01,[],"[{'is_oa': False, 'landing_page_url': 'https:/...","[impact angle, impact angle constraint, angle,...","[Missile, Weighting, Control theory (sociology...","[PGM R&D Lab, LIG Nex1, Seongnam, Republic of ...","[Bong-Gyun Park, Tae-Hun Kim, Min-Jea Tahk]","['PGM R&D Lab, LIG Nex1, Seongnam,<br>Republic...","['Bong-Gyun Park', 'Tae-Hun Kim', 'Min-<br>Jea...","['impact angle', 'impact angle<br>constraint',...",IEEE Transactions on Aerospace and Electronic ...,journal
https://openalex.org/W2773666956,13.279628,5.113225,https://openalex.org/W2773666956,Terminal attack trajectories of peregrine falc...,https://doi.org/10.1073/pnas.1714532114,0,1.0,2017-12-04,[],"[{'is_oa': True, 'landing_page_url': 'https://...","[proportional navigation guidance, proportiona...","[Drone, Terminal (telecommunication), Intercep...","[Department of Zoology, University of Oxford, ...","[Caroline H. Brighton, Adrian L. R. Thomas, Gr...","['Department of Zoology, University of<br>Oxfo...","['Caroline H. Brighton', 'Adrian L. R.<br>Thom...","['proportional navigation guidance',<br>'propo...",Proceedings of the National Academy of Science...,journal
https://openalex.org/W2609622286,13.946743,5.966068,https://openalex.org/W2609622286,Impact-Time-Control Guidance Law With Constrai...,https://doi.org/10.1109/taes.2017.2698837,0,1.0,2017-10-01,[],"[{'is_oa': False, 'landing_page_url': 'https:/...","[Seeker Look Angle, Guidance Law, Seeker, Angl...",[],"[Agency for Defense Development, Daejeon South...","[In-Soo Jeon, Jin-Ik Lee]","['Agency for Defense Development,<br>Daejeon S...","['In-Soo Jeon', 'Jin-Ik Lee']","['Seeker Look Angle', 'Guidance Law',<br>'Seek...",IEEE Transactions on Aerospace and Electronic ...,journal
https://openalex.org/W2795099673,14.511933,7.262155,https://openalex.org/W2795099673,A new sliding mode control design for integrat...,https://doi.org/10.1016/j.ast.2018.03.042,0,1.0,2018-07-01,[{'funder': 'https://openalex.org/F4320321001'...,"[{'is_oa': False, 'landing_page_url': 'https:/...","[sliding mode control, integrated missile guid...","[Missile, Sliding mode control]","[Institute of Precision Guidance and Control, ...","[Jianguo Guo, Yi Xiong, Jun Zhou]","[""Institute of Precision Guidance and<br>Contr...","['Jianguo Guo', 'Yi Xiong', 'Jun Zhou']","['sliding mode control', 'integrated<br>missil...",Aerospace Science and Technology,journal


In [100]:
dftriple.head()

Unnamed: 0,id,display_name,ror,country_code,type,paper_id,paper_raw_affiliation_string,paper_author_position,paper_doi,paper_title,...,paper_x,paper_y,paper_cluster,paper_cluster_score,paper_author_id,paper_author_display_name,paper_author_orcid,source,source_type,funder_list
0,https://openalex.org/I125839683,Beijing Institute of Technology,https://ror.org/01skt4w74,CN,education,https://openalex.org/W2509257507,"Beijing Institute of Technology, 100081, Beiji...",first,https://doi.org/10.2514/1.g000323,Closed-Loop Optimization of Guidance Gain for ...,...,13.499785,5.620109,0,1.0,https://openalex.org/A5048425943,Xinfu Liu,,Journal of Guidance Control and Dynamics,journal,[]
1,https://openalex.org/I82880672,Beihang University,https://ror.org/00wk2mp56,CN,education,https://openalex.org/W2509257507,"Beihang University, 100191 Beijing, People ’ s...",middle,https://doi.org/10.2514/1.g000323,Closed-Loop Optimization of Guidance Gain for ...,...,13.499785,5.620109,0,1.0,https://openalex.org/A5087466200,Zuojun Shen,https://orcid.org/0000-0003-3315-7162,Journal of Guidance Control and Dynamics,journal,[]
2,https://openalex.org/I26538001,San Diego State University,https://ror.org/0264fdx42,US,education,https://openalex.org/W2509257507,"San Diego State University, San Diego, Calif...",last,https://doi.org/10.2514/1.g000323,Closed-Loop Optimization of Guidance Gain for ...,...,13.499785,5.620109,0,1.0,https://openalex.org/A5055716192,Ping Lu,https://orcid.org/0000-0003-3684-1365,Journal of Guidance Control and Dynamics,journal,[]
3,https://openalex.org/I4210089444,GS Caltex (South Korea),https://ror.org/00bvkj141,KR,company,https://openalex.org/W2481447397,"PGM R&D Lab, LIG Nex1, Seongnam, Republic of K...",first,https://doi.org/10.1109/taes.2016.150415,Range-to-go weighted optimal guidance with imp...,...,13.649143,5.818709,0,1.0,https://openalex.org/A5067656272,Bong-Gyun Park,https://orcid.org/0000-0003-0917-726X,IEEE Transactions on Aerospace and Electronic ...,journal,[]
4,https://openalex.org/I2801036362,Agency for Defense Development,https://ror.org/05fhe0r85,KR,government,https://openalex.org/W2481447397,"Agency for Defense Development , Daejeon, Repu...",middle,https://doi.org/10.1109/taes.2016.150415,Range-to-go weighted optimal guidance with imp...,...,13.649143,5.818709,0,1.0,https://openalex.org/A5043731787,Tae-Hun Kim,https://orcid.org/0000-0001-8606-351X,IEEE Transactions on Aerospace and Electronic ...,journal,[]


In [101]:
jamming_concepts

[{'id': 'https://openalex.org/C522053795',
  'wikidata': 'https://www.wikidata.org/wiki/Q2335090',
  'display_name': 'Missile guidance',
  'relevance_score': 8928.525,
  'level': 3,
  'description': 'variety of methods of guiding a missile',
  'works_count': 6157,
  'cited_by_count': 27073,
  'summary_stats': {'2yr_mean_citedness': 1.0793103448275863,
   'h_index': 69,
   'i10_index': 532},
  'ids': {'openalex': 'https://openalex.org/C522053795',
   'wikidata': 'https://www.wikidata.org/wiki/Q2335090',
   'mag': '522053795',
   'wikipedia': 'https://en.wikipedia.org/wiki/Missile%20guidance'},
  'image_url': 'https://upload.wikimedia.org/wikipedia/commons/6/68/Image-GBU-24_Missile_testmontage-gi_BLU-109_bomb.jpg',
  'image_thumbnail_url': 'https://upload.wikimedia.org/wikipedia/commons/thumb/6/68/Image-GBU-24_Missile_testmontage-gi_BLU-109_bomb.jpg/42px-Image-GBU-24_Missile_testmontage-gi_BLU-109_bomb.jpg',
  'international': {'display_name': {'ar': 'توجيه الصاروخ',
    'be-tarask': 'сы

# Co-authorship Network


Streamlit with pyvis: https://towardsdatascience.com/how-to-deploy-interactive-pyvis-network-graphs-on-streamlit-6c401d4c99db

the data source is **dftriple**; let a user interactively select **which type of graph** , the selection of node types, to display. otherwise its just too too much.

Can display works and authors; construct that first:

In [102]:
dftriple.columns

Index(['id', 'display_name', 'ror', 'country_code', 'type', 'paper_id',
       'paper_raw_affiliation_string', 'paper_author_position', 'paper_doi',
       'paper_title', 'paper_abstract', 'paper_publication_date',
       'paper_publication_year', 'paper_grants', 'paper_locations',
       'paper_is_corrresponding', 'paper_x', 'paper_y', 'paper_cluster',
       'paper_cluster_score', 'paper_author_id', 'paper_author_display_name',
       'paper_author_orcid', 'source', 'source_type', 'funder_list'],
      dtype='object')

group **dftriple** by **paper_id** and get a list of all the **paper_author_id** values. and then from that list get all distinct subsets of two paper_author_ids.   accumulate that list.  and then we will haave a weighted undirected graph.

file://wsl.localhost/Ubuntu/home/davidd/2023/SWITCHBOARD/switchboard-mitigations-sort/graphvizmaker.html

In [103]:
import networkx as nx
from pyvis.network import Network
import igraph as ig # for getting a layout w/o relying on slow pyvis physics 

In [104]:
#help(Network)

file://wsl.localhost/Ubuntu/home/davidd/2023/SWITCHBOARD/switchboard-mitigations-sort/graphvizmaker.html

In [105]:
dftriple.columns

Index(['id', 'display_name', 'ror', 'country_code', 'type', 'paper_id',
       'paper_raw_affiliation_string', 'paper_author_position', 'paper_doi',
       'paper_title', 'paper_abstract', 'paper_publication_date',
       'paper_publication_year', 'paper_grants', 'paper_locations',
       'paper_is_corrresponding', 'paper_x', 'paper_y', 'paper_cluster',
       'paper_cluster_score', 'paper_author_id', 'paper_author_display_name',
       'paper_author_orcid', 'source', 'source_type', 'funder_list'],
      dtype='object')

In [106]:
dfinfo.columns

Index(['x', 'y', 'id', 'title', 'doi', 'cluster', 'probability',
       'publication_date', 'grants', 'locations', 'keywords', 'top_concepts',
       'affil_list', 'author_list', 'wrapped_affil_list',
       'wrapped_author_list', 'wrapped_keywords', 'source', 'source_type'],
      dtype='object')

In [107]:
dfinfo["funder_list"] = dfinfo["grants"].apply(get_funder_names)
dfinfo["wrapped_funder_list"] = dfinfo["funder_list"].apply(str).apply(wrap_it)

In [108]:
dfinfo.to_pickle('jammingdfinfo2d.pkl')

In [109]:
dfinfo[['id','keywords','wrapped_keywords','wrapped_funder_list']].head()

Unnamed: 0_level_0,id,keywords,wrapped_keywords,wrapped_funder_list
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
https://openalex.org/W2509257507,https://openalex.org/W2509257507,"[0731-5090 LinkGoogle Scholar, JGCODS 0731-509...","['0731-5090 LinkGoogle Scholar', 'JGCODS<br>07...",[]
https://openalex.org/W2481447397,https://openalex.org/W2481447397,"[impact angle, impact angle constraint, angle,...","['impact angle', 'impact angle<br>constraint',...",[]
https://openalex.org/W2773666956,https://openalex.org/W2773666956,"[proportional navigation guidance, proportiona...","['proportional navigation guidance',<br>'propo...",[]
https://openalex.org/W2609622286,https://openalex.org/W2609622286,"[Seeker Look Angle, Guidance Law, Seeker, Angl...","['Seeker Look Angle', 'Guidance Law',<br>'Seek...",[]
https://openalex.org/W2795099673,https://openalex.org/W2795099673,"[sliding mode control, integrated missile guid...","['sliding mode control', 'integrated<br>missil...",['National Natural Science Foundation of<br>Ch...


add the paper yake keywords to the hover of the paper nodes:

In [110]:
kw_dict = dfinfo['keywords'].to_dict()

In [111]:
dftriple[['source','source_type']].head()

Unnamed: 0,source,source_type
0,Journal of Guidance Control and Dynamics,journal
1,Journal of Guidance Control and Dynamics,journal
2,Journal of Guidance Control and Dynamics,journal
3,IEEE Transactions on Aerospace and Electronic ...,journal
4,IEEE Transactions on Aerospace and Electronic ...,journal


In [112]:
dftriple.head()

Unnamed: 0,id,display_name,ror,country_code,type,paper_id,paper_raw_affiliation_string,paper_author_position,paper_doi,paper_title,...,paper_x,paper_y,paper_cluster,paper_cluster_score,paper_author_id,paper_author_display_name,paper_author_orcid,source,source_type,funder_list
0,https://openalex.org/I125839683,Beijing Institute of Technology,https://ror.org/01skt4w74,CN,education,https://openalex.org/W2509257507,"Beijing Institute of Technology, 100081, Beiji...",first,https://doi.org/10.2514/1.g000323,Closed-Loop Optimization of Guidance Gain for ...,...,13.499785,5.620109,0,1.0,https://openalex.org/A5048425943,Xinfu Liu,,Journal of Guidance Control and Dynamics,journal,[]
1,https://openalex.org/I82880672,Beihang University,https://ror.org/00wk2mp56,CN,education,https://openalex.org/W2509257507,"Beihang University, 100191 Beijing, People ’ s...",middle,https://doi.org/10.2514/1.g000323,Closed-Loop Optimization of Guidance Gain for ...,...,13.499785,5.620109,0,1.0,https://openalex.org/A5087466200,Zuojun Shen,https://orcid.org/0000-0003-3315-7162,Journal of Guidance Control and Dynamics,journal,[]
2,https://openalex.org/I26538001,San Diego State University,https://ror.org/0264fdx42,US,education,https://openalex.org/W2509257507,"San Diego State University, San Diego, Calif...",last,https://doi.org/10.2514/1.g000323,Closed-Loop Optimization of Guidance Gain for ...,...,13.499785,5.620109,0,1.0,https://openalex.org/A5055716192,Ping Lu,https://orcid.org/0000-0003-3684-1365,Journal of Guidance Control and Dynamics,journal,[]
3,https://openalex.org/I4210089444,GS Caltex (South Korea),https://ror.org/00bvkj141,KR,company,https://openalex.org/W2481447397,"PGM R&D Lab, LIG Nex1, Seongnam, Republic of K...",first,https://doi.org/10.1109/taes.2016.150415,Range-to-go weighted optimal guidance with imp...,...,13.649143,5.818709,0,1.0,https://openalex.org/A5067656272,Bong-Gyun Park,https://orcid.org/0000-0003-0917-726X,IEEE Transactions on Aerospace and Electronic ...,journal,[]
4,https://openalex.org/I2801036362,Agency for Defense Development,https://ror.org/05fhe0r85,KR,government,https://openalex.org/W2481447397,"Agency for Defense Development , Daejeon, Repu...",middle,https://doi.org/10.1109/taes.2016.150415,Range-to-go weighted optimal guidance with imp...,...,13.649143,5.818709,0,1.0,https://openalex.org/A5043731787,Tae-Hun Kim,https://orcid.org/0000-0001-8606-351X,IEEE Transactions on Aerospace and Electronic ...,journal,[]


In [113]:
dftriple.columns

Index(['id', 'display_name', 'ror', 'country_code', 'type', 'paper_id',
       'paper_raw_affiliation_string', 'paper_author_position', 'paper_doi',
       'paper_title', 'paper_abstract', 'paper_publication_date',
       'paper_publication_year', 'paper_grants', 'paper_locations',
       'paper_is_corrresponding', 'paper_x', 'paper_y', 'paper_cluster',
       'paper_cluster_score', 'paper_author_id', 'paper_author_display_name',
       'paper_author_orcid', 'source', 'source_type', 'funder_list'],
      dtype='object')

In [118]:
dc = dftriple[dftriple['paper_cluster'] == 10].copy()
dc.shape

(72, 26)

need to flatten the funder_list column

In [120]:
[x for row in dc['funder_list'].tolist() for x in row]

['China Postdoctoral Science Foundation',
 'National Natural Science Foundation of China',
 'China Postdoctoral Science Foundation',
 'National Natural Science Foundation of China',
 'China Postdoctoral Science Foundation',
 'National Natural Science Foundation of China',
 'China Postdoctoral Science Foundation',
 'National Natural Science Foundation of China',
 'National Natural Science Foundation of China',
 'National Natural Science Foundation of China',
 'National Natural Science Foundation of China',
 'National Natural Science Foundation of China',
 'National Natural Science Foundation of China',
 'National Natural Science Foundation of China',
 'National Natural Science Foundation of China',
 'National Natural Science Foundation of China',
 'National Natural Science Foundation of China',
 'National Natural Science Foundation of China',
 'National Natural Science Foundation of China',
 'National Natural Science Foundation of China',
 'National Natural Science Foundation of China',

In [121]:
kw_dict = dfinfo['keywords'].to_dict()

# add in the affiliations as nodes as well; that row, author, paper, affil. all three get links. ok.
def create_nx_graph(df: pd.DataFrame, cl:int) -> nx.Graph:
    """
    takes the dataframe df, and creates the undirected graph
    from the source and target columns for each row.
    """
    g = nx.Graph() # dc['paper_cluster'] == cl
    dc = df[df['paper_cluster'] == cl]
    author_counts = dc['paper_author_id'].tolist()
    author_counts_dict = {c:author_counts.count(c) for c in author_counts}
    affiliation_counts = dc['id'].tolist()
    affiliation_counts_dict = {c:affiliation_counts.count(c) for c in affiliation_counts}
    source_counts = dc['source'].tolist()
    source_counts_dict = {c:source_counts.count(c) for c in source_counts}
    funder_counts = [x for row in dc['funder_list'].tolist() for x in row]
    funder_counts_dict = {c:funder_counts.count(c) for c in funder_counts}
    for index, row in df[df['paper_cluster'] == cl].iterrows():
        g.add_node(row['paper_id'], group='work', title=row['paper_title'])
        g.add_node(row['paper_author_id'], title=row['paper_author_display_name'],
                   group='author',value = author_counts_dict[row['paper_author_id']])
        g.add_node(row['id'], group='affiliation',
                   title=row['display_name'] + '\n' + row['country_code'],
                  value = affiliation_counts_dict[row['id']])
        if row['source']:
            g.add_node(row['source'], group=row['source_type'],
                      title=row['source'] + ' :\n ' + row['source_type'],
                      value=source_counts_dict[row['source']])
            g.add_edge(
                row['paper_id'],
                row['source'],
                title=row['paper_title'] + ' :\n ' + str(row['paper_publication_date']) +  \
                ' :\n' + row['source'] + ' :\n ' + \
                row['source_type'],
              #  weight = df[(df['paper_id'] == row['paper_id']) & \
              #              (df['source'] == row['source'])]['paper_cluster_score'].sum()
               # weight = row['paper_cluster_score']
            )
            g.add_edge(
                row['paper_author_id'],
                row['source'],
                title=row['paper_author_display_name'] + ':\n' + row['source'],
             #   weight = df[(df['paper_author_id'] == row['paper_author_id']) & \
              #              (df['source'] == row['source'])]['paper_cluster_score'].sum()
               # weight = row['paper_cluster_score']
            )
        if len(row['funder_list']) > 0:
            for f in row['funder_list']:
                g.add_node(f, group='funder',
                          title=str(f),
                          value = founder_counts_dict[f]),
                g.add_edge(
                       row['paper_id'],
                       f,
                       title=row['paper_title'] + ':\n ' +  str(row['paper_publication_date']) + \
                       ' :\n' + str(f),
                  #  weight = row['paper_cluster_score']
                   )
                g.add_edge(
                       f,
                       row['paper_author_id'],
                       title=row['paper_author_display_name'] + ' :\n ' + \
                       str(f),
                  #  weight = row['paper_cluster_score']
                       
                   )
                g.add_edge(
                       f,
                       row['id'],
                       title=row['display_name'] + '\n' + row['country_code'] + ' :\n ' + \
                       str(f)  ,
                  #  weight = row['paper_cluster_score']
                   )  
                if row["source"]:
                    g.add_edge(
                        f,
                        row["source"],
                        title=row["source"] + ' :\n' + str(f),
                     #   weight = row['paper_cluster_score']
                    )
        g.nodes[row['paper_id']]['title'] = (
            row['paper_title'] + ' :\n ' + str(row['paper_publication_date'] + ':\n' + 
            '\n'.join(kw_dict[row['paper_id']]))
        )
        g.nodes[row['paper_author_id']]['title'] = (
            row['paper_author_display_name']
        )
        g.add_edge(
            row['paper_id'],
            row['paper_author_id'],
        title=row['paper_title'] + ' :\n ' + row['paper_author_display_name'] + ' :\n ' + \
            row['paper_raw_affiliation_string'],
         #   weight = row['paper_cluster_score']
        )
        g.add_edge(
            row['paper_author_id'],
            row['id'],
            title=row['paper_author_display_name'] + ' :\n ' + \
            row['display_name'] + ' :\n ' + row['country_code'],
          #  weight = row['paper_cluster_score']
        )
        g.add_edge(
            row['paper_id'],
            row['id'],
            title=row['paper_title'] + ' :\n ' + str(row['paper_publication_date']) + ':\n' + 
            row['display_name'] + ' :\n ' + row['country_code'],
         #   weight = row['paper_cluster_score']
        )
        
    g_ig = ig.Graph.from_networkx(g) # assign 'x', and 'y' to g before returning
    #layout = g_ig.layout_auto()
    #layout = g_ig.layout_davidson_harel()
    layout = g_ig.layout_umap(min_dist = 2, epochs = 500)
    # https://igraph.org/python/tutorial/0.9.6/visualisation.html
    coords = layout.coords
    allnodes = list(g.nodes())
    coords_dict = {allnodes[i]:(coords[i][0], coords[i][1]) for i in range(len(allnodes))}
    for i in g.nodes():
        g.nodes[i]['x'] = 250 * coords_dict[i][0] # the scale factor needed 
        g.nodes[i]['y'] = 250 * coords_dict[i][1]
    return g
                

In [115]:
def create_pyvis_html(cl: int, filename: str = "pyvis_coauthorships_graph.html"):
    """
    wrapper function that calls create_nx_graph to finally 
    produce an interactive pyvis standalone html file
    """
    g_nx = create_nx_graph(dftriple, cl);
    h = Network(height="1000px",
          #  heading="Mitigations and Techniques Relationships",
                width="100%",
                cdn_resources="remote", # can grab the visjs library to make this local if needed
            # probably should
                bgcolor="#222222",
            neighborhood_highlight=True,
              # default_node_size=1,
                font_color="white",
                directed=False,
               # select_menu=True,
                filter_menu=True,
                notebook=False,
               )
    #h.repulsion()
    h.from_nx(g_nx, show_edge_weights=False)
    #h.barnes_hut()
    #h.repulsion(node_distance=40,
    #            central_gravity=-0.2, spring_length=5, spring_strength=0.005, damping=0.09)
    neighbor_map = h.get_adj_list()
   # for node in h.nodes:
   #     if node['group'] == 'author':
   #         a = list(neighbor_map[node["id"]]) # want to insert a "\n" into every third element of a
   #     if node['group'] == 'work':
   #         a = list(neighbor_map[node["id"]])
   #     i = 3
   #     while i < len(a):
   #         a.insert(i, "\n")
   #         i += 4
   #     node["title"] += "\n Neighbors: \n" + " | ".join(a)
   #     node["value"] = len(neighbor_map[node["id"]]) 
# "physics": {
#    "enabled": false
#  },
    h.set_options(
    """
const options = {
  "interaction": {
    "navigationButtons": false
  },
 "physics": {
     "enabled": false
 },
  "edges": {
    "color": {
        "inherit": true
    },
    "setReferenceSize": null,
    "setReference": {
        "angle": 0.7853981633974483
    },
    "smooth": {
        "forceDirection": "none"
    }
  }
  }
    """
    )
    #h.show_buttons(filter_=['physics'])
  #  h.barnes_hut()
    #h.repulsion()
    try:
        path = './tmp'
        h.save_graph(f"{path}/{filename}")
        HtmlFile = open(f"{path}/{filename}","r",
                        encoding='utf-8')
    except:
        h.save_graph(f"{filename}")
        HtmlFile = open(f"{filename}", "r",
                        encoding="utf-8")
    return h

In [117]:
h = create_pyvis_html(13)

In [104]:
type(h)

pyvis.network.Network

In [107]:
help(h.set_options)

Help on method set_options in module pyvis.network:

set_options(options) method of pyvis.network.Network instance
    Overrides the default options object passed to the VisJS framework.
    Delegates to the :meth:`options.Options.set` routine.
    
    :param options: The string representation of the Javascript-like object
                    to be used to override default options.
    
    :type options: str



In [109]:
help(h.repulsion)

Help on method repulsion in module pyvis.network:

repulsion(node_distance=100, central_gravity=0.2, spring_length=200, spring_strength=0.05, damping=0.09) method of pyvis.network.Network instance
    Set the physics attribute of the entire network to repulsion.
    When called, it sets the solver attribute of physics to repulsion.
    
    :param node_distance: This is the range of influence for the repulsion.
    :param central_gravity: The gravity attractor to pull the entire network
                            to the center.
    :param spring_length: The rest length of the edges
    :param spring_strength: The strong the edges springs are
    :param damping: A value ranging from 0 to 1 of how much of the velocity
                    from the previous physics simulation iteration carries
                    over to the next iteration.
    
    :type node_distance: int
    :type central_gravity float
    :type spring_length: int
    :type spring_strength: float
    :type damping: flo

In [99]:
# create networkx graph object from pandas dataframe
g = nx.from_pandas_edgelist(dftriple, 'paper_id','paper_author_id')
# initiate PyVis network object


relnet = Network(height='456px', bgcolor='#222222', font_color='white')
# take Networkx graph and translate it to a PyVis graph format
#relnet.from_nx(G)
g_ig = ig.Graph.from_networkx(g) # assign 'x', and 'y' to g before returning
layout = g_ig.layout_auto()
coords = layout.coords
allnodes = list(g.nodes())
coords_dict = {allnodes[i]:(coords[i][0], coords[i][1]) for i in range(len(allnodes))}
for i in g.nodes():
    g.nodes[i]['x'] = 500 * coords_dict[i][0] # the scale factor needed 
    g.nodes[i]['y'] = 500 * coords_dict[i][1]
#    return g
relnet.from_nx(g)

In [100]:
# generate network with specific layout settings
#relnet.repulsion(node_distance=420, central_gravity=0.33,
#                 spring_length=110, spring_strength=0.10,
#                 damping = 0.95)

relnet.toggle_physics(False)

```python
 # Save and read graph as HTML file (on Streamlit Sharing)
    try:
        path = '/tmp'
        drug_net.save_graph(f'{path}/pyvis_graph.html')
        HtmlFile = open(f'{path}/pyvis_graph.html', 'r', encoding='utf-8')

    # Save and read graph as HTML file (locally)
    except:
        path = '/html_files'
        drug_net.save_graph(f'{path}/pyvis_graph.html')
        HtmlFile = open(f'{path}/pyvis_graph.html', 'r', encoding='utf-8')

    # Load HTML file in HTML component for display on Streamlit page
    components.html(HtmlFile.read(), height=435)
```