# Research topic impact


In [45]:
from google.cloud import bigquery
import pandas as pd
import plotly.express as px
import plotly.offline as pyo
pyo.init_notebook_mode(connected=True)

In [21]:
# Global constants
# GCP project passed to the BigQuery client created below.
PROJECT_ID = 'collaboration-recommender'
# Author under analysis: AUTHOR_NAME drives the substring (LIKE) name lookup,
# AUTHOR_SID drives the exact-match filters in the later query cells.
AUTHOR_NAME = 'Blaž Zupan'
AUTHOR_SID = '713a7a792274caa79b9796d0ace7f081'
# Initialize global variables
# Single BigQuery client shared by the query cells that follow.
bq_client = bigquery.Client(project=PROJECT_ID)

In [20]:
# Find candidate author profiles whose full name contains AUTHOR_NAME.
# This is a substring match, so it can return several people (e.g. longer
# surnames that start with the same text); use it to pick the right AUTHOR_SID.
#
# The name is bound as a query parameter instead of being interpolated into
# the SQL text, so names containing quotes (e.g. O'Brien) cannot break or
# alter the statement.
author_query = """
SELECT *
FROM PROD.DIM_AUTHOR
WHERE AUTHOR_FULL_NAME LIKE CONCAT('%', @author_name, '%')
"""
author_job_config = bigquery.QueryJobConfig(
    query_parameters=[
        bigquery.ScalarQueryParameter('author_name', 'STRING', AUTHOR_NAME),
    ]
)

author_df = bq_client.query(author_query, job_config=author_job_config).to_dataframe()
author_df

Unnamed: 0,AUTHOR_SID,AUTHOR_ORCID_ID,IS_AUTHOR_MORE_COLLABORATIVE,IS_EUTOPIA_AUTHOR,AUTHOR_FULL_NAME
0,713a7a792274caa79b9796d0ace7f081,0000-0001-7094-1023,False,True,Blaž Zupan
1,ee9e8057c9556c02196d672b563dd8d5,,False,True,Blaž Zupančič


In [22]:
# Fetch the profile row(s) for exactly the configured AUTHOR_SID
# (unlike the name lookup, this is an equality match on the key).
# The SID is bound as a query parameter rather than formatted into the SQL.
one_author_query = """
SELECT *
FROM PROD.DIM_AUTHOR
WHERE AUTHOR_SID = @author_sid
"""
one_author_job_config = bigquery.QueryJobConfig(
    query_parameters=[
        bigquery.ScalarQueryParameter('author_sid', 'STRING', AUTHOR_SID),
    ]
)

one_author_df = bq_client.query(one_author_query, job_config=one_author_job_config).to_dataframe()
one_author_df

Unnamed: 0,AUTHOR_SID,AUTHOR_ORCID_ID,IS_AUTHOR_MORE_COLLABORATIVE,IS_EUTOPIA_AUTHOR,AUTHOR_FULL_NAME
0,713a7a792274caa79b9796d0ace7f081,0000-0001-7094-1023,False,True,Blaž Zupan


In [24]:
# Convert the profile lookup result into a plain dict for the first matching row.
author_records = one_author_df.to_dict(orient='records')
if not author_records:
    # Fail with context instead of an opaque "list index out of range"
    # when the configured SID does not exist in the author dimension.
    raise LookupError(f'No author profile found for AUTHOR_SID={AUTHOR_SID!r}')
author = author_records[0]
author

{'AUTHOR_SID': '713a7a792274caa79b9796d0ace7f081',
 'AUTHOR_ORCID_ID': '0000-0001-7094-1023',
 'IS_AUTHOR_MORE_COLLABORATIVE': False,
 'IS_EUTOPIA_AUTHOR': True,
 'AUTHOR_FULL_NAME': 'Blaž Zupan'}

In [25]:
# Fetch the DIM_ARTICLE rows for every article the author is linked to in
# FCT_COLLABORATION.
#
# - Only A.* is selected: the CTE contributes nothing but ARTICLE_SID, which
#   DIM_ARTICLE already has, so SELECT * would return that key twice.
# - The SID is bound as a query parameter rather than formatted into the SQL.
articles_query = """
WITH AUTHOR_ARTICLES AS (
    SELECT DISTINCT ARTICLE_SID
    FROM PROD.FCT_COLLABORATION
    WHERE AUTHOR_SID = @author_sid
)
SELECT A.*
FROM PROD.DIM_ARTICLE A
    INNER JOIN AUTHOR_ARTICLES AA
        ON A.ARTICLE_SID = AA.ARTICLE_SID
"""
articles_job_config = bigquery.QueryJobConfig(
    query_parameters=[
        bigquery.ScalarQueryParameter('author_sid', 'STRING', AUTHOR_SID),
    ]
)

articles_df = bq_client.query(articles_query, job_config=articles_job_config).to_dataframe()

In [35]:
# List every article title, each followed by a blank line for readability.
for article_title in articles_df['ARTICLE_TITLE']:
    print(article_title, end='\n\n')

Textual features for corpus visualization using correspondence analysis

Finding Patterns in Class-Labeled Data Using Data Visualization

The development of an entrepreneurial mindset in primary education

Gene network inference by probabilistic scoring of relationships from a factorized model of interactions

Visualization-based cancer microarray data classification analysis

Genome Sequence of a Lethal Strain of Xylem-Invading
            <i>Verticillium nonalfalfae</i>

What can education bring to entrepreneurship? Formal versus non-formal education

scOrange—a tool for hands-on training of concepts from single-cell data analytics

Data-Driven Revision of Decision Models

Orthogonal matrix factorization enables integrative analysis of multiple RNA binding proteins

Data Imputation in Epistatic MAPs by Network-Guided Matrix Completion

Gene network inference by fusing data from diverse distributions

Nomograms for visualizing support vector machines

Concurrent software architectures

In [37]:
# Per co-author collaboration metrics for the configured author:
#   SHARED_ARTICLE_COUNT - number of distinct articles written together
#   TOTAL_CITATIONS      - sum of ARTICLE_CITATION_COUNT over those articles
#
# The (main author, co-author, article) triples are deduplicated BEFORE the
# citation counts are joined in. The original query protected the article
# count with COUNT(DISTINCT ...) but summed citations over the raw self-join,
# so any repeated (article, author) rows in FCT_COLLABORATION would inflate
# TOTAL_CITATIONS. If the fact table has no such repeats the results are
# unchanged.
# NOTE(review): assumes FCT_ARTICLE holds one row per ARTICLE_SID - confirm.
#
# The SID is bound as a query parameter rather than formatted into the SQL.
collaboration_query = """
WITH SHARED_ARTICLES AS (SELECT DISTINCT C1.AUTHOR_SID  AS MAIN_AUTHOR,
                                         C2.AUTHOR_SID  AS CO_AUTHOR,
                                         C1.ARTICLE_SID AS ARTICLE_SID
                         FROM `collaboration-recommender.PROD.FCT_COLLABORATION` C1
                                  JOIN `collaboration-recommender.PROD.FCT_COLLABORATION` C2
                                       ON C1.ARTICLE_SID = C2.ARTICLE_SID
                         WHERE C1.AUTHOR_SID = @author_sid
                           AND C1.AUTHOR_SID <> C2.AUTHOR_SID -- Exclude the main author from being listed as their own collaborator
                        ),
     AUTHOR_COLLABORATIONS AS (SELECT S.MAIN_AUTHOR,
                                      S.CO_AUTHOR,
                                      COUNT(DISTINCT S.ARTICLE_SID) AS SHARED_ARTICLE_COUNT,
                                      SUM(A.ARTICLE_CITATION_COUNT) AS TOTAL_CITATIONS
                               FROM SHARED_ARTICLES S
                                        JOIN `collaboration-recommender.PROD.FCT_ARTICLE` A
                                             ON S.ARTICLE_SID = A.ARTICLE_SID
                               GROUP BY S.MAIN_AUTHOR, S.CO_AUTHOR)

SELECT MAIN_AUTHOR,
       CO_AUTHOR,
       SHARED_ARTICLE_COUNT,
       TOTAL_CITATIONS
FROM AUTHOR_COLLABORATIONS
ORDER BY SHARED_ARTICLE_COUNT DESC, TOTAL_CITATIONS DESC;
"""
collaboration_job_config = bigquery.QueryJobConfig(
    query_parameters=[
        bigquery.ScalarQueryParameter('author_sid', 'STRING', AUTHOR_SID),
    ]
)

collaboration_df = bq_client.query(collaboration_query, job_config=collaboration_job_config).to_dataframe()

In [47]:
# Bar chart of shared-article counts per co-author; each bar is annotated
# with the total citations of the shared articles.
# The title is built from the configured author so it always matches the data
# shown (it previously contained an unreplaced "your_author_sid_here" placeholder).
fig = px.bar(collaboration_df, x='CO_AUTHOR', y='SHARED_ARTICLE_COUNT', text='TOTAL_CITATIONS',
             hover_data=['TOTAL_CITATIONS'],
             labels={
                 'CO_AUTHOR': 'Co-Author',
                 'SHARED_ARTICLE_COUNT': 'Number of Shared Articles',
                 'TOTAL_CITATIONS': 'Total Citations'
             },
             title=f'Collaboration Metrics for {AUTHOR_NAME} (SID {AUTHOR_SID})')

fig.show()