This is a testing notebook to explore and transform data returned by Dimensions BigQuery.

In [6]:
from datetime import datetime
import pandas as pd
from bq import BigQuery
from dimensions import DimensionsAnalytics


# Example filters passed to the statistics query: a date range plus a
# citation-count window whose upper bound is left as None.
# NOTE(review): the range ends on 2022-12-30, not 2022-12-31 -- confirm intended.
filters = {
    "dateRange": {"from": "2021-01-01", "to": "2022-12-30"},
    "citationCount": {"min": 1, "max": None},
}

# Register the table under the name 'publications'; that same name is what
# get_basic_stats() is given below.
bq = BigQuery()
bq.add_dataset(
    name='publications',
    project_id='ucsd-discover',
    dataset='dimensions',
    table='ucsd_publications',
    billing_project_id='ucsd-discover',
)

# The analytics wrapper is built on top of the configured BigQuery helper.
analytics = DimensionsAnalytics(bq)

# Run the basic-statistics query for the registered dataset with the filters.
result = analytics.publication_analytics.get_basic_stats('publications', filters)


Executing query:

WITH filtered_pubs AS (
    SELECT
        EXTRACT(YEAR FROM date_normal) as pub_year,
        document_type.classification as doc_type,
        COALESCE(citations_count, 0) as citations_count,
        COALESCE(metrics.field_citation_ratio, 0) as field_citation_ratio,
        COALESCE(metrics.relative_citation_ratio, 0) as relative_citation_ratio,
        COALESCE(metrics.recent_citations, 0) as recent_citations,
        COALESCE(altmetrics.score, 0) as altmetric_score,
        concepts,
        pubmed.mesh.terms as mesh_terms,
        categories.sdg_v2021.full as sdg_categories,
        ARRAY_LENGTH(COALESCE(research_orgs, [])) as collaboration_count,
        ARRAY_LENGTH(COALESCE(research_org_countries, [])) as international_collaboration_count,
        ARRAY_LENGTH(COALESCE(authors, [])) as author_count,
        ARRAY_LENGTH(COALESCE(citations, [])) as citation_references_count,
        ARRAY_LENGTH(COALESCE(clinical_trial_ids, [])) as clinical_trials_count,
     

In [7]:
# Plain-text preview of the first five rows of the statistics result.
first_rows = result.head(5)
print(first_rows)

   year  total_publications  research_article_count  review_article_count  \
0  2021                1493                    1055                   183   
1  2022                1621                    1237                   193   
2  9999                3114                    2292                   376   

   research_chapter_count  conference_paper_count  reference_work_count  \
0                       9                      40                     4   
1                      16                      32                     0   
2                      25                      72                     4   

   editorial_count  other_journal_count  letter_to_editor_count  ...  \
0               35                    5                      52  ...   
1               28                    3                      28  ...   
2               63                    8                      80  ...   

                                        top_concepts  \
0  [{'concept_text': 'COVID-19', 'concept_cou