# Fields under fire

For each of the fields of study "LGBTQ+", "Transgender health", "HIV" and "Vaccine hesitancy", determine the number of terminated grants as a proportion of all NIH grants in that field for fiscal year 2024.

In [56]:
# Setup
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker

In [57]:
# Load the Airtable export of terminated NIH grants and report its size.
df_airtable = pd.read_csv(
    filepath_or_buffer="/content/2024-04-09-nih_terminations_airtable.csv",
)
total_airtable_count = df_airtable.shape[0]
print(f"There are {total_airtable_count} rows in the Airtable dataframe.")

There are 772 rows in the Airtable dataframe.


In [58]:
# Keep only the rows whose 'fiscal_year' is 2024.
# NOTE: despite the "_2014" suffix in its name, this frame holds fiscal year 2024 rows.
df_airtable_2014 = df_airtable.loc[df_airtable['fiscal_year'].eq(2024)]

In [59]:
# Sanity check: after the filter, 2024 should be the only fiscal year left.
df_airtable_2014['fiscal_year'].value_counts()

Unnamed: 0_level_0,count
fiscal_year,Unnamed: 1_level_1
2024,514


In [5]:
# Keep only the rows where 'project_type' is 'Full grant'
df_airtable_full_grant = df_airtable_2014.loc[
    df_airtable_2014['project_type'].eq('Full grant')
]

In [60]:
# Sanity check: after the filter, 'Full grant' should be the only project type left.
df_airtable_full_grant['project_type'].value_counts()

Unnamed: 0_level_0,count
project_type,Unnamed: 1_level_1
Full grant,497


In [61]:
# Narrow the frame to the four columns of interest:
# 'project_title', 'abstract_text', 'project_type' and 'fiscal_year'
df_airtable_edit = df_airtable_full_grant.loc[
    :, ['project_title', 'abstract_text', 'project_type', 'fiscal_year']
]

In [62]:
# Preview the first rows of the filtered, column-trimmed frame.
df_airtable_edit.head()

Unnamed: 0,project_title,abstract_text,project_type,fiscal_year
0,Investigating and identifying the heterogeneit...,PROJECT SUMMARY/ ABSTRACT In the midst of the ...,Full grant,2024
1,Cabotegravir PrEP: Actionable Robust Evidence ...,"PROJECT SUMMARY Despite high efficacy, oral pr...",Full grant,2024
2,Sexual orientation-related disparities in obst...,PROJECT SUMMARY/ABSTRACT Compared to heterose...,Full grant,2024
3,Trial of Human Milk Oligosaccharide-based synb...,Project Summary HIV-exposed uninfected (HEU) i...,Full grant,2024
4,NY Community-Hospital-Academic Maternal Health...,OVERALL: PROJECT SUMMARY/ABSTRACT The broad go...,Full grant,2024


In [63]:
# Report how many terminated grants remain after the fiscal-year and project-type filters.
print(f"There are {df_airtable_edit.shape[0]} grants where fiscal year is 2024 and project type is Full Grant.")

There are 497 grants where fiscal year is 2024 and project type is Full Grant.


## Search NIH reporter

https://reporter.nih.gov/search/ZS7AEb7vAU-pKBKTRnicRQ/projects

Fiscal year: 2024

Agency/Institute/Center: NIH

Admin: Yes

Funding: Yes

Award Type: New, Competing Renewal, Noncompeting

Project End Date: On or After: 02/20/2025

Limit to: Project Title, Project Abstracts

In [64]:
# Load the NIH reporter export for the "transgender" search.
# skiprows=11 skips the first 11 lines of the file - presumably the
# search-criteria preamble above the header row; TODO confirm against the export.
df_transgender = pd.read_csv(
    filepath_or_buffer="/content/transgender-SearchResult_Export_09Apr2025_082356.csv",
    skiprows=11,
)

In [65]:
# Print the first five project titles, one per line, to eyeball the export.
for project_title in df_transgender.loc[:, 'Project Title'].iloc[:5]:
  print(project_title)

PRIDEnet for the All of Us Research Program 
A Gender-Affirming Stigma Intervention to Improve Substance Misuse and HIV Risk among Transgender Women
Strategies to Prevent HIV Acquisition Among Transgender MSM in the US
Measures of structural stigmatization and discrimination for HIV research with Latine sexual and gender minorities
Structural Stigma and Mental Health Among Transgender and Gender Diverse Adults Living in the Rural United States


In [66]:
# Load the NIH reporter export for the "hiv" search.
# skiprows=11 skips the first 11 lines of the file - presumably the
# search-criteria preamble above the header row; TODO confirm against the export.
df_hiv = pd.read_csv(
    filepath_or_buffer="/content/hiv-SearchResult_Export_09Apr2025_082556.csv",
    skiprows=11,
)

In [67]:
# Print the first five project titles, one per line, to eyeball the export.
for project_title in df_hiv.loc[:, 'Project Title'].iloc[:5]:
  print(project_title)

Developing Cyclopeptide Nef Inhibitors to Facilitate HIV-1 Eradication
Targeting Siglec-9/Sialoglycan Interactions to Enhance NK Functions During HIV Infection
Implementing Integrated Services for People With HIV and Opioid Use Disorder
EBV-Positive Diffuse Large B-cell Lymphoma: Defining Biologic Determinants of Disease Pathogenesis in Immunodeficiency
Neural and Perceptual Mechanisms for Coding Frequency Modulation


In [68]:
# Load the NIH reporter export for the "lgbtq" search.
# skiprows=11 skips the first 11 lines of the file - presumably the
# search-criteria preamble above the header row; TODO confirm against the export.
df_lgbtq = pd.read_csv(
    filepath_or_buffer="/content/lgbtq-SearchResult_Export_09Apr2025_082703.csv",
    skiprows=11,
)

In [69]:
# Print the first five project titles, one per line, to eyeball the export.
for project_title in df_lgbtq.loc[:, 'Project Title'].iloc[:5]:
  print(project_title)

PRIDEnet for the All of Us Research Program 
Structural Stigma and Mental Health Among Transgender and Gender Diverse Adults Living in the Rural United States
#EverythingSucks: Understanding the bidirectional relations between vulnerability to internalizing symptoms in youth (13-20) and social mediacontent
Supportive and restrictive factors and mental health in LGBT adolescent and young adult populations
Multilevel strategies to understand and modify the role of structural and environmental context on HIV inequities for sexual and gender minorities of color


In [70]:
# Load the NIH reporter export for the "vaccine hesitancy" search.
# skiprows=11 skips the first 11 lines of the file - presumably the
# search-criteria preamble above the header row; TODO confirm against the export.
df_vaccine_hesitancy = pd.read_csv(
    filepath_or_buffer="/content/vaccine-hesitancy-SearchResult_Export_10Apr2025_035737.csv",
    skiprows=11,
)

In [71]:
# Print the first five project titles, one per line, to eyeball the export.
for project_title in df_vaccine_hesitancy.loc[:, 'Project Title'].iloc[:5]:
  print(project_title)

COVID-19 Vaccine Coverage and General Vaccine Hesitancy in Rural Areas in the United States
Influence of Social Media Social Networks and Misinformation on Vaccine Acceptance Among Black and Latinx Individuals
Community - based behavioral intervention to increase COVID - 19 and influenza vaccination for African American/ Black and Latino persons: An optimization randomized controlled trial
Using a Telehealth Model to Address Vaccine Hesitancy and Increase Vaccine Completion Among Communities in Southeastern Louisiana
Conversational Agents to Improve HPV Vaccine Acceptance in Primary Care


## Compare NIH reporter results with list of cancelled projects

In [72]:
# Accumulator for the per-field result records appended by get_results_by_field.
data = list()

In [73]:
def get_results_by_field(field_of_study, df, terminated_df=None, results=None):
  """
  Compare one NIH reporter export against the list of terminated projects.

  1. Find the number of terminated projects which are in the NIH reporter export
  2. Determine what proportion of projects listed in the NIH reporter have been cancelled

  Titles are compared case-insensitively and ignoring leading/trailing
  whitespace. Two titles match when one is contained in the other; the
  contained title is recorded as the "common title" and every common title is
  counted once. Non-string cells (e.g. missing values) and blank titles are
  ignored.

  Parameters
  ----------
  field_of_study : str
      Label stored with the result record.
  df : pandas.DataFrame
      List of projects by topic, exported from the NIH reporter. Must have a
      'Project Title' column.
  terminated_df : pandas.DataFrame, optional
      List of terminated projects. Must have a 'project_title' column.
      Defaults to the notebook-level `df_airtable`.
  results : list, optional
      List that the result record is appended to. Defaults to the
      notebook-level `data` list.

  Returns
  -------
  None
      The counts are printed, and a dict with the keys 'field_of_study',
      'nih_count', 'num_common_titles' and 'percent_common_titles' is appended
      to `results`.
  """
  # Fall back to the notebook-level objects so existing two-argument calls
  # keep working unchanged.
  # NOTE(review): the notebook also builds `df_airtable_edit` (fiscal year 2024,
  # 'Full grant' only) but never uses it; the default here stays on the
  # unfiltered `df_airtable` as before - confirm which frame is intended.
  if terminated_df is None:
    terminated_df = df_airtable
  if results is None:
    results = data

  def _clean(titles):
    # Return (normalised, original) pairs for the usable titles. Blank titles
    # must be dropped: "" is a substring of every string and would otherwise
    # "match" every title on the other side.
    cleaned = []
    for title in titles:
      if isinstance(title, str) and title.strip():
        cleaned.append((title.strip().lower(), title))
    return cleaned

  # Normalise each title once, instead of on every step of the nested loop.
  terminated_titles = _clean(terminated_df["project_title"])
  reporter_titles = _clean(df["Project Title"])

  # Keyed by normalised title so that the same title differing only in case or
  # surrounding whitespace is counted once; dicts keep first-seen order.
  common_titles = {}
  for terminated_key, terminated_title in terminated_titles:
    for reporter_key, reporter_title in reporter_titles:
      # Basic string matching (case-insensitive)
      if terminated_key in reporter_key:
        common_titles.setdefault(terminated_key, terminated_title)
      elif reporter_key in terminated_key:
        common_titles.setdefault(reporter_key, reporter_title)

  nih_count = len(df)
  num_common_titles = len(common_titles)

  print(f"Number of projects in NIH reporter {nih_count}")
  print(f"Number of common titles found: {num_common_titles}")

  # An empty export would otherwise raise ZeroDivisionError.
  percent_common_titles = num_common_titles / nih_count * 100 if nih_count else 0.0

  print(f"Percent common titles: {percent_common_titles:.2f}%")

  # Append a dictionary for the current term to the list
  results.append({
      'field_of_study': field_of_study,
      'nih_count': nih_count,
      'num_common_titles': num_common_titles,
      'percent_common_titles': percent_common_titles
  })

In [74]:
# Transgender: match terminated projects against the "transgender" export
get_results_by_field(field_of_study="transgender", df=df_transgender)

Number of projects in NIH reporter 201
Number of common titles found: 92
Percent common titles: 45.77%


In [75]:
# HIV: match terminated projects against the "hiv" export
get_results_by_field(field_of_study="hiv", df=df_hiv)

Number of projects in NIH reporter 3781
Number of common titles found: 129
Percent common titles: 3.41%


In [76]:
# LGBTQ: match terminated projects against the "lgbtq" export
get_results_by_field(field_of_study="lgbtq", df=df_lgbtq)

Number of projects in NIH reporter 77
Number of common titles found: 39
Percent common titles: 50.65%


In [77]:
# Vaccine hesitancy: match terminated projects against the "vaccine hesitancy" export
get_results_by_field(field_of_study="vaccine hesitancy", df=df_vaccine_hesitancy)

Number of projects in NIH reporter 44
Number of common titles found: 20
Percent common titles: 45.45%


In [78]:
# Create a Pandas DataFrame from the data list.
# Every call to get_results_by_field appends a record to `data`, so re-running
# one of the per-field cells leaves two records for that field. Keep only the
# most recent record per field so the 'field_of_study' index stays unique.
data_df = (
    pd.DataFrame(data)
    .drop_duplicates(subset='field_of_study', keep='last')
    .set_index('field_of_study')
)

In [79]:
# Show the per-field summary (one row per field of study).
data_df.head()

Unnamed: 0_level_0,nih_count,num_common_titles,percent_common_titles
field_of_study,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
transgender,201,92,45.771144
hiv,3781,129,3.411796
lgbtq,77,39,50.649351
vaccine hesitancy,44,20,45.454545


In [80]:
# Write the per-field summary, including its 'field_of_study' index, to CSV.
data_df.to_csv(path_or_buf='/content/percent-common-titles-nih-reporter.csv')