In [62]:
import pandas as pd
import csv

In [63]:
grants_1990s = pd.read_csv('https://raw.githubusercontent.com/ejt2703/neh_grant_data_gender/main/NEH_Grants1990s.csv')

In [64]:
grants_2000s = pd.read_csv('https://raw.githubusercontent.com/ejt2703/neh_grant_data_gender/refs/heads/main/NEH_Grants2000s.csv')

In [65]:
grants_2010s = pd.read_csv('https://raw.githubusercontent.com/ejt2703/neh_grant_data_gender/refs/heads/main/NEH_Grants2010s.csv')

In [66]:
grants_2020s = pd.read_csv('https://raw.githubusercontent.com/ejt2703/neh_grant_data_gender/refs/heads/main/NEH_Grants2020s.csv')

In [67]:
# AppNumber,ApplicantType,Institution,OrganizationType,InstCity,InstState,InstPostalCode,InstCountry,CongressionalDistrict,Latitude,Longitude,CouncilDate,YearAwarded,ProjectTitle,Program,Division,ApprovedOutright,ApprovedMatching,AwardOutright,AwardMatching,OriginalAmount,SupplementAmount,BeginGrant,EndGrant,ProjectDesc,ToSupport,PrimaryDiscipline,SupplementCount,Supplements,ParticipantCount,Participants,DisciplineCount,Disciplines

In [68]:
combined_grants = [grants_1990s, grants_2000s, grants_2010s, grants_2020s]
combined_df = pd.concat(combined_grants, ignore_index=True)

In [69]:
combined_df.to_csv("combined_df.csv", index=False)

I want to:
1. Create a new column called "gender_focus".
2. Filter for projects that relate to gender using a list of keywords that appear in either the ProjectTitle, ProjectDesc, or ToSupport columns, or that are listed explicitly as a gender studies project in the PrimaryDiscipline or Disciplines columns.
3. Add a "yes", "no", or "unknown" to the "gender_focus" column based on the result.

In [70]:
gender_keywords = [
'GENDER',
 'GENDERED',
 'GENDERING',
 'SEXUALITY',
 'SEXUALIZE',
 'SEXUALIZATION',
 'SEXUAL',
 'LGBTQ+',
 'LGBT',
 'LGBT+',
 'LGBTQIA+',
 'LGBTQIA',
 'LESBIAN',
 'GAY',
 'BISEXUAL',
 'ASEXUAL',
 'TRANSGENDER',
 'QUEER',
 'QUEERED',
 'QUEERING',
 'WOMAN',
 'WOMEN',
 'WOMANHOOD',
 'FEMALE',
 'FEMALES',
 'GIRL',
 'GIRLS',
 'GIRLHOOD',
 'FEMININE',
 'FEMININITY',
 'MASCULINE',
 'MASCULINITY',
 'FEMINISM',
 'FEMINIST',
 'FEMINISMS',
 'PATRIARCHY',
 'PATRIARCHAL',
 'HETERONORMATIVE',
 'HETERONORMATIVITY',
 'CISGENDER',
 'CISNORMATIVE',
 'CISNORMATIVITY',
 'EMILY DICKINSON',
 'FRANCES PERKINS',
 'SHIRLEY CHISHOLM',
 'MARY SHELLEY',
 'CATHARINE',
 'SOR JUANA',
 'ESTHER',
 'HANNAH',
 'LOUISA',
 'ISADORA',
 'MME DE STAEL',
 'OCTAVIA BUTLER',
 'AGNES',
 'GABRIELA MISTRAL',
 'ISABEL',
 'LUISA',
 'UNA JEFFERS',
 'HATSHEPSUT',
 'MINÉ OKUBO',
 'BERNARDINE',
 'GLBT',
 'GODDESS',
 'ELIZABETH',
 'CLEOPATRA',
 'CHARLOTTE',
 'BRONTE',
 'WOOLF',
 'TONI MORRISON',
 'MAYA ANGELOU',
 'BELL HOOKS',
 'ALICE WALKER',
 'ANNA JULIA COOPER',
 'ZORA NEALE HURSTON',
 'FRIDA KAHLO',
 'CELIA CRUZ',
 'RIGOBERTA MENCHÚ',
 'RIGOBERTA MENCHU',
 'SANDRA CISNEROS',
 'GLORIA ANZALDÚA',
 'GLORIA ANZALDUA',
 'JULIA DE BURGOS',
 'ESMERALDA SANTIAGO',
 'LORNA DEE CERVANTES',
 'LORRAINE HANSBERRY',
 'ASSATA SHAKUR',
 'CHIMAMANDA NGOZI ADICHIE',
 'LESLIE MARMON SILKO',
 'LOUISE ERDRICH',
 'JOY HARJO',
 'LINDA HOGAN',
 'ROBIN WALL KIMMERER',
 'WINONA LADUKE',
 'AUNG SAN SUU KYI',
 'YOKO ONO',
 'MAXINE HONG KINGSTON',
 'AMY TAN',
 'CATHY PARK HONG',
 'OCEAN VUONG',
 'THERESA HAK KYUNG CHA',
 'GAYATRI SPIVAK',
 'GHADA AL-SAMMAN',
 'ASSIA DJEBAR',
 'LEILA ABOULELA',
 'MARJANE SATRAPI',
 'FATEMA MERNISSI',
 'SAPPHO',
 'SAPPHIC',
 'SAPPHISM',
 'HAUNANI-KAY TRASK',
 'TERESIA TEAIWA',
 'SELINA TUSITALA MARSH',
 'PATRICIA GRACE',
 'GRACE MERA MOLISA',
 'SUSAN',
 'DAUGHTER',
 'ADELAIDE JOHNSON',
 'ALICE CHENOWETH',
 'HELEN HAMILTON GARDENER,ALMA THOMAS',
 'AMY LOWELL',
 'ANNA JULIA COOPER',
 'BEATRIX POTTER',
 'BELLE DA COSTA GREENE',
 'CHARLOTTE PERKINS GILMAN',
 'EUDORA WELTY',
 'ESMERALDA SANTIAGO',
 'HANNAH CRAFTS',
 'ISABELLA STEWART GARDNER',
 'JANE ADDAMS',
 'JOAN OF ARC',
 'JULIA ALVAREZ',
 'JULIA ÁLVAREZ',
 'LOIE FULLER',
 'LOÏE FULLER',
 'KATHARINE',
 'LA MALINCHE',
 'MFK FISHER',
 'M.F.K. FISHER',
 'MARTHA MOORE BALLARD',
 'MARY CASSATT',
 'MOTHER CABRINI',
 'MOTHER FRANCES XAVIER CABRINI',
 'MOTHER THERESA',
 'NATALIE DE BLOIS',
 'RUTH BADER GINSBURG',
 'SYLVIA PLATH',
 'URSULA PARROTT',
 'URSULA LE GUIN',
 'URSULA K. LE GUIN',
 'URSULA K LE GUIN',
 'WINONA LADUKE'
 ]

In [71]:
gender_discipline = [
    "GENDER STUDIES",
    "WOMEN'S HISTORY"
]

In [72]:
combined_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 34632 entries, 0 to 34631
Data columns (total 33 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   AppNumber              34632 non-null  object 
 1   ApplicantType          34632 non-null  int64  
 2   Institution            34632 non-null  object 
 3   OrganizationType       34632 non-null  object 
 4   InstCity               34632 non-null  object 
 5   InstState              34632 non-null  object 
 6   InstPostalCode         34628 non-null  object 
 7   InstCountry            34632 non-null  object 
 8   CongressionalDistrict  34632 non-null  int64  
 9   Latitude               34632 non-null  object 
 10  Longitude              34632 non-null  object 
 11  CouncilDate            34632 non-null  object 
 12  YearAwarded            34632 non-null  int64  
 13  ProjectTitle           34632 non-null  object 
 14  Program                34632 non-null  object 
 15  Di

In [73]:
info_columns = ['ProjectTitle', 'ProjectDesc', 'ToSupport']
discipline_columns = ['PrimaryDiscipline', 'Disciplines']

In [74]:
def gender_filter(row):
    for column in info_columns:
        if any(keyword in row[column].upper() for keyword in gender_keywords):
            return True
    for column in discipline_columns:
        if any(keyword in row[column].upper() for keyword in gender_discipline):
            return True
    return False

In [75]:
combined_df['gender_focus'] = combined_df.apply(gender_filter, axis=1)

# apply lets me run the function across rows or columns
# axis=1 tells pandas to specifically apply the function to each row of the dataframe

AttributeError: 'float' object has no attribute 'upper'