In this notebook, I would like to see if we can identify all sustainability-focused courses required for the STARS submission.

This would be done by searching for "\bsustain\*" in course outlines and descriptions.

The baseline for this work are the courses in the sustainability minor.


In [62]:
import pandas as pd
import re
from tqdm import tqdm

In [115]:
BASELINE = [
    "ANTHROP 2AN3",
    "ANTHROP 2C03",
    "ANTHROP 3C03",
    "ANTHROP 3Y03",
    "ANTHROP 4CL3",
    "ANTHROP 4CP3",
    "ARTSSCI 3GJ3",
    "ARTSSCI 4CA3",
    "ARTSSCI 4CK3",
    "ARTSSCI 4CM3",
    "ARTSSCI 4EP3",
    "ART 2ER3",
    "CHEM 2BC3",
    "CHEM 2SC3",
    "CHEM 3I03",
    "CHEMENG 4A03",
    "CMST 4P03",
    "CMTYENGA 2A03",
    "COMMERCE 1B03",
    "COMMERCE 1E03",
    "COMMERCE 3FL3",
    "COMMERCE 4BL3",
    "COMMERCE 4BM3",
    "COMMERCE 4MG3",
    "COMMERCE 4SG3",
    "CSCT 2Z03",
    "EARTHSC 2GG3",
    "EARTHSC 2WW3",
    "EARTHSC 2EI3",
    "EARTHSC 4EA3",
    "ECON 2J03",
    "ECON 3W03",
    "ENGLISH 1H03",
    "ENGLISH 2Z03",
    "ENGLISH 3GG3",
    "ENGLISH 4E03",
    "ENGNMGT 5EL3",
    "ENGPHYS 3ES3",
    "ENGPHYS 4X03",
    "ENGSOCTY 2X03",
    "ENGSOCTY 3Z03",
    "ENVIRSC 1B03",
    "ENVIRSC 1C03",
    "ENVIRSC 2EI3",
    "ENVIRSC 2WW3",
    "ENVIRSC 3EE3",
    "ENVIRSC 4EA3",
    "ENVIRSC 4HH3",
    "ENVSOCTY 1HA3",
    "ENVSOCTY 1HB3",
    "ENVSOCTY 2EI3",
    "ENVSOCTY 2UI3",
    "ENVSOCTY 3EC3",
    "ENVSOCTY 3EE3",
    "ENVSOCTY 3EG3",
    "ENVSOCTY 3ER3",
    "ENVSOCTY 3UP3",
    "ENVSOCTY 4EA3",
    "ENVSOCTY 4HH3",
    "ENVSOCTY 4US3",
    "GEOG 1HA3",
    "GEOG 1HB3",
    "GEOG 2EI3",
    "GEOG 2UI3",
    "GEOG 3EC3",
    "GEOG 3EE3",
    "GEOG 3ER3",
    "GEOG 3UP3",
    "GEOG 4EA3",
    "GEOG 4HH3",
    "GLOBALZN 1A03",
    "HLTHAGE 2AN3",
    "HLTHAGE 4M03",
    "HISTORY 2EH3",
    "HISTORY 3CH3",
    "HISTORY 3UA3",
    "HISTORY 4K03",
    "HTHSCI 1RR3",
    "HTHSCI 3AH3",
    "HTHSCI 4LD3",
    "HTHSCI 4PA3",
    "HTHSCI 4ZZ3",
    "IBH 1AD3",
    "IBH 1BD3",
    "IBH 3BA3",
    "IBH 3BB3",
    "INDIGST 2D03",
    "INDIGST 3R03",
    "INDIGST 4A03",
    "INSPIRE 2GS3",
    "LIFESCI 2H03",
    "LIFESCI 2X03",
    "LIFESCI 3D03",
    "LIFESCI 3H03",
    "LIFESCI 4F03",
    "MATLS 4I03",
    "MECHENG 4O04",
    "PEACEST 1A03",
    "PEACEST 3D03",
    "PEACEST 4G03",
    "PEACEST 4L03",
    "PEACEST 4FC3",
    "PEACEST 4J03",
    "PEACJUST 1A03",
    "PEACJUST 3D03",
    "PEACJUST 3GG3",
    "PEACJUST 4FC3",
    "PEACJUST 4G03",
    "PEACJUST 4L03",
    "PHILOS 2N03",
    "PHILOS 3L03",
    "POLSCI 3GC3",
    "POLSCI 4PE3",
    "RELIGST 2W03",
    "SCICOMM 2M03",
    "SCIENCE 2M03",
    "SEP 4A03",
    "SEP 4EL3",
    "SEP 4X03",
    "SOCIOL 4NN3",
    "SUSTAIN 1S03",
    "SUSTAIN 2GS3",
    "SUSTAIN 2IS3",
    "SUSTAIN 2S03",
    "SUSTAIN 2SD3",
    "SUSTAIN 2SS3",
    "SUSTAIN 3S03",
    "SUSTAIN 3SS3",
    "SUSTAIN 4S06",
    "INSPIRE 2GS3",
    "SUSTAIN 1S03",
    "SUSTAIN 2GS3",
    "SUSTAIN 2IS3",
    "SUSTAIN 2S03",
    "SUSTAIN 2SD3",
    "SUSTAIN 3S03",
    "SUSTAIN 4S06",
]
BASELINE = set(BASELINE)
ugrad_outlines_2023 = pd.read_json(
    "../../data/raw/2023_all_ugrad_outlines.jsonl", orient="records", lines=True
)
ugrad_outlines_2023["CC_CN"] = ugrad_outlines_2023.apply(
    lambda row: f'{row["COURSE CODE"]} {row["CATALOG NBR"]}', axis=1
)
ugrad_outlines_2023["CC_CN"] = ugrad_outlines_2023["CC_CN"].apply(
    lambda s: re.sub(r"[A|B]$", "", s)
)
ugrad_outlines_2023["text"] = ugrad_outlines_2023["text"].apply(
    lambda s: re.sub(r"REQUESTS FOR RELIEF.+", "", s, flags=re.IGNORECASE | re.DOTALL)
)
ugrad_outlines_2023["text"] = ugrad_outlines_2023["text"].apply(
    lambda s: re.sub(r"Academic Integrity.+", "", s, flags=re.IGNORECASE | re.DOTALL)
)
ugrad_outlines_2023["text"] = ugrad_outlines_2023["text"].apply(
    lambda s: re.sub(
        r"We recognize and acknowledge.+around the Great Lakes",
        "",
        s,
        flags=re.IGNORECASE | re.DOTALL,
    )
)

In [120]:
q = "peace"
for i, row in ugrad_outlines_2023.iterrows():
    if q in row["text"]:
        break

# print(row["text"])

In [125]:
AASHE_KEYWORDS = [
    r"\b.+violen.+",
    # r"\baccess.+",
    r"\baffordab.+",
    r"\bbiodiversity",
    r"\bcitizenship",
    r"\bclimate",
    r"\bconservation",
    r"\bcontamina.+",
    r"\bdiscriminat.+",
    r"\bdiversity",
    r"\becolog.+",
    r"\benergy",
    # r"\benvironment.+",
    r"\bethic.+",
    r"\bethnicity",
    r"\bgender",
    r"\bgreen",
    r"\bhunger",
    r"\bimmigra.+",
    r"\binclus.+",
    r"\bIndigenous",
    r"\bjustice",
    r"\bminorit.+ ",
    r"\bmulticultural ",
    r"\bpeace",
    r"\bpollut.+",
    r"\bpoverty",
    r"\bqueer",
    r"\bracial ",
    r"\bracism",
    r"\bresilien.+",
    # r"\bresponsib.+",
    r"\bsafe.+",
    r"\bsustainab.+",
    r"\btransgender ",
    r"\btransport.+",
    r"\burbanization ",
    r"\bwaste",
    r"\bwastewater ",
    r"\bwater",
    r"\bwildlife",
]

counts = dict()
sustain_courses = set()

for kw in tqdm(AASHE_KEYWORDS):
    sc = ugrad_outlines_2023[
        ugrad_outlines_2023["text"].apply(lambda x: True if re.search(kw, x) else False)
    ]
    counts[kw] = len(sc["CC_CN"])
    sustain_courses |= set(sc["CC_CN"])

100%|██████████| 1/1 [00:00<00:00,  3.56it/s]


In [126]:
minor_only = BASELINE - sustain_courses
query_only = sustain_courses - BASELINE
both = BASELINE.intersection(sustain_courses)

print(
    f"""
Minor only:\t{len(minor_only)}
Query only:\t{len(query_only)}
Both: \t\t{len(both)}

"""
)


Minor only:	93
Query only:	103
Both: 		36


