In [1]:
import pandas as pd
import re
import numpy as np

In [2]:
# Load part 1 of the content-requirements table; the first CSV column becomes the row index.
content_req_1 = pd.read_csv(
    "../data/guttmacher_content_requirements_sex_ed_hiv.csv",
    index_col=0,
)

In [3]:
# Load part 2 of the content-requirements table; the first CSV column becomes the row index.
content_req_2 = pd.read_csv(
    "../data/guttmacher_content_requirements_sex_ed_hiv_part_2.csv",
    index_col=0,
)

In [4]:
# Load the life-skills table; the first CSV column becomes the row index.
life_skills = pd.read_csv(
    "../data/guttmacher_sex_ed_life_skills.csv",
    index_col=0,
)

In [5]:
# Outer-join the two content-requirement tables on their indexes,
# keeping rows that appear in either one.
flags_df = content_req_1.join(other=content_req_2, how="outer")

In [6]:
# Add the life-skills columns with another index-aligned outer join.
flags_df = flags_df.join(other=life_skills, how="outer")

In [7]:
def _strip_text(col):
    """Trim surrounding whitespace in object-dtype columns; return other columns unchanged."""
    if col.dtype == "object":
        return col.str.strip()
    return col


# Applied column by column, so cell values differing only by padding compare equal.
flags_df = flags_df.apply(_strip_text)

In [8]:
# Inspect the joined, whitespace-stripped flags table before it is one-hot encoded.
flags_df

Unnamed: 0,SEX EDUCATION MANDATED,HIV EDUCATION MANDATED,Be Medically Accurate,Be Age Appropriate,Be Culturally Appropriate and Unbiased,Cannot Promote Religion,Notice: Parental Role,Consent: Parental Role,Opt-out allowed: Parental Role,Contraception: Sex Ed,...,Importance of sex Only within marriage: Sex Ed,Sexual orientation: Sex Ed,Negative outcomes of teen sex: Sex Ed,Condoms: HIV,Abstinence: HIV,Healthy relationships,Sexual decision-making and self-discipline,Refusal skills and personal boundaries,Consent,Dating and sexual violence prevention
Alabama,,X,,X,,,,,X,X,...,X,,,X,Stress,,X,X,,X
Alaska,,,,,,,X,,X,,...,,,,,,,,,,X
Arizona,,,HIV,X,,,X,Sex,HIV,,...,,,X,,Stress,X,X,X,,X
Arkansas,,,,,,,,,,,...,X,,,,Stress,X,X,X,,X
California,X,X,X,X,X,X,X,,X,X,...,,Inclusive,,X,Cover,X,X,X,,X
Colorado,,,X,X,X,X,X,,X,X,...,,Inclusive,X,X,Cover,X,X,X,X,X
Connecticut,,X,,,,,,,X,X,...,,Inclusive,X,,,X,X,,,X
Delaware,X,X,,,,,,,,X,...,,Inclusive,,X,Stress,X,X,X,X,X
Dist. of Columbia,X,X,,X,,,X,,X,X,...,,Inclusive,X,,Cover,X,X,X,X,X
Florida,X,X,,X,,,,,X,,...,X,Negative,X,,Stress,X,X,,,X


In [9]:
# One-hot encode the text-valued flag columns; pandas names each new column
# "<original column>_<value>" (e.g. "..._X", "..._Stress").
# dtype=int pins 0/1 integer indicators: since pandas 2.0 the default dummy
# dtype is bool, which would render as True/False instead of 0/1.
flags_df = pd.get_dummies(flags_df, dtype=int)

In [10]:
# Load the policy/topic table; the first CSV column becomes the row index.
policy_topic = pd.read_csv(
    "../data/policy_topic.csv",
    index_col=0,
)

In [11]:
# Inspect the loaded policy/topic table (it has a "Topic Model" column used further down).
policy_topic

Unnamed: 0_level_0,Policy Title,Topic Model
State / Jurisdiction,Unnamed: 1_level_1,Unnamed: 2_level_1
Alabama,Amends Alabama Code Title 16. Education § 16-4...,5.0
Alabama,Alabama Code Title 16. Education § 16-40A-2 (e...,5.0
Alaska,LAWS OF ALASKA,2.0
Alaska,An Act renaming portions of the Alaska Safe Ch...,2.0
Alaska,Alaska Statutes 2020 AS 14.30.352,2.0
...,...,...
Washington,RCW 28A.230.020 Common school curriculum.,10.0
Wisconsin,115.35 Health problems education program.,10.0
Wisconsin,118.019 Human growth and development instruct...,8.0
West Virginia,§18-2-9. Required courses of instruction.,10.0


In [12]:
def update_columns(column):
    """Normalise a column label into a lowercase snake_case identifier.

    A single trailing "_X" (the suffix ``pd.get_dummies`` appends for the
    value "X") is removed, every run of non-word characters is collapsed to
    one underscore, and the result is lowercased.

    Parameters
    ----------
    column : str
        Original column label, e.g. ``"Contraception: Sex Ed_X"``.

    Returns
    -------
    str
        Normalised label, e.g. ``"contraception_sex_ed"``.
    """
    suffix = "_X"
    # str.rstrip("_X") would strip any trailing run of the characters "_" and
    # "X" (turning "Safe SEX_X" into "Safe SE"); remove the exact suffix instead.
    if column.endswith(suffix):
        column = column[: -len(suffix)]
    # Raw string so that \W is a regex class, not an invalid string escape.
    return re.sub(r"\W+", "_", column).lower()

In [13]:
# Rename every column through update_columns to get normalised labels.
flags_df.columns = flags_df.columns.map(update_columns)

In [14]:
flags_df

Unnamed: 0,sex_education_mandated,hiv_education_mandated,be_medically_accurate_hiv,be_medically_accurate,be_age_appropriate_hiv,be_age_appropriate,be_culturally_appropriate_and_unbiased,cannot_promote_religion,notice_parental_role_hiv,notice_parental_role,...,sexual_orientation_sex_ed_prohibited,negative_outcomes_of_teen_sex_sex_ed,condoms_hiv,abstinence_hiv_cover,abstinence_hiv_stress,healthy_relationships,sexual_decision_making_and_self_discipline,refusal_skills_and_personal_boundaries,consent,dating_and_sexual_violence_prevention
Alabama,0,1,0,0,0,1,0,0,0,0,...,0,0,1,0,1,0,1,1,0,1
Alaska,0,0,0,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,1
Arizona,0,0,1,0,0,1,0,0,0,1,...,0,1,0,0,1,1,1,1,0,1
Arkansas,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,1,1,1,1,0,1
California,1,1,0,1,0,1,1,1,0,1,...,0,0,1,1,0,1,1,1,0,1
Colorado,0,0,0,1,0,1,1,1,0,1,...,0,1,1,1,0,1,1,1,1,1
Connecticut,0,1,0,0,0,0,0,0,0,0,...,0,1,0,0,0,1,1,0,0,1
Delaware,1,1,0,0,0,0,0,0,0,0,...,0,0,1,0,1,1,1,1,1,1
Dist. of Columbia,1,1,0,0,0,1,0,0,0,1,...,0,1,0,1,0,1,1,1,1,1
Florida,1,1,0,0,0,1,0,0,0,0,...,0,1,0,0,1,1,1,0,0,1


In [15]:
# Add one 0/1 indicator column per topic: topic_<k> is 1 for a state when at
# least one of that state's rows in policy_topic has "Topic Model" == k.

# Rows with a missing topic are skipped; casting NaN to int would otherwise fail.
topic_by_state = policy_topic["Topic Model"].dropna().astype(int)

# The original pre-created topic_1..topic_9 only, so any higher topic (e.g. 10)
# got a column only if some state happened to be assigned to it. Derive the
# upper bound from the data instead, keeping 9 as a floor so the column set
# never shrinks relative to the previous behaviour.
highest_topic = 9
if not topic_by_state.empty:
    highest_topic = max(highest_topic, int(topic_by_state.max()))

for topic in range(1, highest_topic + 1):
    flags_df[f"topic_{topic}"] = 0

# Iterating (state, topic) pairs handles states with one row and states with
# many rows uniformly, so no scalar-vs-Series special case is needed.
for state, topic in topic_by_state.items():
    if state in flags_df.index:
        flags_df.loc[state, f"topic_{topic}"] = 1

# Fill any remaining gaps (e.g. from the outer joins) with 0 and store all flags as ints.
flags_df = flags_df.fillna(0).astype(int)

In [16]:
# Inspect the final table: the encoded flag columns plus the topic_* indicator columns.
flags_df

Unnamed: 0,sex_education_mandated,hiv_education_mandated,be_medically_accurate_hiv,be_medically_accurate,be_age_appropriate_hiv,be_age_appropriate,be_culturally_appropriate_and_unbiased,cannot_promote_religion,notice_parental_role_hiv,notice_parental_role,...,topic_1,topic_2,topic_3,topic_4,topic_5,topic_6,topic_7,topic_8,topic_9,topic_10
Alabama,0,1,0,0,0,1,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0
Alaska,0,0,0,0,0,0,0,0,0,1,...,0,1,0,0,0,0,0,0,0,0
Arizona,0,0,1,0,0,1,0,0,0,1,...,0,0,0,0,1,0,0,1,0,1
Arkansas,0,0,0,0,0,0,0,0,0,0,...,0,1,0,0,1,0,0,1,0,0
California,1,1,0,1,0,1,1,1,0,1,...,1,0,1,0,0,0,0,0,0,0
Colorado,0,0,0,1,0,1,1,1,0,1,...,1,0,0,0,0,0,0,0,0,0
Connecticut,0,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
Delaware,1,1,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,1,0,0,0
Dist. of Columbia,1,1,0,0,0,1,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
Florida,1,1,0,0,0,1,0,0,0,0,...,0,1,0,1,1,0,0,1,0,0
