In [1]:
import pandas as pd

# Load CSV

In [3]:
# Read the raw question export; the path is relative to the current working directory.
df = pd.read_csv("questions.csv")
# Preview the first rows to confirm the file loaded with the expected columns.
df.head()

Unnamed: 0,question_id,question_text,response_options,field_date
0,Q001,Do you support or oppose a policy that would p...,Strongly support|Somewhat support|Somewhat opp...,2024-09-15
1,Q002,Which message is more persuasive in favor of e...,Message A: Expanding Medicaid would provide he...,2024-11-02
2,Q003,Which of the following issues are most importa...,Economy and jobs|Healthcare|Education|Climate ...,2024-10-20
3,Q004,"If the election for Congress were held today, ...",Democratic candidate|Republican candidate|Unde...,2024-10-28
4,Q005,Do you have a favorable or unfavorable opinion...,Very favorable|Somewhat favorable|Somewhat unf...,2024-08-12


# Build the questions table

In [5]:
# Keep only the per-question columns; .copy() detaches the result from df so that
# adding a column to questions_df later does not touch the raw frame.
questions_df = df.loc[:, ['question_id', 'question_text', 'field_date']].copy()

# Infer question type
def infer_type(text):
    """Classify a question by the first marker phrase found in its text.

    Matching is case-insensitive and by substring. Markers are tried in the
    order listed below, so a text containing several markers gets the type of
    the earliest one. A text containing none of them is "single_choice".

    Args:
        text: The question wording (a string).

    Returns:
        One of "forced_choice", "multi_select", "favorability", "likert",
        or "single_choice".
    """
    marker_to_type = (
        ("which message", "forced_choice"),
        ("which of the following", "multi_select"),
        ("favorable", "favorability"),  # also matches inside "unfavorable"
        ("support or oppose", "likert"),
    )
    lowered = text.lower()
    return next(
        (qtype for marker, qtype in marker_to_type if marker in lowered),
        "single_choice",
    )

# Label each question by running infer_type over its text, one element at a time.
questions_df['question_type'] = questions_df['question_text'].map(infer_type)

questions_df.head()

Unnamed: 0,question_id,question_text,field_date,question_type
0,Q001,Do you support or oppose a policy that would p...,2024-09-15,likert
1,Q002,Which message is more persuasive in favor of e...,2024-11-02,forced_choice
2,Q003,Which of the following issues are most importa...,2024-10-20,multi_select
3,Q004,"If the election for Congress were held today, ...",2024-10-28,single_choice
4,Q005,Do you have a favorable or unfavorable opinion...,2024-08-12,favorability


# Build the response_options table

In [6]:
# Explode the pipe-delimited response_options column into one row per option
# (long format): question_id, option_text, option_order (0-based position).
rows = []

# Walk the two needed columns in lockstep instead of iterrows(), which builds a
# Series for every row and would also rebind `_` in the notebook namespace.
for question_id, raw_options in zip(df['question_id'], df['response_options']):
    # A blank response_options cell is read by pandas as NaN (a float), which
    # has no .split(); such a question simply contributes no option rows.
    if pd.isna(raw_options):
        continue
    for option_order, option_text in enumerate(raw_options.split("|")):
        rows.append({
            'question_id': question_id,
            'option_text': option_text.strip(),
            'option_order': option_order,
        })

# Name the columns explicitly so the table keeps its schema even when `rows`
# is empty (pd.DataFrame([]) alone would have no columns at all).
response_options_df = pd.DataFrame(
    rows, columns=['question_id', 'option_text', 'option_order']
)
response_options_df.head()

Unnamed: 0,question_id,option_text,option_order
0,Q001,Strongly support,0
1,Q001,Somewhat support,1
2,Q001,Somewhat oppose,2
3,Q001,Strongly oppose,3
4,Q002,Message A: Expanding Medicaid would provide he...,0


# (Optional) Python Class to recreate QuestionBanks in the future

In [7]:
class QuestionBank:
    """Lookup helper over a questions table and its response-options table.

    Both frames are stored by reference (no copy is made). ``questions_df``
    needs ``question_id`` and ``question_text`` columns;
    ``response_options_df`` needs a ``question_id`` column.
    """

    def __init__(self, questions_df, response_options_df):
        self.questions = questions_df
        self.response_options = response_options_df

    def get_question(self, question_id):
        """Return one question and its response options.

        Args:
            question_id: Value to match against the ``question_id`` column.

        Returns:
            A ``(question, options)`` tuple: ``question`` is the first
            matching row of the questions table (a Series) and ``options`` is
            the DataFrame of response-option rows with the same id (possibly
            empty).

        Raises:
            IndexError: If no question has ``question_id``.
        """
        matches = self.questions[self.questions['question_id'] == question_id]
        if matches.empty:
            # Same exception type that .iloc[0] on an empty frame raises, but
            # with a message that names the id that was not found.
            raise IndexError(f"no question with question_id {question_id!r}")
        q = matches.iloc[0]
        opts = self.response_options[self.response_options['question_id'] == question_id]
        return q, opts

    def search(self, keyword, regex=True):
        """Return the questions whose text contains ``keyword`` (case-insensitive).

        Args:
            keyword: Pattern to look for in ``question_text``.
            regex: When True (the default), ``keyword`` is interpreted as a
                regular expression. Pass False to match it literally, e.g.
                when it contains characters such as "(" or "?".

        Returns:
            The matching rows of the questions table. Rows whose
            ``question_text`` is missing never match.
        """
        # na=False: a missing question_text would otherwise put NaN in the
        # mask, and boolean indexing with a NaN-containing mask raises.
        mask = self.questions['question_text'].str.contains(
            keyword, case=False, regex=regex, na=False
        )
        return self.questions[mask]

# Wrap the two tables built in the earlier cells in a QuestionBank.
qb = QuestionBank(questions_df, response_options_df)

# Look up one question: q is its row from the questions table, opts its response options.
q, opts = qb.get_question("Q001")
print(q)
print(opts)

question_id                                                   Q001
question_text    Do you support or oppose a policy that would p...
field_date                                              2024-09-15
question_type                                               likert
Name: 0, dtype: object
  question_id       option_text  option_order
0        Q001  Strongly support             0
1        Q001  Somewhat support             1
2        Q001   Somewhat oppose             2
3        Q001   Strongly oppose             3
