# Using Google Bard API (PaLM 2) for Survey Answer Classification  
Since you chose Google Bard API (PaLM 2), we will use it to classify survey responses as High, Medium, or Low based on correctness.

In [42]:
# pip install google-generativeai
import google.generativeai as genai
import pandas as pd

In [None]:
#Google PaLM 2 API key
genai.configure(api_key="!!!!!")

In [44]:
# List available models
models = genai.list_models()
for model in models:
    print(model.name)

models/chat-bison-001
models/text-bison-001
models/embedding-gecko-001
models/gemini-1.0-pro-vision-latest
models/gemini-pro-vision
models/gemini-1.5-pro-latest
models/gemini-1.5-pro-001
models/gemini-1.5-pro-002
models/gemini-1.5-pro
models/gemini-1.5-flash-latest
models/gemini-1.5-flash-001
models/gemini-1.5-flash-001-tuning
models/gemini-1.5-flash
models/gemini-1.5-flash-002
models/gemini-1.5-flash-8b
models/gemini-1.5-flash-8b-001
models/gemini-1.5-flash-8b-latest
models/gemini-1.5-flash-8b-exp-0827
models/gemini-1.5-flash-8b-exp-0924
models/gemini-2.0-flash-exp
models/gemini-2.0-flash
models/gemini-2.0-flash-001
models/gemini-2.0-flash-lite-001
models/gemini-2.0-flash-lite
models/gemini-2.0-pro-exp
models/gemini-2.0-pro-exp-02-05
models/gemini-exp-1206
models/gemini-2.0-flash-thinking-exp-01-21
models/gemini-2.0-flash-thinking-exp
models/gemini-2.0-flash-thinking-exp-1219
models/learnlm-1.5-pro-experimental
models/embedding-001
models/text-embedding-004
models/aqa
models/imagen-3.

In [45]:
# Define correct answers for numeric questions
numeric_correct_answers = {
    #"data_sources_pre": 4,
    "persona_definition_pre": 1,
    "interactive_persona_pre": 2,
    "data_driven_persona_pre": 2,
    "dynamic_persona_pre": 1,
    "persona_definition_post": 4,
    "interactive_persona_post": 1,
    "data_driven_persona_post": 2,
    "dynamic_persona_post": 1
}

In [46]:
# Function to classify numeric answers
def classify_numeric_answer(column, response):
    """
    Classifies numeric answers as Correct or Not Correct based on predefined correct values.
    """
    try:
        response_value = float(response)  # Convert response to float
        correct_value = numeric_correct_answers.get(column, None)

        if correct_value is not None:
            return "Correct" if response_value == correct_value else "Not Correct"
        else:
            return "No Classification"
    except ValueError:
        return "Invalid"  # Handle cases where response is not a valid number



In [47]:
# Function to grade text-based responses using AI
import time  # Import time module to add delay

def grade_text_answer(question, response):
    """
    Uses Google Bard API (PaLM 2) to classify survey responses as High, Medium, or Low correctness.
    """
    prompt = f"""
    You are an AI grader for a survey. Your job is to evaluate the correctness of participant answers.

    - HIGH: If the answer is fully correct and detailed.
    - MEDIUM: If the answer is somewhat correct but lacks detail.
    - LOW: If the answer is incorrect or unrelated.

    Question: {question}
    Answer: {response}

    How would you score this answer? (Just reply with 'High', 'Medium', or 'Low')
    """

    try:
        model = genai.GenerativeModel("gemini-1.5-pro-latest")
        response = model.generate_content(prompt)
        time.sleep(1)  # Add a 1-second delay to avoid exceeding quota
        return response.text.strip()
    except Exception as e:
        print(f"Error grading response: {e}")
        return "Error"


In [48]:
# Function to compare pre/post responses and assign performance labels
def compare_performance(pre_label, post_label):
    """
    Assigns a performance change label based on pre-survey and post-survey correctness.
    """
    label_order = {"Low": 1, "Medium": 2, "High": 3}  # Convert labels to numeric values for comparison

    if pre_label not in label_order or post_label not in label_order:
        return "Error"  # Handle unexpected labels

    if label_order[post_label] > label_order[pre_label]:
        return "Improved"
    elif label_order[post_label] < label_order[pre_label]:
        return "Declined"
    else:
        return "Same"


In [49]:
# Load survey dataset
file_path = "all_numeric_survey_with questions.csv"  # Replace with your actual survey file
df

Unnamed: 0,start_date_pre,end_date_pre,ip_address_pre,duration_sec_pre,response_id_pre,LocationLatitude_pre,LocationLongitude_pre,DistributionChannel_pre,UserLanguage_pre,participant_id,...,engagement_experience_post,interaction_quality_post,communication_clarity_post,trustworthiness_post,emotional_response_post,naturalness_post,effectiveness_post,comfort_level_post,personalization_post,mental_effort_post
0,2/13/25 0:10,2/13/25 0:16,193.166.113.18,372,R_8GBoA0i1k6yogS1,63.1198,21.6798,0,0,1,...,5,1,7,2,2,1,6,6,1,4
1,2/13/25 0:09,2/13/25 0:16,193.166.113.32,409,R_8QG2UV0Zv0fKVB5,63.1198,21.6798,0,0,23,...,4,2,4,4,2,4,4,2,2,6
2,2/13/25 0:09,2/13/25 0:17,193.166.113.31,462,R_8tHW5RDZd4yBkcx,63.1198,21.6798,0,0,22,...,3,4,6,6,1,4,5,3,4,5
3,2/13/25 0:11,2/13/25 0:17,193.166.113.21,355,R_8462bTis2cTNcii,63.1198,21.6798,0,0,19,...,6,6,6,4,2,3,6,1,4,4
4,2/13/25 0:09,2/13/25 0:18,193.166.117.7,551,R_824J8ZB9ZGfOu8t,63.1198,21.6798,0,0,15,...,4,2,3,3,1,4,3,6,1,6
5,2/13/25 0:10,2/13/25 0:18,193.166.108.65,503,R_8nILfCbV7DT317z,63.1092,21.5936,0,0,13,...,3,2,6,4,1,2,6,4,3,6
6,2/13/25 0:10,2/13/25 0:19,85.76.76.143,518,R_2dXtd952juNtA5k,60.2149,24.799,0,0,7,...,4,4,3,4,2,2,3,5,2,7
7,2/13/25 0:11,2/13/25 0:19,193.166.113.6,487,R_2DYADIupQ1S6J8F,63.1198,21.6798,0,0,12,...,6,6,6,6,2,6,6,2,5,6
8,2/13/25 0:09,2/13/25 0:19,193.166.108.69,632,R_8m1Ou9kltVx56Vz,63.1092,21.5936,0,0,4,...,5,6,6,3,1,6,5,2,4,2
9,2/13/25 0:10,2/13/25 0:20,193.166.113.43,599,R_83pqGRKBhCMAliU,63.1198,21.6798,0,0,25,...,5,2,5,5,5,5,5,3,3,4


In [50]:
# Define all columns to be graded (correctness classification)
numeric_columns = ["persona_definition_pre", "interactive_persona_pre", "data_driven_persona_pre",
                   "dynamic_persona_pre", "persona_definition_post", "interactive_persona_post",
                   "data_driven_persona_post", "dynamic_persona_post"]

In [51]:
# Define all columns to be graded (correctness classification)
text_columns = ["Explain how data-driven personas development could be used to create student personas for a marketing course that you are teaching (or you have participated in as a student). Be as specific as you can. Cover the following aspect in this question: - data collection (e.g., what data to collect, how to collect, how much)", 
               "Explain how data-driven personas development could be used to create student personas for a marketing course that you are teaching (or you have participated in as a student). Be as specific as you can. Cover the following aspect in this question: - data analysis (e.g., how to segment the data, what methods to use)", 
               "Explain how data-driven personas development could be used to create student personas for a marketing course that you are teaching (or you have participated in as a student). Be as specific as you can. Cover the following aspect in this question: - persona building (e.g., how to create persona profiles)", 
               "Explain how data-driven personas development could be used to create student personas for a marketing course that you are teaching (or you have participated in as a student). Be as specific as you can. Cover the following aspect in this question: - and evaluation (e.g., how to evaluate that the personas are accurate and useful)"]  

In [52]:
# Manually specify which columns belong to pre-survey and post-survey
pre_columns = ["For what purposes can tools like Tableau, Power BI, or Python libraries be used in persona development?", 
               "In persona development, why would you use APIs?  (pre)", 
               "For what purposes can machine learning algorithms be used in persona development?  (pre)"] 


post_columns = ["For what purposes can tools like Canva or Powerpoint be used in persona development? (post)", 
                "In persona development, why would you use APIs? (post)", 
                "For what purposes can machine learning algorithms be used in persona development? (post)"]


In [53]:
# Create a new DataFrame to store graded responses
graded_df = df.copy()

In [54]:
# Grade text-based responses (Correctness: HIGH, MEDIUM, LOW)
for col in text_columns:
    question = col.replace("_", " ").title()  # Convert column name to a readable question
    print(f"Grading text responses for: {question}...")
    graded_df[f"{col}_Grade"] = df[col].astype(str).apply(lambda response: grade_text_answer(question, response))

Grading text responses for: Explain How Data-Driven Personas Development Could Be Used To Create Student Personas For A Marketing Course That You Are Teaching (Or You Have Participated In As A Student). Be As Specific As You Can. Cover The Following Aspect In This Question: - Data Collection (E.G., What Data To Collect, How To Collect, How Much)...
Error grading response: 429 Resource has been exhausted (e.g. check quota).
Error grading response: 429 Resource has been exhausted (e.g. check quota).
Error grading response: 429 Resource has been exhausted (e.g. check quota).
Error grading response: 429 Resource has been exhausted (e.g. check quota).
Error grading response: 429 Resource has been exhausted (e.g. check quota).
Error grading response: 429 Resource has been exhausted (e.g. check quota).
Error grading response: 429 Resource has been exhausted (e.g. check quota).
Error grading response: 429 Resource has been exhausted (e.g. check quota).
Error grading response: 429 Resource has 

In [55]:
# Classify numeric responses (Correctness: CORRECT or NOT CORRECT)
for col in numeric_columns:
    print(f"Classifying numeric responses for: {col}...")
    graded_df[f"{col}_Grade"] = df[col].astype(str).apply(lambda response: classify_numeric_answer(col, response))

Classifying numeric responses for: persona_definition_pre...
Classifying numeric responses for: interactive_persona_pre...
Classifying numeric responses for: data_driven_persona_pre...
Classifying numeric responses for: dynamic_persona_pre...
Classifying numeric responses for: persona_definition_post...
Classifying numeric responses for: interactive_persona_post...
Classifying numeric responses for: data_driven_persona_post...
Classifying numeric responses for: dynamic_persona_post...


In [56]:
# Compare pre/post survey responses only for selected columns (Performance: IMPROVED, SAME, DECLINED)
for pre_col, post_col in zip(pre_columns, post_columns):
    print(f"Comparing performance for: {pre_col} → {post_col}...")
    graded_df[f"{pre_col}_Performance"] = graded_df.apply(
        lambda row: compare_performance(row[f"{pre_col}_Grade"], row[f"{post_col}_Grade"]), axis=1
    )


Comparing performance for: For what purposes can tools like Tableau, Power BI, or Python libraries be used in persona development? → For what purposes can tools like Canva or Powerpoint be used in persona development? (post)...


KeyError: 'For what purposes can tools like Tableau, Power BI, or Python libraries be used in persona development?_Grade'

In [None]:
# Save the graded responses
output_file = "survey_with_grades.csv"
graded_df.to_csv(output_file, index=False)
print(f"Graded responses saved to {output_file}")

In [None]:


# Manually specify which columns belong to pre-survey and post-survey
pre_columns = ["column_1_pre", "column_2_pre", "column_3_pre"]  # Replace with actual pre-survey column names
post_columns = ["column_1_post", "column_2_post", "column_3_post"]  # Replace with corresponding post-survey column names

graded_df = df.copy()  # Create a new DataFrame to store graded responses

# Ensure the length of both lists match
if len(pre_columns) != len(post_columns):
    raise ValueError("Pre-survey and post-survey column lists must have the same length.")

# Loop through each pre/post column pair
for pre_col, post_col in zip(pre_columns, post_columns):
    # Extract the question from column name
    question = pre_col.replace("_", " ").title()  # Convert column name to a readable question

    print(f"Grading responses for: {question}...")

    # Grade pre-survey responses
    graded_df[f"{pre_col}_Grade"] = df[pre_col].astype(str).apply(lambda response: grade_answer(question, response))

    # Grade post-survey responses
    graded_df[f"{post_col}_Grade"] = df[post_col].astype(str).apply(lambda response: grade_answer(question, response))

    # Compare pre/post grades to determine performance change
    graded_df[f"{pre_col}_Performance"] = graded_df.apply(
        lambda row: compare_performance(row[f"{pre_col}_Grade"], row[f"{post_col}_Grade"]), axis=1
    )

# Save the graded responses
output_file = "survey_with_grades.csv"
graded_df.to_csv(output_file, index=False)
print(f"Graded responses saved to {output_file}")
