# Using Google Bard API (PaLM 2) for Survey Answer Classification  
Since you chose the Google Bard API (PaLM 2), we will use it, through the `google.generativeai` client library, to classify survey responses as High, Medium, or Low based on correctness.

In [1]:
# pip install google-generativeai
import google.generativeai as genai
import pandas as pd

In [2]:
# Google API key.
# The key is read from the GOOGLE_API_KEY environment variable rather than
# being hard-coded: a key committed to a notebook is visible to everyone who
# can read the file. NOTE(review): the key that used to be embedded here
# should be treated as leaked and revoked.
import os

_api_key = os.environ.get("GOOGLE_API_KEY")
if not _api_key:
    raise RuntimeError(
        "Set the GOOGLE_API_KEY environment variable before running this notebook."
    )
genai.configure(api_key=_api_key)

In [None]:
# Expected value for each numeric survey question, keyed by column name.
# classify_numeric_answer() looks answers up in this mapping.
numeric_correct_answers = dict(
    data_sources_pre=4,
    persona_definition_pre=1,
    interactive_persona_pre=2,
    data_driven_persona_pre=10,
    dynamic_persona_pre=10,
    persona_definition_post=10,
    interactive_persona_post=10,
    data_driven_persona_post=10,
    dynamic_persona_post=10,
)

In [None]:
# Function to classify numeric answers
def classify_numeric_answer(column, response):
    """
    Classify a numeric survey answer against its predefined correct value.

    Args:
        column: Key used to look up the expected value in
            ``numeric_correct_answers``.
        response: The raw answer; anything ``float()`` can convert.

    Returns:
        "Correct" if the answer equals the expected value, "Not Correct" if it
        does not, "No Classification" if ``column`` has no expected value, and
        "Invalid" if ``response`` cannot be converted to a number.
    """
    try:
        response_value = float(response)
    except (TypeError, ValueError):
        # ValueError: non-numeric text such as "abc".
        # TypeError: values float() cannot accept at all, such as None or a list.
        return "Invalid"

    correct_value = numeric_correct_answers.get(column)
    if correct_value is None:
        return "No Classification"

    return "Correct" if response_value == correct_value else "Not Correct"



In [3]:
# Canonical grade labels, keyed by their lower-case form.
_GRADE_LABELS = {"high": "High", "medium": "Medium", "low": "Low"}


def _normalize_grade(raw_text):
    """Map a model reply such as "'high'." onto "High"; return the stripped reply if it is not a known label."""
    stripped = raw_text.strip()
    key = stripped.strip("'\"`*.!:, ").lower()
    return _GRADE_LABELS.get(key, stripped)


# Function to grade responses using the AI
def grade_answer(question, response):
    """
    Ask the "gemini-pro" model (via ``google.generativeai``) to grade one answer.

    Args:
        question: The survey question text inserted into the prompt.
        response: The participant's answer inserted into the prompt.

    Returns:
        "High", "Medium" or "Low" when the model's reply is one of those labels
        (ignoring case, surrounding quotes and punctuation); otherwise the
        stripped reply text unchanged. Returns "Error" if the request or the
        reply handling raises any exception.
    """
    prompt = f"""
    You are an AI grader for a survey. Your job is to evaluate the correctness of participant answers.

    - HIGH: If the answer is fully correct and detailed.
    - MEDIUM: If the answer is somewhat correct but lacks detail.
    - LOW: If the answer is incorrect or unrelated.

    Question: {question}
    Answer: {response}

    How would you score this answer? (Just reply with 'High', 'Medium', or 'Low')
    """

    try:
        model = genai.GenerativeModel("gemini-pro")
        # Use a separate name so the `response` parameter is not shadowed.
        result = model.generate_content(prompt)
        # compare_performance() matches labels exactly, so normalise replies
        # like "HIGH" or "Medium." to the canonical spelling.
        return _normalize_grade(result.text)
    except Exception as e:
        # Best-effort: one failed request must not abort grading of the whole survey.
        print(f"Error grading response: {e}")
        return "Error"


In [4]:
# Function to compare pre/post responses and assign performance labels
def compare_performance(pre_label, post_label):
    """
    Describe how a participant's grade changed between the two surveys.

    Both labels must be exactly "Low", "Medium" or "High"; any other value
    yields "Error". Otherwise the result is "Improved", "Declined" or "Same",
    depending on whether the post-survey grade ranks above, below or level
    with the pre-survey grade.
    """
    rank = {"Low": 1, "Medium": 2, "High": 3}

    # Reject anything that is not one of the three known labels.
    if not (pre_label in rank and post_label in rank):
        return "Error"

    change = rank[post_label] - rank[pre_label]
    if change == 0:
        return "Same"
    return "Improved" if change > 0 else "Declined"


In [5]:
# Read the survey export into a DataFrame
file_path = "all_numeric_survey_with questions.csv"  # Replace with your actual survey file
df = pd.read_csv(filepath_or_buffer=file_path)

In [None]:
# Columns to be graded (correctness classification).
# The four questions share one stem and differ only in the aspect they cover,
# so the column names are assembled from the stem plus each aspect.
_question_stem = (
    "Explain how data-driven personas development could be used to create "
    "student personas for a marketing course that you are teaching "
    "(or you have participated in as a student). "
    "Be as specific as you can. "
    "Cover the following aspect in this question: - "
)
_question_aspects = (
    "data collection (e.g., what data to collect, how to collect, how much)",
    "data analysis (e.g., how to segment the data, what methods to use)",
    "persona building (e.g., how to create persona profiles)",
    "and evaluation (e.g., how to evaluate that the personas are accurate and useful)",
)
all_columns = [_question_stem + _aspect for _aspect in _question_aspects]

In [None]:
# Pre-survey and post-survey columns, written as matched (pre, post) pairs so
# that the positional pairing used for the comparison is visible at a glance.
_pre_post_pairs = [
    (
        "For what purposes can tools like Tableau, Power BI, or Python libraries be used in persona development?",
        "For what purposes can tools like Canva or Powerpoint be used in persona development? (post)",
    ),
    (
        "In persona development, why would you use APIs?  (pre)",
        "In persona development, why would you use APIs? (post)",
    ),
    (
        "For what purposes can machine learning algorithms be used in persona development?  (pre)",
        "For what purposes can machine learning algorithms be used in persona development? (post)",
    ),
]

pre_columns = [_pre for _pre, _ in _pre_post_pairs]
post_columns = [_post for _, _post in _pre_post_pairs]


In [None]:
# Create a new DataFrame to store graded responses
graded_df = df.copy()

# Ensure the length of both lists match for performance classification
if len(pre_columns) != len(post_columns):
    raise ValueError("Pre-survey and post-survey column lists must have the same length.")

# The comparison step below reads "<column>_Grade" for every pre/post column,
# so those columns must be graded as well as the ones in all_columns.
# dict.fromkeys() removes duplicates while keeping the original order.
columns_to_grade = list(dict.fromkeys([*all_columns, *pre_columns, *post_columns]))

# Fail before any API call is made if a listed column is absent from the data.
missing_columns = [col for col in columns_to_grade if col not in df.columns]
if missing_columns:
    raise KeyError(f"Columns not found in survey data: {missing_columns}")

# Grade all columns (Correctness: HIGH, MEDIUM, LOW)
for col in columns_to_grade:
    question = col.replace("_", " ").title()  # Convert column name to a readable question
    print(f"Grading responses for: {question}...")
    # Bind the current question as a default so the lambda cannot pick up a later one.
    graded_df[f"{col}_Grade"] = df[col].astype(str).apply(
        lambda response, question=question: grade_answer(question, response)
    )

# Compare pre/post survey responses only for selected columns (Performance: IMPROVED, SAME, DECLINED)
for pre_col, post_col in zip(pre_columns, post_columns):
    print(f"Comparing performance for: {pre_col} → {post_col}...")
    graded_df[f"{pre_col}_Performance"] = graded_df.apply(
        lambda row: compare_performance(row[f"{pre_col}_Grade"], row[f"{post_col}_Grade"]), axis=1
    )

# Save the graded responses
output_file = "survey_with_grades.csv"
graded_df.to_csv(output_file, index=False)
print(f"Graded responses saved to {output_file}")


In [None]:


# Pre-survey and post-survey column names, paired by position
pre_columns = ["column_1_pre", "column_2_pre", "column_3_pre"]  # Replace with actual pre-survey column names
post_columns = ["column_1_post", "column_2_post", "column_3_post"]  # Replace with corresponding post-survey column names

# Graded results are written to a copy of the loaded survey data
graded_df = df.copy()

# Pairs are formed positionally, so both lists must be equally long
if len(post_columns) != len(pre_columns):
    raise ValueError("Pre-survey and post-survey column lists must have the same length.")

for pre_col, post_col in zip(pre_columns, post_columns):
    # The pre-survey column name doubles as the question text for both surveys
    question = pre_col.replace("_", " ").title()

    print(f"Grading responses for: {question}...")

    pre_grade_col = f"{pre_col}_Grade"
    post_grade_col = f"{post_col}_Grade"

    # Grade the pre-survey answers first, then the post-survey answers
    for source_col, grade_col in ((pre_col, pre_grade_col), (post_col, post_grade_col)):
        answers = df[source_col].astype(str)
        graded_df[grade_col] = answers.apply(lambda answer: grade_answer(question, answer))

    # Row by row, turn the two grades into Improved / Declined / Same
    graded_df[f"{pre_col}_Performance"] = graded_df.apply(
        lambda row: compare_performance(row[pre_grade_col], row[post_grade_col]), axis=1
    )

# Write the graded survey out as CSV
output_file = "survey_with_grades.csv"
graded_df.to_csv(output_file, index=False)
print(f"Graded responses saved to {output_file}")
