# Import Libraries
Import the necessary libraries, specifically pandas and json.

In [1]:
# Import Libraries
import pandas as pd
import json

# Load JSON Data
Load the JSON log file that contains the survey responses using Python's built-in `open()` function together with `json.load()`, then collect the content of each `user` message into a DataFrame.

In [4]:
# Load JSON Data
# Location of the saved log; kept in one named constant so it is easy to repoint.
LOG_PATH = '/workspaces/gvca_survey_analytics/response_analysis_logs/2025-02-02_02-22-47_ai_response_analysis.json'

# Pass the encoding explicitly: without it open() falls back to the platform's
# locale encoding, which can mis-decode non-ASCII text in a UTF-8 JSON file.
with open(LOG_PATH, 'r', encoding='utf-8') as file:
    data = json.load(file)

# Keep only the entries whose role is 'user', one message per row
responses = [entry['content'] for entry in data if entry['role'] == 'user']

# Create DataFrame
df = pd.DataFrame(responses, columns=['response'])

# Display the DataFrame
df.head()

Unnamed: 0,response
0,\n You are a skilled analyst with experienc...
1,Here are the survey responses to analyze:\n- (...
2,now check and make sure none of the respondent...
3,now summarize the open response data into a li...


# Parse and Convert Data
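Parse the JSON data and convert it into a pandas DataFrame with one row per non-empty line of each user message, so that each survey response ends up on its own row. Note that the instruction prompt is split line by line as well, so the first rows hold prompt text rather than survey responses.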

In [5]:
# Parse and Convert Data

# Break every user message into its non-blank lines (trimmed of surrounding
# whitespace) and gather all of them into one flat list.
responses = []
for entry in data:
    if entry['role'] != 'user':
        continue
    trimmed_lines = map(str.strip, entry['content'].split('\n'))
    split_responses = [line for line in trimmed_lines if line]
    responses.extend(split_responses)

# One row per collected line
df = pd.DataFrame(responses, columns=['response'])

# Preview the result
df.head()

Unnamed: 0,response
0,You are a skilled analyst with experience in s...
1,You have been asked to analyze the responses t...
2,of the students at a classical education focus...
3,View Classical Academy. The survey was conduc...
4,the parents on the school's performance and to...


# Display the DataFrame
Display the first rows of the resulting DataFrame with `.head()` (or `print()`) to verify that the data was loaded correctly.

In [6]:
# Preview the first five rows of df
df.head(n=5)

Unnamed: 0,response
0,You are a skilled analyst with experience in s...
1,You have been asked to analyze the responses t...
2,of the students at a classical education focus...
3,View Classical Academy. The survey was conduc...
4,the parents on the school's performance and to...


In [7]:
# Create a new DataFrame with user prompts and AI responses
user_prompts = []
ai_responses = []

# Walk over consecutive (entry, next entry) pairs; a pair is kept only when a
# 'user' entry is immediately followed by an 'assistant' entry.
for current, following in zip(data, data[1:]):
    if current['role'] == 'user' and following['role'] == 'assistant':
        user_prompts.append(current['content'])
        ai_responses.append(following['content'])

# Always (re)build the DataFrame, even when it ends up empty, so that a re-run
# never leaves a stale df_prompts_responses from an earlier execution in the kernel.
df_prompts_responses = pd.DataFrame({
    'prompt': user_prompts,
    'ai_response': ai_responses
})

if df_prompts_responses.empty:
    print("No matching prompt-response pairs found.")
else:
    display(df_prompts_responses.head())

Unnamed: 0,prompt,ai_response
0,Here are the survey responses to analyze:\n- (...,"```json\n[\n {\n ""respondent_id"": 11478722..."
1,now check and make sure none of the respondent...,I have thoroughly reviewed the JSON output pro...
2,now summarize the open response data into a li...,"```json\n{\n ""summary"": {\n ""Curriculum"": ..."


In [8]:
import json

def format_json_response(response):
    """Pretty-print a response that is wrapped in a ```json Markdown fence.

    Parameters
    ----------
    response : str
        Raw response text. Any non-string value (for example None or NaN coming
        from a DataFrame column with missing entries) is returned unchanged
        instead of raising.

    Returns
    -------
    str or the original value
        The fenced JSON re-serialised with 4-space indentation when the text
        starts with ```json and the content between the fence lines parses as
        JSON. In every other case the original ``response`` is returned
        untouched.
    """
    if not isinstance(response, str):
        return response

    stripped = response.strip()
    if not stripped.startswith("```json"):
        return response

    # `stripped` begins with the opening marker, so the first line is always the
    # fence line and can be dropped unconditionally.
    lines = stripped.splitlines()[1:]
    # Remove the last line only if it is the closing marker
    if lines and lines[-1].strip() == "```":
        lines = lines[:-1]

    try:
        json_obj = json.loads("\n".join(lines).strip())
    except (json.JSONDecodeError, RecursionError):
        # Not parseable as JSON (e.g. extra text after the closing fence):
        # leave the response unchanged.
        return response
    return json.dumps(json_obj, indent=4)

# Run every AI response through format_json_response and store the result back
# in the ai_response column
formatted_responses = df_prompts_responses['ai_response'].apply(format_json_response)
df_prompts_responses['ai_response'] = formatted_responses
display(df_prompts_responses.head())

Unnamed: 0,prompt,ai_response
0,Here are the survey responses to analyze:\n- (...,"[\n {\n ""respondent_id"": 11478722086..."
1,now check and make sure none of the respondent...,I have thoroughly reviewed the JSON output pro...
2,now summarize the open response data into a li...,"```json\n{\n ""summary"": {\n ""Curriculum"": ..."


In [23]:
import re

# Pull the raw ai_response text stored in row 2
json_str_row_2 = df_prompts_responses.at[2, 'ai_response']

def clean_json_string(s):
    """Strip a ```json Markdown fence and trim the text down to its brace span.

    The input is stripped of surrounding whitespace. If it starts with
    ```json, the opening fence line is removed, along with the final line when
    that line is a bare ``` marker. Afterwards, if the text contains both a
    '{' and a '}', only the slice from the first '{' to the last '}'
    (inclusive) is kept; otherwise the text is returned as it stands.
    """
    text = s.strip()

    if text.startswith("```json"):
        # The text begins with the marker, so the first line is the fence line.
        fenced = text.splitlines()[1:]
        if fenced and fenced[-1].strip() == "```":
            fenced.pop()
        text = "\n".join(fenced).strip()

    # Keep only what lies between the first '{' and the last '}'
    first_brace, last_brace = text.find('{'), text.rfind('}')
    if -1 not in (first_brace, last_brace):
        text = text[first_brace:last_brace + 1]
    return text

def fix_json_string(s):
    """Parse ``s`` as JSON, applying progressively more aggressive repairs on failure.

    The string is first parsed as-is. If that fails, repairs are attempted in
    stages and the text is re-parsed after each one, so the least destructive
    repair that works is the one that gets used:

    1. Structural fixes only: strip trailing commas and insert a comma between
       back-to-back objects (``}{``).
    2. Additionally replace single quotes with double quotes and wrap bare
       ``word:`` keys in quotes. These rewrites also hit apostrophes and
       ``word:`` sequences inside string values, which is why they are held
       back until the structural fixes alone have failed.
    3. Insert up to three commas at the positions where the parser reports a
       missing ``,`` delimiter.

    Parameters
    ----------
    s : str
        Candidate JSON text.

    Returns
    -------
    The parsed JSON value, or None when every attempt fails (the final decode
    error is printed). A document that is just JSON ``null`` also yields None.
    """
    try:
        return json.loads(s)
    except json.JSONDecodeError:
        pass

    # Stage 1: structural repairs that leave quoting untouched.
    s_structural = s.rstrip(',').replace('}{', '},{')
    s_structural = re.sub(r',\s*([}\]])', r'\1', s_structural)
    try:
        return json.loads(s_structural)
    except json.JSONDecodeError:
        pass

    # Stage 2: the full repair chain, restarted from the untouched input.
    s_fixed = s.replace("'", '"')
    s_fixed = s_fixed.rstrip(',')
    s_fixed = s_fixed.replace('}{', '},{')
    s_fixed = re.sub(r'(?<!")(\b\w+\b)(?!"):', r'"\1":', s_fixed)
    s_fixed = re.sub(r',\s*([}\]])', r'\1', s_fixed)

    # Stage 3: iterative insertion of missing commas at the reported positions.
    for _ in range(3):
        try:
            return json.loads(s_fixed)
        except json.JSONDecodeError as e_inner:
            if "Expecting ',' delimiter" not in str(e_inner):
                break
            pos = e_inner.pos
            # Give up if the position is past the end or already holds a
            # character that a comma would not help with.
            if pos >= len(s_fixed) or s_fixed[pos] in (',', '}', ']'):
                break
            s_fixed = s_fixed[:pos] + ',' + s_fixed[pos:]

    try:
        return json.loads(s_fixed)
    except json.JSONDecodeError as e_final:
        print(f"Error decoding JSON: {e_final}")
        return None

# Strip the Markdown fence / surrounding text from the row-2 response
cleaned_json_str = clean_json_string(json_str_row_2)

if not cleaned_json_str:
    print("The JSON string is empty or invalid.")
else:
    json_data_row_2 = fix_json_string(cleaned_json_str)
    if not json_data_row_2:
        print("The JSON string could not be fixed.")
    else:
        # Parsing succeeded: show the first rows of the resulting table
        df_json_row_2 = pd.DataFrame(json_data_row_2)
        display(df_json_row_2.head())

Error decoding JSON: Expecting ',' delimiter: line 6 column 140 (char 755)
The JSON string could not be fixed.


In [34]:
import json
import re

# Pull the raw ai_response text stored in row 2
json_str_row_2 = df_prompts_responses.at[2, 'ai_response']

def extract_json_and_text(s):
    """Split ``s`` into its outermost ``{...}`` span and the text around it.

    Parameters
    ----------
    s : str
        Text that may contain a JSON object surrounded by other content.

    Returns
    -------
    tuple
        ``(json_str, additional_text)``. ``json_str`` is the slice from the
        first '{' to the last '}' (inclusive), or None when there is no '{'
        followed by a later '}'. ``additional_text`` is the text before and
        after that slice, each part stripped and the non-empty parts joined by
        a newline; when no span is found it is the whole stripped input.
    """
    s = s.strip()
    json_start = s.find('{')
    json_end = s.rfind('}')

    # A usable span needs an opening brace that comes before the closing one;
    # this also covers the case where either brace is missing.
    if json_start == -1 or json_end < json_start:
        return None, s

    json_str = s[json_start:json_end + 1]
    surrounding = (s[:json_start].strip(), s[json_end + 1:].strip())
    # Join with a newline so the leading and trailing text do not run together.
    additional_text = "\n".join(part for part in surrounding if part)
    return json_str, additional_text

def fix_json_string(s):
    """Parse ``s`` as JSON, applying progressively more aggressive repairs on failure.

    NOTE(review): a function with this same name is also defined in an earlier
    cell of this notebook; whichever cell ran last wins. Consider keeping a
    single definition.

    The string is first parsed as-is. If that fails, repairs are attempted in
    stages and the text is re-parsed after each one, so the least destructive
    repair that works is the one that gets used:

    1. Structural fixes only: strip trailing commas and insert a comma between
       back-to-back objects (``}{``).
    2. Additionally replace single quotes with double quotes and wrap bare
       ``word:`` keys in quotes. These rewrites also hit apostrophes and
       ``word:`` sequences inside string values, which is why they are held
       back until the structural fixes alone have failed.
    3. Insert up to three commas at the positions where the parser reports a
       missing ``,`` delimiter.

    Parameters
    ----------
    s : str
        Candidate JSON text.

    Returns
    -------
    The parsed JSON value, or None when every attempt fails (the final decode
    error is printed). A document that is just JSON ``null`` also yields None.
    """
    try:
        return json.loads(s)
    except json.JSONDecodeError:
        pass

    # Stage 1: structural repairs that leave quoting untouched.
    s_structural = s.rstrip(',').replace('}{', '},{')
    s_structural = re.sub(r',\s*([}\]])', r'\1', s_structural)
    try:
        return json.loads(s_structural)
    except json.JSONDecodeError:
        pass

    # Stage 2: the full repair chain, restarted from the untouched input.
    s_fixed = s.replace("'", '"')
    s_fixed = s_fixed.rstrip(',')
    s_fixed = s_fixed.replace('}{', '},{')
    s_fixed = re.sub(r'(?<!")(\b\w+\b)(?!"):', r'"\1":', s_fixed)
    s_fixed = re.sub(r',\s*([}\]])', r'\1', s_fixed)

    # Stage 3: iterative insertion of missing commas at the reported positions.
    for _ in range(3):
        try:
            return json.loads(s_fixed)
        except json.JSONDecodeError as e_inner:
            if "Expecting ',' delimiter" not in str(e_inner):
                break
            pos = e_inner.pos
            # Give up if the position is past the end or already holds a
            # character that a comma would not help with.
            if pos >= len(s_fixed) or s_fixed[pos] in (',', '}', ']'):
                break
            s_fixed = s_fixed[:pos] + ',' + s_fixed[pos:]

    try:
        return json.loads(s_fixed)
    except json.JSONDecodeError as e_final:
        print(f"Error decoding JSON: {e_final}")
        return None

# Extract JSON and additional text
json_str, additional_text = extract_json_and_text(json_str_row_2)

if json_str:
    json_data_row_2 = fix_json_string(json_str)
    if json_data_row_2:
        df_json_row_2 = pd.DataFrame(json_data_row_2)
        display(df_json_row_2.head())
    else:
        print("The JSON string could not be fixed.")
else:
    print("No valid JSON found in the string.")

# Report the text that surrounded the JSON payload. Each outcome is printed
# exactly once; the nested re-check that used to sit in the else branch could
# never find text and only repeated the "No additional text found." message.
if additional_text:
    print("Additional text found:")
    print(additional_text)
else:
    print("No additional text found.")

Unnamed: 0,summary
Curriculum,{'Consistency and Standards': 'Parents express...
Teachers,{'Quality and Dedication': 'Teachers are highl...
Communication,{'Timeliness and Clarity': 'Parents are reques...
Policies & Administration,{'Carline and Pickup Procedures': 'A significa...
Culture & Virtues,{'Alignment with Family Values': 'Parents appr...


Additional text found:
```json```

**Summary of Themes:**

1. **Curriculum**
   - **Consistency and Standards:** Parents seek uniform classroom and homework expectations across different teachers. There are concerns regarding the complexity of math curriculum and the introduction of multiple problem-solving methods.
   - **Academic Rigor:** High academic standards are appreciated, especially in math, reading, writing, and classical subjects like Latin. While challenging, the workload is a source of stress and potential burnout.
   - **Language Offerings:** Requests for additional language options, particularly Spanish, to enhance students' linguistic skills and future career prospects.
   - **Support for Diverse Learning Needs:** Emphasis on providing adequate support for students with learning differences, such as ADHD, and appreciation for existing tutoring and homework help programs.

2. **Teachers**
   - **Quality and Dedication:** Teachers are highly praised for their commitment, 

In [35]:
# Pull the ai_response text stored in row 0
json_str = df_prompts_responses.at[0, 'ai_response']

# Decode the JSON text into Python objects
json_data = json.loads(json_str)

# Build a DataFrame from the decoded data
df_json = pd.DataFrame(json_data)

# Preview the first five rows
df_json.head(n=5)

Unnamed: 0,respondent_id,categories
0,114787220864,"[Curriculum, Teachers]"
1,114787220864,"[Policies & Administration, Culture & Virtues,..."
2,114787220864,"[Policies & Administration, Concern]"
3,114787220864,[Communication]
4,114787223354,"[Teachers, Good Outcomes]"
