# SpeakAge - Predicting Child Age from Conversation Data
Author: Adi Salmon, Freida Barnatan


In [None]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import pandas as pd

file_path = '/content/drive/MyDrive/Final_Project/dataset_with_child_features.csv'
df_loaded = pd.read_csv(file_path)

df_loaded.head()

## Evaluate Model Using Prompts

In this section, we evaluate the child age prediction model by providing GPT with prompts containing few-shot examples and conversation context. We compare the model’s predicted age groups with the true age_bin_readable values, compute accuracy, and analyze mispredictions to understand the impact of prompt design on performance.

### Predict Child Age Groups Using GPT-3.5

This section improves the previous approach by including 1–2 adult lines before each child utterance as context. This gives GPT more information about what the child is responding to, improving prediction accuracy. The pipeline handles token limits, cleans special tokens, truncates long conversations, and computes accuracy.

In [None]:
# ------------------ HEADER ------------------

import pandas as pd
import random
import ast
from openai import OpenAI

# Initialize OpenAI client
client = OpenAI(api_key="-- ")

# ------------------ DATA PREPARATION ------------------

# Ensure 'conversation' column is proper Python list
df['conversation'] = df['conversation'].apply(lambda x: ast.literal_eval(x) if isinstance(x, str) else x)

# Clean special tokens
def clean_text(text):
    return text.replace("UNRECOGNIZED_WORD", "[unintelligible]").replace("UNK", "[unknown]")

# ------------------ FUNCTION: CHILD LINES + CONTEXT ------------------

def conv_to_text_with_context(conv_list, context_window=1):
    """
    Extract child lines with a few lines of adult context.
    context_window: number of previous lines to include before CHI line.
    """
    lines = []
    for i, item in enumerate(conv_list):
        if isinstance(item, (tuple, list)) and len(item) == 2:
            speaker, text = item
            if speaker == 'CHI':
                # Include previous 'context_window' adult lines
                start_idx = max(0, i - context_window)
                context_lines = []
                for j in range(start_idx, i):
                    prev_speaker, prev_text = conv_list[j]
                    if prev_speaker != 'CHI':
                        context_lines.append(f"{prev_speaker}: {clean_text(prev_text)}")
                # Add current CHI line
                context_lines.append(f"{speaker}: {clean_text(text)}")
                lines.append(" ".join(context_lines))
    return " ".join(lines)

# ------------------ STEP 1: Build few-shot examples ------------------

age_bins = df['age_bin_readable'].unique()
n_examples_per_bin = 2  # examples per age bin
few_shot_examples = ""

for bin_val in age_bins:
    few_rows = df[df['age_bin_readable'] == bin_val].sample(
        n=min(n_examples_per_bin, len(df[df['age_bin_readable'] == bin_val])), random_state=42
    )
    for _, row in few_rows.iterrows():
        conv_text = conv_to_text_with_context(row['conversation'], context_window=1)
        # Truncate to 200 words to avoid token overflow
        conv_words = conv_text.split()
        if len(conv_words) > 200:
            conv_text = " ".join(conv_words[:200])
        few_shot_examples += f"""
Conversation: {conv_text}
Age group: {row['age_bin_readable']}
"""

# ------------------ STEP 2: Sample test rows ------------------

n_samples = min(20, len(df))
sample_df = df.sample(n=n_samples, random_state=24).copy()

# ------------------ STEP 3: Build prompt template ------------------

prompt_template = f"""
You are a child development expert.

Below are examples of conversations with their correct age groups:
{few_shot_examples}

Now, given the following conversation between a mother (MOT) and her child (CHI), estimate the child's age and assign it to an age group.

Important:
- "CHI" means child
- "MOT" means mother
- "INV" means interviewer
- "ADU", "GMA", "UNK" are other adults
- Focus on the child's words (vocabulary, sentence length, complexity) to decide the age group

Conversation: {{conversation_text}}

Answer format: Age group: <{', '.join(age_bins)}>
"""

# ------------------ STEP 4: Generate predictions ------------------

predicted_age_bins = []

for conv in sample_df['conversation']:
    conv_text = conv_to_text_with_context(conv, context_window=1)
    # Truncate long text to 300 words
    conv_words = conv_text.split()
    if len(conv_words) > 300:
        conv_text = " ".join(conv_words[:300])

    prompt = prompt_template.format(conversation_text=conv_text)

    # Call GPT
    response = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": prompt}]
    )

    pred = response.choices[0].message.content.strip()
    pred_clean = pred.split(":")[-1].strip()
    predicted_age_bins.append(pred_clean)

# ------------------ STEP 5: Store predictions and evaluate ------------------

sample_df['predicted_age_bin_clean'] = predicted_age_bins
sample_df['age_bin_clean'] = sample_df['age_bin_readable'].str.extract(r'(\d-\d)')[0]
sample_df['correct'] = sample_df['predicted_age_bin_clean'] == sample_df['age_bin_clean']

# Accuracy
accuracy = sample_df['correct'].mean()
print(f"Accuracy on random sample: {accuracy:.2%}")

# Show mispredictions
misclassified = sample_df[sample_df['correct'] == False][['conversation','age_bin_clean','predicted_age_bin_clean']]
print(misclassified)

Accuracy on random sample: 65.00%
                                           conversation age_bin_clean  \
1416  [(CHI, this is for you Mama), (CHI, this is fo...           3-5   
1680  [(ADU, what do you wanna play with?), (CHI, we...           3-5   
810   [(MOT, come here Chi come over here), (CHI, yo...           5-6   
476   [(MOT, let me try it on), (MOT, I might like i...           3-5   
1829  [(UNK, what that lady's name?), (UNK, Steve do...           3-5   
623   [(INV, I want you to give us a look at this wi...           3-5   
462   [(GMA, UNRECOGNIZED_WORD turn the table over a...           3-5   

     predicted_age_bin_clean  
1416                     2-3  
1680                     2-3  
810                      2-3  
476                      2-3  
1829                     2-3  
623                      2-3  
462                      5-6  


### Improve predict Child Age Groups Using GPT-3.5

In [None]:
# ------------------ HEADER ------------------

import pandas as pd
import random
import ast
from openai import OpenAI

# Initialize OpenAI client
client = OpenAI(api_key="---")

# ------------------ DATA PREPARATION ------------------

# Ensure 'conversation' column is a proper Python list
df['conversation'] = df['conversation'].apply(lambda x: ast.literal_eval(x) if isinstance(x, str) else x)

# Function to convert conversation tuples to text
def conv_to_text(conv_list, keep_speaker=None):
    """
    Convert conversation tuples to text.
    keep_speaker: 'CHI' to keep only child lines, None to keep all speakers.
    """
    lines = []
    for item in conv_list:
        if isinstance(item, (tuple, list)) and len(item) == 2:
            speaker, text = item
            if keep_speaker is None or speaker == keep_speaker:
                lines.append(f"{speaker}: {text}")
    return " ".join(lines)

# Clean special tokens
def clean_text(text):
    return text.replace("UNRECOGNIZED_WORD", "[unintelligible]").replace("UNK", "[unknown]")

# ------------------ STEP 1: Prepare few-shot examples ------------------

age_bins = df['age_bin_readable'].unique()
few_shot_examples = ""

for bin_val in age_bins:
    # Take the first example of this age bin
    row = df[df['age_bin_readable'] == bin_val].iloc[0]

    # Use only CHI lines
    conv_text = conv_to_text([(s, clean_text(t)) for s, t in row['conversation']], keep_speaker='CHI')

    # Truncate to 200 words max
    conv_words = conv_text.split()
    if len(conv_words) > 200:
        conv_text = " ".join(conv_words[:200])

    few_shot_examples += f"""
Conversation: {conv_text}
Age group: {row['age_bin_readable']}
"""

print("Few-shot examples prepared for all age bins.")

# ------------------ STEP 2: Sample test rows ------------------

n_samples = min(20, len(df))
sample_df = df.sample(n=n_samples, random_state=24).copy()

# ------------------ STEP 3: Build prompt template ------------------

prompt_template = f"""
You are a child development expert.

Below are examples of conversations with their correct age groups:
{few_shot_examples}

Now, given the following conversation between a mother (MOT) and her child (CHI), estimate the child's age and assign it to an age group.

Important:
- "CHI" means child
- "MOT" means mother
- "INV" means interviewer
- "ADU", "GMA", "UNK" are other adults
- Focus on the child's words (vocabulary, sentence length, complexity) to decide the age group

Conversation: {{conversation_text}}

Answer format: Age group: <{', '.join(age_bins)}>
"""

# ------------------ STEP 4: Generate predictions ------------------

predicted_age_bins = []

for conv in sample_df['conversation']:
    # Use only CHI lines for prediction
    conv_text = conv_to_text([(s, clean_text(t)) for s, t in conv], keep_speaker='CHI')
    # Truncate if too long
    conv_words = conv_text.split()
    if len(conv_words) > 300:
        conv_text = " ".join(conv_words[:300])

    prompt = prompt_template.format(conversation_text=conv_text)

    # Call GPT
    response = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": prompt}]
    )

    # Extract predicted age bin
    pred = response.choices[0].message.content.strip()
    pred_clean = pred.split(":")[-1].strip()
    predicted_age_bins.append(pred_clean)

# ------------------ STEP 5: Store predictions and evaluate ------------------

sample_df['predicted_age_bin_clean'] = predicted_age_bins
sample_df['age_bin_clean'] = sample_df['age_bin_readable'].str.extract(r'(\d-\d)')[0]
sample_df['correct'] = sample_df['predicted_age_bin_clean'] == sample_df['age_bin_clean']

# Accuracy
accuracy = sample_df['correct'].mean()
print(f"Accuracy on random sample: {accuracy:.2%}")

# Show mispredictions
misclassified = sample_df[sample_df['correct'] == False][['conversation','age_bin_clean','predicted_age_bin_clean']]
print(misclassified)

Few-shot examples prepared for all age bins.
Accuracy on random sample: 70.00%
                                           conversation age_bin_clean  \
1416  [(CHI, this is for you Mama), (CHI, this is fo...           3-5   
810   [(MOT, come here Chi come over here), (CHI, yo...           5-6   
623   [(INV, I want you to give us a look at this wi...           3-5   
462   [(GMA, UNRECOGNIZED_WORD turn the table over a...           3-5   
267   [(INV, will you help me make a nice picture?),...           2-3   
1590  [(MOT, what do you think?), (CHI, there's two ...           2-3   

     predicted_age_bin_clean  
1416                     2-3  
810                      3-5  
623                      2-3  
462                      5-6  
267                      3-5  
1590                     3-5  


In [None]:
# ------------------ HEADER ------------------
import pandas as pd
import ast
from openai import OpenAI

# Initialize OpenAI client
client = OpenAI(api_key="---")

# ------------------ DATA PREPARATION ------------------
# Ensure 'conversation' column is proper Python list
df['conversation'] = df['conversation'].apply(lambda x: ast.literal_eval(x) if isinstance(x, str) else x)

# Clean special tokens
def clean_text(text):
    return text.replace("UNRECOGNIZED_WORD", "[unintelligible]").replace("UNK", "[unknown]").replace("ADU", "[adult]")

# Normalize child text
def normalize_child_text(text):
    return clean_text(text).lower()

# ------------------ FUNCTION: CHILD LINES + CONTEXT ------------------
def conv_to_text_with_context(conv_list, context_window=1):
    """
    Extract child lines with a few lines of adult context.
    context_window: number of previous lines to include before CHI line.
    """
    lines = []
    for i, item in enumerate(conv_list):
        if isinstance(item, (tuple, list)) and len(item) == 2:
            speaker, text = item
            if speaker == 'CHI':
                # Include previous 'context_window' adult lines
                start_idx = max(0, i - context_window)
                context_lines = []
                for j in range(start_idx, i):
                    prev_speaker, prev_text = conv_list[j]
                    if prev_speaker != 'CHI':
                        context_lines.append(f"{prev_speaker}: {normalize_child_text(prev_text)}")
                # Add current CHI line
                context_lines.append(f"{speaker}: {normalize_child_text(text)}")
                lines.append(" ".join(context_lines))
    return " ".join(lines)

# ------------------ STEP 1: Build few-shot examples (1 per age bin) ------------------
age_bins = df['age_bin_readable'].unique()
few_shot_examples = ""

for bin_val in age_bins:
    # Pick 1 representative row per age bin
    row = df[df['age_bin_readable'] == bin_val].sample(n=1, random_state=42).iloc[0]
    conv_text = conv_to_text_with_context(row['conversation'], context_window=1)
    conv_words = conv_text.split()
    if len(conv_words) > 200:
        conv_text = " ".join(conv_words[:200])
    few_shot_examples += f"""
Conversation: {conv_text}
Age group: {row['age_bin_readable']}
"""

print("Few-shot examples prepared for all age bins.")

# ------------------ STEP 2: Sample test rows ------------------
n_samples = min(20, len(df))
sample_df = df.sample(n=n_samples, random_state=24).copy()

# ------------------ STEP 3: Build prompt template ------------------
prompt_template = f"""
You are a child development expert.

Below are examples of conversations with their correct age groups:
{few_shot_examples}

Now, given the following conversation between a mother (MOT) and her child (CHI), estimate the child's age and assign it to an age group.

Important:
- "CHI" means child
- "MOT" means mother
- "INV" means interviewer
- "ADU", "GMA", "UNK" are other adults
- Focus on the child's words, vocabulary, sentence length, and complexity to decide the age group

Conversation: {{conversation_text}}

Answer format: Age group: <{', '.join(age_bins)}>
"""

# ------------------ STEP 4: Generate predictions ------------------
predicted_age_bins = []

for conv in sample_df['conversation']:
    conv_text = conv_to_text_with_context(conv, context_window=1)
    conv_words = conv_text.split()
    if len(conv_words) > 300:
        conv_text = " ".join(conv_words[:300])  # take first 300 words

    prompt = prompt_template.format(conversation_text=conv_text)

    # Call GPT
    response = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": prompt}]
    )

    pred = response.choices[0].message.content.strip()
    pred_clean = pred.split(":")[-1].strip()
    predicted_age_bins.append(pred_clean)

# ------------------ STEP 5: Store predictions and evaluate ------------------
sample_df['predicted_age_bin_clean'] = predicted_age_bins
sample_df['age_bin_clean'] = sample_df['age_bin_readable'].str.extract(r'(\d-\d)')[0]
sample_df['correct'] = sample_df['predicted_age_bin_clean'] == sample_df['age_bin_clean']

# Accuracy
accuracy = sample_df['correct'].mean()
print(f"Accuracy on random sample: {accuracy:.2%}")

# Show mispredictions
misclassified = sample_df[sample_df['correct'] == False][['conversation','age_bin_clean','predicted_age_bin_clean']]
print(misclassified)

Few-shot examples prepared for all age bins.
Accuracy on random sample: 70.00%
                                           conversation age_bin_clean  \
810   [(MOT, come here Chi come over here), (CHI, yo...           5-6   
1613  [(MOT, I came home late), (CHI, what's in here...           2-3   
623   [(INV, I want you to give us a look at this wi...           3-5   
970   [(MOT, Chi would you mind just telling the lad...           3-5   
462   [(GMA, UNRECOGNIZED_WORD turn the table over a...           3-5   
1590  [(MOT, what do you think?), (CHI, there's two ...           2-3   

     predicted_age_bin_clean  
810                      3-5  
1613                     3-5  
623                      2-3  
970                      5-6  
462                      5-6  
1590                     3-5  
