## Analyze Extracted Goals and Source Goal Phrases

This notebook computes statistics from generated goals, including the frequency of goals extracted from interviewer versus stakeholder turns, the number of turns used to extract a goal, etc.

In [2]:
import json

data_path = 'data1'

results = json.load(open('%s/extracted-moving.json' % data_path, 'r'))
data = json.load(open('%s/transcripts.json' % data_path, 'r'))

In [4]:
for k, v in results['10'][2].items():
    print('%s: %s' % (k, v))

excerpt: Interviewer: Sure. So I'm basically trying to understand how users use weather apps. What are they looking for? And just a lot of requirements that they might have. So I'm just gonna start asking you a few questions and. Can you just tell me how things are based on your personal experience? Does that sound good.

Stakeholder: Yeah, sounds. Perfect.

Interviewer: Awesome. Okay? So for the 1st question, what is the primary purpose you want to achieve when using a weather app.

Stakeholder: So for me, primary purpose is a planning. So whenever I use weather app, it's either to plan what I have to wear on a particular day or plan my outdoor activities. or just plan my trip or something.

goals: ['Plan what to wear based on weather forecast.', 'Plan outdoor activities based on weather conditions.', 'Plan trips taking into account weather forecasts.']
phrases: [{'goal': 'Plan what to wear based on weather forecast.', 'phrases': ['plan what I have to wear on a particular day']}, {'go

In [5]:
from collections import Counter
import re

def annotate(excerpt, phrases):
    index = []
    for p in phrases:
        for m in re.finditer(p, excerpt):
            index.append([m.start(), '['])
            index.append([m.end(), ']'])

    
    for i, c in sorted(index, key = lambda x:x[0], reverse = True):
        excerpt = excerpt[:i] + c + excerpt[i:]
    return excerpt

def eval_extracted(extracted, counter=Counter(), base_turn=0):
    errors = []
    review = []
    for e in extracted:
        for g in e['phrases']:
            # if g is not a dict, then model failed to follow format instructions
            if not isinstance(g, dict):
                counter['type errors'] += 1
                continue

            # if g is missing phrases key, then model failed to follow format instructions
            elif not 'phrases' in g:
                counter['type errors'] += 1
                continue

            # if phrase goal doesn't match original generated goal
            if g['goal'] not in e['goals']:
                counter['goal mismatch'] += 1

            # if phrase list is empty
            if len(g['phrases']) == 0:
                counter['no phrases'] += 1

            # check for phrase match separately for interviewer and stakeholder
            i_c_total = 0
            s_c_total = 0
            i_c = 0
            s_c = 0
            turn_match = []
            for p in g['phrases']:
                turns = e['excerpt'].split('\n\n')
                i_c = 0
                s_c = 0
                for i, t in enumerate(turns):
                    i_p = re.findall(r'Interviewer:\s(.+)', t)
                    s_p = re.findall(r'Stakeholder:\s(.+)', t)
                    
                    i_c_t = sum([a.lower().count(p.lower()) for a in i_p])
                    s_c_t = sum([a.lower().count(p.lower()) for a in s_p])
                    if i_c_t + s_c_t > 0:
                        turn_match.append(base_turn + i)

                    i_c += i_c_t
                    s_c += s_c_t

                # if not match between interviewer and stakeholder
                if i_c + s_c == 0:
                    counter['unmatched phrase'] += 1
                    errors.append({'excerpt': e['excerpt'], 'phrase': p, 'goal': g['goal']})

                # if only one match between interviewer and stakeholder
                if i_c + s_c == 1:
                    counter['unique phrases'] += 1
                
                # count speaker role from which matches were found
                counter['from interviewer'] += i_c
                i_c_total += i_c
                s_c_total += s_c
                counter['from stakeholder'] += s_c
                counter['total phrases'] += 1
                
            # if interviewer and stakeholder matched for one goal, then count multi-turn goal
            multi = 'N'
            counter['interviewer goal'] += 1 if i_c > 0 else 0
            counter['stakeholder goal'] += 1 if s_c > 0 else 0
            if i_c_total > 0 and s_c_total > 0:
                counter['multi-turn goal'] += 1
                multi = 'Y'
            if i_c_total + s_c_total > 0:
                review.append({'index': len(review), 'goal': g['goal'], 'excerpt': annotate(e['excerpt'], g['phrases']), 's_match': s_c_total, 'i_match': i_c_total, 'turn_match': turn_match, 'multiturn': multi})
    return counter, errors, review

In [10]:
from collections import Counter
import csv, math

goals = {}
global_count = Counter()
counters = []
match_dist = Counter()
i = 0
for transcript, (key, result) in zip(data['transcript'], results.items()):
    goals[key] = []
    
    for j in range(len(result)):
        counter, errors, review = eval_extracted([result[j]], Counter(), base_turn = j * 2)
        counter['total turns'] = len(transcript)
        counters.append(counter)
        
        for k, c in counter.items():
            global_count[k] += c
    
        with open('%s/cache/%s.%i-generated-goals.csv' % (data_path, i, j), 'w') as f:
            writer = csv.DictWriter(f, fieldnames=['index', 'excerpt', 'goal', 'i_match', 's_match', 'turn_match', 'multiturn'])
            writer.writeheader()
            writer.writerows(review)
    
        # record population data for evaluating the prompt
        goals[key].append([])
        for r in review:
            goals[key][j].append(r['goal'])
            
            for m in r['turn_match']:
                norm_m = math.floor(10 * m / counter['total turns'])
                match_dist[norm_m] += 1

# how does the distribution compare to 'lost in the middle'?
for i in sorted(match_dist.keys()):
    print('%i\t%i' % (i, match_dist[i]))

0	243
1	577
2	701
3	614
4	660
5	530
6	601
7	586
8	451
9	256


In [11]:
fieldnames = set([k for c in counters for k in c.keys()])
with open('%s/statistics.csv' % data_path, 'w') as f:
    writer = csv.DictWriter(f, fieldnames=fieldnames)
    writer.writeheader()
    writer.writerows(counters)

In [12]:
json.dump(goals, open('%s/extracted-goals.json' % data_path, 'w'))

In [13]:
print(global_count)

Counter({'total turns': 133084, 'total phrases': 5544, 'unique phrases': 5179, 'from stakeholder': 4987, 'stakeholder goal': 2762, 'unmatched phrase': 334, 'type errors': 268, 'from interviewer': 258, 'interviewer goal': 125, 'multi-turn goal': 88})
