In [228]:
pwd

'/Users/spangher/Projects/berkeley-research/news-interview-question-generation/notebooks'

In [None]:
import gdown
import pandas as pd 

In [4]:
# Source spreadsheet with the question-type annotations and its local cache path.
# fuzzy=True lets gdown accept the sharing URL as-is (see the redirect in the cell output).
annotations_url = 'https://docs.google.com/spreadsheets/d/1YTd9Z-w9CrVEW4Mj_Qjv2G8ZEfxA91B8mcfQShM38SY/edit?usp=sharing'
annotations_path = 'cache/type_classification_annotations.xlsx'
gdown.download(annotations_url, output=annotations_path, fuzzy=True)

Downloading...
From (original): https://drive.google.com/uc?id=1YTd9Z-w9CrVEW4Mj_Qjv2G8ZEfxA91B8mcfQShM38SY
From (redirected): https://docs.google.com/spreadsheets/d/1YTd9Z-w9CrVEW4Mj_Qjv2G8ZEfxA91B8mcfQShM38SY/export?format=xlsx
To: /Users/spangher/Projects/berkeley-research/news-interview-question-generation/notebooks/cache/type_classification_annotations.xlsx
4.61MB [00:00, 11.4MB/s]


'cache/type_classification_annotations.xlsx'

In [7]:
annotated_df = pd.read_excel('cache/type_classification_annotations.xlsx', sheet_name='all_interviews_combined_v6')

In [15]:
from sklearn.preprocessing import MultiLabelBinarizer

In [43]:
def _normalize_label_column(labels):
    """Harmonise one column of lower-cased label strings and split it into label lists.

    Hyphens become spaces, 'starting ' is rewritten to 'starting/ending ',
    and each cell is split on a comma followed by one whitespace character.
    """
    dehyphenated = labels.str.replace('-', ' ')
    unified = dehyphenated.str.replace('starting ', 'starting/ending ')
    return unified.str.split(r',\s', regex=True)


# Keep only rows where both label columns are present, then normalise each column.
label_df = (
    annotated_df[['Question type', 'Human Label']]
    .apply(lambda col: col.str.lower())
    .dropna()
    .apply(_normalize_label_column)
)

In [44]:
mlb= MultiLabelBinarizer()

In [45]:
mlb.fit(label_df.unstack())

In [46]:
pd.Series(mlb.classes_).sort_values()

0       acknowledgement statement
1             broadening question
2              challenge question
3             definition question
4              follow up question
5    opinion/speculation question
6         starting/ending remarks
7       topic transition question
8           verification question
dtype: object

In [None]:
def _binarize_labels(label_lists):
    """Encode a column of label lists as an indicator frame with one column per fitted class."""
    return pd.DataFrame(mlb.transform(label_lists), columns=mlb.classes_)


# 'Question type' is scored as the prediction, 'Human Label' as the reference.
y_pred_df = _binarize_labels(label_df['Question type'])
y_true_df = _binarize_labels(label_df['Human Label'])

In [61]:
from sklearn.metrics import f1_score, recall_score, precision_score

In [79]:
# Per-class scores (average=None returns one value per label column),
# collected into one frame indexed by class name.
_per_class_metrics = {
    'precision': precision_score,
    'recall': recall_score,
    'f1-score': f1_score,
}
results_df = pd.DataFrame(
    {
        metric_name: metric_fn(y_true_df, y_pred_df, average=None)
        for metric_name, metric_fn in _per_class_metrics.items()
    },
    index=mlb.classes_,
)

In [80]:
results_df.round(2)

Unnamed: 0,precision,recall,f1-score
acknowledgement statement,1.0,1.0,1.0
broadening question,0.58,0.7,0.64
challenge question,1.0,1.0,1.0
definition question,1.0,1.0,1.0
follow up question,0.59,0.87,0.7
opinion/speculation question,0.75,0.75,0.75
starting/ending remarks,0.9,1.0,0.95
topic transition question,0.75,0.47,0.58
verification question,0.89,0.8,0.84


# Doesn't Work

In [83]:
from sklearn.metrics import multilabel_confusion_matrix
from sklearn.metrics import confusion_matrix

In [82]:
multilabel_confusion_matrix(y_true_df, y_pred_df)

array([[[83,  0],
        [ 0,  4]],

       [[72,  5],
        [ 3,  7]],

       [[81,  0],
        [ 0,  6]],

       [[86,  0],
        [ 0,  1]],

       [[50, 14],
        [ 3, 20]],

       [[72,  3],
        [ 3,  9]],

       [[67,  2],
        [ 0, 18]],

       [[65,  3],
        [10,  9]],

       [[76,  1],
        [ 2,  8]]])

# Experiment with prompt tuning

In [224]:
# Label names of the question-type taxonomy, one string per category.
# A comma was missing after the first entry, which made Python concatenate it
# with the second literal into "starting/ending remarksacknowledgement statement".
# NOTE(review): this list says "topic-transition question" while DEFINITIONS and
# the few-shot labels say "Outline-Level Question" -- confirm which name is intended.
TAXONOMY = [
    "starting/ending remarks",
    "acknowledgement statement",
    "follow-up question",
    "topic-transition question",
    "opinion/speculation question",
    "verification question",
    "challenge question",
    "broadening question",
]

# Schema shown to the model: one bullet per category with a short definition.
# Interpolated verbatim into the prompt by get_classify_all_questions_taxonomy_prompt.
DEFINITIONS = '''
 - Starting/Ending Remarks:
   - Definition: Initiates or concludes the interview. Often not in the form of a question.
 - Outline-Level Question:
   - Definition: Introduces a top-level topic into the conversation. Shifts the conversation from one subject to another. These questions are evidence of outline-level goals in the interview that the journalist wishes to ask, not simply responding to previous questions.
 - Acknowledgement Statement:
   - Definition: Affirms the interviewee, often by explicitly affirming the interviewee's previous response. This can create rapport, demonstrate active listening and empathy.
 - Follow-Up Question:
   - Definition: Digs deeper into a topic being discussed, seeks further elaboration, or re-phrases a previous question in a way that keeps the interview on the same topic.
 - Verification Question:
   - Definition: Confirms the accuracy of a statement, fact, or assumption. This type of question seeks to ensure that information is correct and reliable.
 - Opinion/Speculation Question:
   - Definition: Solicits the interviewee's personal views or predictions about a subject. Can reveal biases and insights.
 - Challenge Question:
   - Definition: Tests the interviewee's position, argument, or credibility. These questions are often used to provoke thought, debate, or to highlight inconsistencies.
 - Broadening Question:
   - Definition: To expand the scope of the discussion, encouraging the interviewee to think about the topic in a broader context or from different perspectives.
'''

# Worked examples shown to the model: prior context, question, short reasoning,
# then the bracketed label(s). The reasoning text uses the same category names
# as DEFINITIONS so the explanation and the bracketed label agree.
FEW_SHOT_EXAMPLES = '''
  Previous Question Context: The economic impact of a newly implemented policy or mandate.
  Question: Can you explain more about how the mandate is hurting the economy?
  Response:
  The question seeks to dive deeper into a topic and get more information.
  [Follow-Up Question]
  
  Previous Question Context: A starting remark introducing the interviewee and background.
  Question: Now I want to talk about Syria. Can you explain how your work in Aleppo changed your career?
  Response:
  The previous question (starting remark) was the first question in the interview. Typically, the question after the starting remark is an outline-level question. We verify this is indeed the case, as the topic shifted from (topic A) an introduction of the interviewee to (topic B) Syria and how his work there impacted his life.
  [Outline-Level Question]

  Previous Question Context: Presidential debate between Donald Trump and Hillary Clinton.
  Question: Let's look forward to the vice presidential debate. This is happening Tuesday. Mike Pence, Tim Kaine will go head to head. We haven't heard a whole lot from either of them so far. Do you think they're just going to echo what their running mates have been saying?
  Response: 
  The topic has shifted from (topic A) the presidential debate to (topic B) the vice presidential debate. After some context, the interviewer then asks for the interviewee's opinion.
  [Outline-Level Question, Opinion/Speculation Question]

  Previous Question Context: Discussion on the ongoing handling of the COVID-19 pandemic by various government administrations.
  Question: Do you believe the current administration is handling the pandemic well?
  Response:
  The question appears to be asking for an opinion rather than a set of facts.
  [Opinion/Speculation Question]

  Previous Question Context: The interviewee has just claimed that there will be a rise in unemployment in the next decade.
  Question: Can you provide evidence to support that claim?
  Response:
  The journalist is asking for further details specifically to back up a previous remark.
  [Verification Question]

  Previous Question Context: The interviewee has just talked about brain size and intelligence.
  Question: Overall, what's the importance of this right now? Why - this debate has been framed by some as beyond the pale, we shouldn't even discuss it. Do you think, first of all, that it is a distraction to discuss it, something that's really important for us to revisit?
  Response: 
  The journalist is encouraging the source to place these remarks in the context of a broader conversation of how it affects society.
  [Broadening Question]

  Previous Question Context: The interviewer has just started the interview, introduced the interviewee as a tour guide operator in Afghanistan, and discussed a recent attack.
  Question: We should first say the minibus that was recently attacked was not one of your tours. But you have led groups in Afghanistan, and I just have to ask why.
  Response: 
  The journalist is following up on the introductory remarks, and introducing a new focus on the interviewee.
  [Outline-Level Question, Follow-Up Question]
'''

# Response-format instructions appended to the prompt: step-by-step reasoning
# first, then the chosen label(s) as a comma-separated list inside brackets.
FORMAT = '''
Respond in this way:
  1. First, break this down, step-by-step:
    * What are the primary discussion points the journalist wishes to have during the conversation?
    * How does the current question relate to the previous question? 
    * Does the current question shift the focus in some way from the previous question/remark and shift to a different primary discussion point? (If yes, then this may be an Outline-Level Question or Broadening Question)
    * Or does it continue in the same line of inquiry? (If yes, then this is probably any of the others)
  2. Based on this reasoning, pick the single label, or labels, you think best categorize the question, based on the schema above. 
  3. Return the labels you select as a comma-separated list INSIDE brackets. Return the reasoning in part 1 BEFORE the brackets.
'''

def get_classify_all_questions_taxonomy_prompt(transcript: str, question: str) -> str:
      """Build the prompt asking for a taxonomy label for one question in an interview.

      The template interpolates the module-level DEFINITIONS, FEW_SHOT_EXAMPLES
      and FORMAT strings together with the arguments. The transcript, the
      question and the few-shot examples are each wrapped in triple backticks.

      Args:
          transcript: Full interview transcript, inserted verbatim.
          question: The question from the transcript that should be classified.

      Returns:
          The assembled prompt string, including the template's own leading
          newline and per-line indentation.
      """
      # NOTE: the category count ("8 categories") is hard-coded in the template text.
      prompt = f'''
      I am trying to understand the kinds of questions asked by journalists. 
      I will show you the transcript between the journalist (interviewer) and source (interviewee). I will then ask about a specific question in that transcript.
      Please label the question according to the following 8 categories of questions we've identified.

      Here are the schema categories:

      {DEFINITIONS}

      Here are some examples of questions, a summary of prior context, and the schema categories they belong to:

      ```{FEW_SHOT_EXAMPLES}```

      Ok, now it's your turn. Here is the interview transcript:
      ```{transcript}```

      And here is the question from the transcript I want you to classify using the taxonomy: 
      Question: ```{question}```
      Now it's your turn.

      {FORMAT}
      
      Please respond now:
      '''
      return prompt

In [168]:
import ast
from tqdm.auto import tqdm

In [105]:
dataset_df = pd.read_csv('../data/final_dataset.csv')

In [None]:
def _parse_list_literal(cell):
    """Parse a stringified Python literal; return already-parsed values unchanged.

    Passing non-strings through makes the cell safe to re-run: once a column
    holds parsed objects, calling ast.literal_eval on them again would raise.
    """
    return ast.literal_eval(cell) if isinstance(cell, str) else cell


# Register DataFrame/Series.progress_apply, then decode the stringified columns.
tqdm.pandas()
dataset_df['utt'] = dataset_df['utt'].progress_apply(_parse_list_literal)
dataset_df['speaker'] = dataset_df['speaker'].progress_apply(_parse_list_literal)

In [218]:
# Utterance snippets used to locate an interview by substring search.
# Previously these were three consecutive `t = ...` assignments, so only the
# last one ever took effect; the choice is now explicit.
SEARCH_SNIPPETS = {
    'low_vision_treatment': '''Absolutely. You know, it's first important to point out that when you measure visual improvements in patients with low vision, this is very difficult. There's no consensus in part because of current - there's currently no treatments. But that being said, the vision in both patients appears to have improved after transplantation of the cells, again even at the lowest dose. So before treatment, the Stargardt's patient could only detect hand motions. But within a week after treatment, she was able to actually start counting fingers. Indeed before the treatment, she couldn't read any letters on the standard visual acuity chart, but by two weeks, she started reading letters, and by one month, she could actually read five letters. But I think it's important to point out that that doesn't really capture the difference that this makes in their life. So for instance the Stargardt's patient reports she can now see more color, and she's a graphic artist, so of course that's very important to her. She had better contrast and dark adaptation out of the operated eye. For instance, the dry AMD patient can now use her computer again. She can even read her watch. So little things like that, which we all take for granted, obviously can make a huge difference in the quality of a person's life.''',
    'afghanistan_visitors': '''I think everyone's got their own individual reason. For the people that have traveled with me - I've taken people who have been fascinated by the beautiful mountain ranges - you've got the Hindu Kush, you've got the Pamir range. I've got other people who have been fascinated by the culture and the history and want to see some of the legacy of the empires that have sat in Afghanistan. So I think everyone's got their own personal story for wanting to go and visit.''',
    'oil_rich_nations': '''So it sounds like you're saying that people who are in these oil-rich nations may not actually see a lot of that wealth.''',
}

# Snippet currently under study (same value the last assignment used to leave in `t`).
t = SEARCH_SNIPPETS['oil_rich_nations']

In [219]:
# Pick the first interview whose combined dialogue contains the snippet `t`.
# regex=False: match the snippet literally; by default str.contains treats it
#   as a regular expression, so '.', '?', '$', '(' etc. would be interpreted.
# na=False: rows with a missing dialogue count as non-matches instead of
#   producing NaN entries in the boolean mask.
_snippet_matches = dataset_df.loc[
    lambda df: df['combined_dialogue'].str.contains(t, regex=False, na=False)
]
if _snippet_matches.empty:
    # Same exception type .iloc[0] would raise, but with an actionable message.
    raise IndexError(f'No row of dataset_df contains the snippet: {t[:80]!r}')
d = _snippet_matches.iloc[0]

In [210]:
d

id                                                              NPR-43
program                                                   News & Notes
date                                                        2007-11-20
url                  https://www.npr.org/templates/story/story.php?...
title                                  Africa Update: Oil and Politics
summary              This week, we take a look at the role of oil i...
utt                  [I'm Farai Chideya, and this is NEWS & NOTES.,...
speaker              [FARAI CHIDEYA, host, FARAI CHIDEYA, host, FAR...
combined_dialogue    \nFARAI CHIDEYA, host: I'm Farai Chideya, and ...
Name: 10, dtype: object

In [220]:
transcript = d['combined_dialogue']

In [221]:
list(enumerate(zip(d['speaker'], d['utt'])))

[(0, ('FARAI CHIDEYA, host', "I'm Farai Chideya, and this is NEWS & NOTES.")),
 (1,
  ('FARAI CHIDEYA, host',
   "It's Tuesday and time for Africa Update. This week, we take a look at how Africa's oil affects the cost of gas in America.")),
 (2,
  ('FARAI CHIDEYA, host',
   'Plus, a former Congolese militia leader goes on trial; the charge: recruiting and training child soldiers.')),
 (3,
  ('FARAI CHIDEYA, host',
   "For more we've got Edmond Keller. He's director of the Globalization Research Center on Africa at the University of California, Los Angeles.")),
 (4, ('FARAI CHIDEYA, host', 'Welcome, professor.')),
 (5,
  ('Dr. EDMOND KELLER (Professor of Political Science and Director of Globalization Research Center on Africa, University of California, Los Angeles)',
   "Thank you. I'm glad to be here.")),
 (6,
  ('FARAI CHIDEYA, host',
   "Yes, I'm glad to have you right here in the room with me.")),
 (7,
  ('FARAI CHIDEYA, host',
   'Now, in the past two weeks, the average price for 

In [222]:
question = d['utt'][7]

In [225]:
from pyperclip import copy

# Build the classification prompt for the selected question, show it, and put
# it on the clipboard. A dedicated name is used rather than `t`, which holds
# the search snippet; overwriting it made a later re-run of the lookup cell
# search for the whole prompt.
prompt_text = get_classify_all_questions_taxonomy_prompt(transcript, question)
print(prompt_text)
copy(prompt_text)


      I am trying to understand the kinds of questions asked by journalists. 
      I will show you the transcript between the journalist (interviewer) and source (interviewee). I will then ask about a specific question in that transcript.
      Please label the question according to the following 8 categories of questions we've identified.

      Here are the schema categories:

      
 - Starting/Ending Remarks:
   - Definition: Initiates or concludes the interview. Often not be in the form of a question.
 - Outline-Level Question:
   - Definition: Introduces a top-level topic into the conversation. Shifts the conversation from one subject to another. These questions are evidence of outline-level goals in the interview that the journalist wishes to ask, not simply responding to previous questions.
 - Acknowledgement Statement:
   - Definition: Affirms the interviewee, often by explicitly affirming the interviewee's previous response. This can create rapport, demonstrate active liste

In [215]:
question

'Now, in the past two weeks, the average price for a gallon of gas in the U.S. has jumped to more than $3. The cost of a barrel of oil was nearly a hundred dollars. Mideast oil might come to mind, but Americans also used millions of barrels of oil imported from Africa every day, so what percent of U.S. oil consumption comes from the continent?'

In [138]:
dataset_df.shape 

(45848, 9)