In [19]:
import pandas as pd
import re

In [20]:
# Load the two input tables; both paths are relative to the current working directory.
# df1 is the lookup table: later cells match on its 'id' column and read 'headline' and 'url'.
df1 = pd.read_csv("data.csv")
# df2 holds the generated answers: later cells read its 'prediction' column.
df2 = pd.read_csv("ref_pred.csv")

In [21]:
# df1

In [22]:
# df2

In [23]:
# extract all document IDs
def extract_all_doc_ids(prediction):
    """Return every numeric document ID cited in square brackets in ``prediction``.

    The citation spellings seen in the answers -- ``Document[688]``,
    ``Docs[364]`` and a bare ``[337]`` -- all end in ``[<digits>]``, so one
    pattern covers them.

    Args:
        prediction: Generated answer text. Anything that is not a string
            (for example the NaN pandas produces for an empty CSV cell) is
            treated as containing no citations.

    Returns:
        list[str]: The cited IDs as digit strings, in order of appearance,
        one entry per citation occurrence. An ID cited several times in the
        text appears several times; deduplicate in the caller if needed.
    """
    if not isinstance(prediction, str):
        return []
    # A single scan of the bracket form is enough: scanning the prefixed
    # spellings separately as well would report each of those citations twice.
    return re.findall(r'\[(\d+)\]', prediction)

In [24]:
# refine predictions and order document references
def refined_predictions_ordered(df1, df2):
    """Prefix each prediction with a verification banner and append its sources.

    For every row of ``df2`` the document IDs cited in the ``prediction`` text
    are collected with ``extract_all_doc_ids``, deduplicated, sorted
    numerically and looked up in ``df1``. IDs that resolve are listed in a
    markdown "References" section appended to the prediction.

    Args:
        df1: DataFrame with ``id``, ``headline`` and ``url`` columns. When an
            ``id`` occurs more than once, the first row is used.
        df2: DataFrame with a ``prediction`` column holding the answer text.
            A missing value (NaN) is treated as empty text.

    Returns:
        list[str]: One string per row of ``df2``, in row order, built as
        banner + prediction + references. The banner is "News Verified!" only
        when at least one cited ID was found in ``df1``; otherwise it is
        "News Unverified!" and no references section is appended.
    """
    # Build the id -> (headline, url) lookup once, rather than filtering df1
    # again for every cited ID of every prediction. setdefault keeps the first
    # row for a repeated id.
    sources_by_id = {}
    for source_id, headline, url in zip(df1['id'], df1['headline'], df1['url']):
        sources_by_id.setdefault(source_id, (headline, url))

    refined_predictions = []
    for prediction in df2['prediction']:
        # An empty CSV cell is read back as NaN, which can be neither scanned
        # nor concatenated; keep the output aligned with df2 by using "".
        if not isinstance(prediction, str):
            prediction = "" if pd.isna(prediction) else str(prediction)

        reference_lines = []
        # Deduplicate on the cited text, then order numerically (not lexically).
        for doc_id in sorted(set(extract_all_doc_ids(prediction)), key=int):
            source = sources_by_id.get(int(doc_id))
            if source is not None:
                headline, url = source
                reference_lines.append(f"- Document[{doc_id}] : [{headline}]({url})\n")

        # Only claim verification when a cited document was actually found;
        # citing unknown IDs must not produce an empty "References" section.
        if reference_lines:
            references = "\n\nReferences:\n\n" + "".join(reference_lines)
            verification_statement = "**News Verified!**\n\n"
        else:
            references = ""
            verification_statement = "**News Unverified!**\n\n"

        refined_predictions.append(verification_statement + prediction + references)
    return refined_predictions


In [25]:
# Build the annotated answer strings: one per row of df2, with df1 supplying headline/url lookups.
refined_ordered_predictions_list = refined_predictions_ordered(df1, df2)

In [28]:
# Inspect the generated strings.
# NOTE(review): this echoes the entire list into the notebook output; a slice would keep the output readable.
refined_ordered_predictions_list

['**News Verified!**\n\nThe Prime Minister of Singapore is Lee Hsien Loong, as stated in Document[688], Document[1996], and Document[1774].</s>\n\nReferences:\n\n- Document[688] : [President Halimah to attend official mourning of UAE president in Abu Dhabi ](https://www.straitstimes.com/singapore/president-halimah-to-attend-official-mourning-of-uae-president-in-abu-dhabi)\n- Document[1774] : [Brunei&#039;s Sultan Hassanal Bolkiah on two-day state visit to Singapore ](https://www.straitstimes.com/singapore/bruneis-sultan-hassanal-bolkiah-on-two-day-state-visit-singapore)\n- Document[1996] : [PM Lee among top leaders to attend state funeral for ex-Japan PM Abe ](https://www.straitstimes.com/asia/east-asia/singapore-pm-lee-among-top-leaders-to-attend-state-funeral-for-ex-japan-pm-abe)\n',
 '**News Verified!**\n\nThe National University of Singapore (NUS) is the top university in Singapore, as indicated by Docs[364], [337], and [832].</s>\n\nReferences:\n\n- Document[337] : [Shooting: Mart

In [26]:
# Wrap the list of strings in a single-column DataFrame named 'prediction' and preview the first rows.
df3_refined_ordered = pd.DataFrame(refined_ordered_predictions_list, columns=['prediction'])
df3_refined_ordered.head()

Unnamed: 0,prediction
0,**News Verified!**\n\nThe Prime Minister of Si...
1,**News Verified!**\n\nThe National University ...
2,**News Verified!**\n\nThe temperature in Singa...
3,**News Verified!**\n\nAccording to Document[39...
4,**News Verified!**\n\nSingapore's men's team b...


In [27]:
# Persist the annotated predictions to disk (relative to the current working directory).
# to_csv writes the row index as an unnamed first column by default.
# NOTE(review): pass index=False if consumers expect only the 'prediction' column — confirm before changing.
df3_refined_ordered.to_csv('ref_pred_new.csv')

## Question: Who is the Prime Minister of Singapore?

### Answer: 

**News Verified!**

The Prime Minister of Singapore is Lee Hsien Loong, as stated in Document[688], Document[1996], and Document[1774].</s>

References:

- Document[688] : [President Halimah to attend official mourning of UAE president in Abu Dhabi ](https://www.straitstimes.com/singapore/president-halimah-to-attend-official-mourning-of-uae-president-in-abu-dhabi)
- Document[1774] : [Brunei&#039;s Sultan Hassanal Bolkiah on two-day state visit to Singapore ](https://www.straitstimes.com/singapore/bruneis-sultan-hassanal-bolkiah-on-two-day-state-visit-singapore)
- Document[1996] : [PM Lee among top leaders to attend state funeral for ex-Japan PM Abe ](https://www.straitstimes.com/asia/east-asia/singapore-pm-lee-among-top-leaders-to-attend-state-funeral-for-ex-japan-pm-abe)


## Question: What's the new scheme for Covid-19 swabs in Singapore?

### Answer: 

**News Unverified!**

The new scheme for Covid-19 swabs in Singapore is not provided in the given documents.</s>

## Question: What are the key takeaways from the recent Singapore Airshow?

### Answer: 

**News Verified!**

The key takeaways from the recent Singapore Airshow include building new homes in Paya Lebar and attracting top talent, as stated in Document[1739] and [926]. Additionally, the new Airbus can be more fuel-efficient and easily deployed onto alternative routes, as stated in Document[376].</s>

References:

- Document[376] : [SIA confirms order of 7 freight planes, 22 passenger aircraft engines as industry recovers ](https://www.straitstimes.com/singapore/sia-confirms-order-of-7-freight-planes-22-passenger-aircraft-engines-as-industry-recovers)
- Document[926] : [Kranji Primary teachers go the extra mile to provide daily after-school programme ](https://www.straitstimes.com/singapore/parenting-education/kranji-primary-teachers-go-the-extra-mile-to-provide-daily-after-school-programme)
- Document[1739] : [Morning Briefing: Top stories from The Straits Times on Aug 22 ](https://www.straitstimes.com/singapore/morning-briefing-top-stories-from-the-straits-times-on-aug-22-2022)


In [None]:
# Model identifier; the organisation name is "lmsys" (lowercase L), not "Imsys".
"lmsys/vicuna-7b-v1.5"