In [25]:
%load_ext autoreload
%autoreload 2

In [1]:
import tiktoken

In [2]:
text = "OpenAI is a private research laboratory. It aims to develop and direct artificial intelligence (AI) in ways that benefit humanity as a whole."

In [3]:
encoding = tiktoken.encoding_for_model("gpt-3.5-turbo")

In [4]:
tokens = encoding.encode(text)
len(tokens)

28

In [33]:
import tiktoken
def count_tokens(text):
    encoding = tiktoken.encoding_for_model("gpt-3.5-turbo")
    tokens = encoding.encode(text)
    return len(tokens)

In [5]:
import requests
def scrape_jina_ai(url:str)->str:
    response = requests.get("https://r.jina.ai/"+url)
    return response.text

In [16]:
import re

def clean_up_markdown_link(markdown):
    # cleaned_text = re.sub(r'\[([^\]]+)\]\([^)]+\)', r'\1', markdown)
    cleaned_text = re.sub(r'!\[.*?\]\(.*?\)|\[(.*?)\]\(.*?\)', r'\1', markdown)
    return cleaned_text

In [86]:
from research_rocket.summary_agent import SummaryAgent
from research_rocket.blog_writer import BlogWriter
sa = SummaryAgent()
bw = BlogWriter()

# Steps to create articles: 
- create outline
- search for each section in outline and returns as contexts
- summarize each context  
- use all contexts under the q to summarize
- create a blog based on outline

# Token Count  
Example:  

Overview of Insurance Fraud:  
- what is insurance fraud look like  
- what is the impact on insurance fraud in USA  
- how many types of insurance frauds are  
- how to detect insurance fraud  

# q1: what is insurance fraud look like  
- https://www.insurance.ca.gov/0300-fraud/0100-fraud-division-overview/05-ins-fraud/
- https://www.attorneygeneral.gov/protect-yourself/insurance-fraud/types-of-insurance-fraud/
- https://www.allstate.com/resources/car-insurance/types-of-car-insurance-fraud
- https://en.wikipedia.org/wiki/Insurance_fraud
- https://www.vero.co.nz/vero-voice/understanding-insurance-fraud.html

In [28]:
q1 = "what is insurance fraud look like"

In [6]:
%%time
q1_urls = [
    "https://www.insurance.ca.gov/0300-fraud/0100-fraud-division-overview/05-ins-fraud/",
    "https://www.attorneygeneral.gov/protect-yourself/insurance-fraud/types-of-insurance-fraud/",
    "https://www.allstate.com/resources/car-insurance/types-of-car-insurance-fraud",
    "https://en.wikipedia.org/wiki/Insurance_fraud",
    "https://www.vero.co.nz/vero-voice/understanding-insurance-fraud.html",
]

q1_texts = [scrape_jina_ai(url) for url in q1_urls]

CPU times: user 21.6 ms, sys: 5.2 ms, total: 26.8 ms
Wall time: 24.6 s


In [17]:
%%time
q1_texts = [clean_up_markdown_link(t) for t in q1_texts]

CPU times: user 882 μs, sys: 0 ns, total: 882 μs
Wall time: 888 μs


In [42]:
sa = SummaryAgent()

sa.run(dict(context=q1_texts[0], message=q1))

None


In [43]:
count_tokens(sa.system_prompt), count_tokens(sa.input_text), count_tokens(sa.output_text)

(59, 560, 244)

In [40]:
count_tokens(sa.input_text)

560

In [36]:
count_tokens(sa.agent.prompt_generator.as_prompt())

59

In [34]:
count_tokens(_answer)

244

# q2: what is the impact on insurance fraud in USA  
- https://insurancefraud.org/fraud-stats/
- https://www.conroysimberg.com/blog/insurance-fraud-costs-the-u-s-308-billion-annually/
- https://legal.thomsonreuters.com/en/insights/articles/identity-frauds-impact-on-the-insurance-sector
- https://www.webberwentzel.com/News/Pages/the-costs-and-consequences-of-insurance-fraud.aspx
- https://www.insurance.ca.gov/01-consumers/105-type/95-guides/15-gen/insur-fraud-is-felony.cfm

In [44]:
q2 = "what is the impact on insurance fraud in USA"

In [19]:
%%time
q2_urls = [
    "https://insurancefraud.org/fraud-stats/",
    "https://www.conroysimberg.com/blog/insurance-fraud-costs-the-u-s-308-billion-annually/",
    "https://legal.thomsonreuters.com/en/insights/articles/identity-frauds-impact-on-the-insurance-sector",
    "https://www.webberwentzel.com/News/Pages/the-costs-and-consequences-of-insurance-fraud.aspx",
    "https://www.insurance.ca.gov/01-consumers/105-type/95-guides/15-gen/insur-fraud-is-felony.cfm",
]

q2_texts = [scrape_jina_ai(url) for url in q2_urls]

CPU times: user 27.4 ms, sys: 0 ns, total: 27.4 ms
Wall time: 41.6 s


In [20]:
q2_texts = [clean_up_markdown_link(t) for t in q2_texts]

# q3: how many types of insurance frauds are  
- https://en.wikipedia.org/wiki/Insurance_fraud#:~:text=6.25%20billion%20annually.-,Types%20of%20insurance%20fraud,to%20claim%20payment%20for%20damages.
- https://www.attorneygeneral.gov/protect-yourself/insurance-fraud/types-of-insurance-fraud/
- https://helpstopfraud.org/insurance-fraud/types-of-insurance-fraud/
- https://www.tdi.texas.gov/fraud/types-of-fraud.html
- https://ifb.org.nz/what-is-fraud/types-of-fraud/

In [45]:
q3 = "how many types of insurance frauds are"

In [21]:
%%time
q3_urls = [
    "https://en.wikipedia.org/wiki/Insurance_fraud#:~:text=6.25%20billion%20annually.-,Types%20of%20insurance%20fraud,to%20claim%20payment%20for%20damages.",
    "https://www.attorneygeneral.gov/protect-yourself/insurance-fraud/types-of-insurance-fraud/",
    "https://helpstopfraud.org/insurance-fraud/types-of-insurance-fraud/",
    "https://www.tdi.texas.gov/fraud/types-of-fraud.html",
    "https://ifb.org.nz/what-is-fraud/types-of-fraud/",
]

q3_texts = [scrape_jina_ai(url) for url in q3_urls]

CPU times: user 24.3 ms, sys: 3.64 ms, total: 27.9 ms
Wall time: 1min 8s


In [22]:
q3_texts = [clean_up_markdown_link(t) for t in q3_texts]

# q4: how to detect insurance fraud  
- https://www.cigniti.com/blog/fraud-detection-insurance-claim-process-artificial-intelligence/
- https://www.unit21.ai/blog/combating-insurance-fraud
- https://www.caseiq.com/resources/the-ultimate-guide-to-insurance-fraud-investigations/
- https://www.wipro.com/analytics/comparative-analysis-of-machine-learning-techniques-for-detectin/
- https://risk.lexisnexis.co.uk/insights-resources/article/how-to-detect-insurance-fraud

In [46]:
q4 = "how to detect insurance fraud"

In [23]:
%%time
q4_urls = [
    "https://www.cigniti.com/blog/fraud-detection-insurance-claim-process-artificial-intelligence/",
    "https://www.unit21.ai/blog/combating-insurance-fraud",
    "https://www.caseiq.com/resources/the-ultimate-guide-to-insurance-fraud-investigations/",
    "https://www.wipro.com/analytics/comparative-analysis-of-machine-learning-techniques-for-detectin/",
    "https://risk.lexisnexis.co.uk/insights-resources/article/how-to-detect-insurance-fraud",
]

q4_texts = [scrape_jina_ai(url) for url in q4_urls]

CPU times: user 25.8 ms, sys: 3.9 ms, total: 29.7 ms
Wall time: 1min 29s


In [24]:
q4_texts = [clean_up_markdown_link(t) for t in q4_texts]

# Setup

In [61]:
from pydantic import BaseModel, Field
from typing import List

class RawContext(BaseModel):
    question:str = Field()
    context:str = Field()
    input_text:str = Field()
    system_prompt:str = Field()
    output_text:str = Field()

class RawContexts(BaseModel):
    contexts:List[RawContext]

In [63]:
%%time
raw_contexts = []

questions = [
    (q1, q1_texts), 
    (q2, q2_texts), 
    (q3, q3_texts), 
    (q4, q4_texts), 
]

for q, texts in questions:
    for t in texts:
        sa = SummaryAgent()
        input_dict = dict(context=t, message=q)
        sa.run(request=input_dict)
        raw_contexts.append(
            RawContext(
                question=q,
                context=t,
                input_text=sa.input_text,
                system_prompt=sa.system_prompt,
                output_text=sa.output_text
            )
        )

CPU times: user 749 ms, sys: 5.19 ms, total: 754 ms
Wall time: 1min 21s


In [64]:
len(raw_contexts)

20

In [65]:
raw_contexts[0].output_text

"Insurance fraud occurs when someone knowingly lies to obtain a benefit or advantage to which they are not otherwise entitled, or when someone knowingly denies a benefit that is due and to which someone is entitled. This can be done with the intent to defraud, and it doesn't necessarily require actual monetary loss.\n\nSome examples of insurance fraud include:\n\n- Making false claims for automobile property and personal injury\n- Falsifying information on workers' compensation claims\n- Misrepresenting information on health insurance claims\n- Filing false claims for residential and commercial property damage\n\nInsurance fraud can take many forms, but it often involves making false statements or misrepresenting information to an insurer in order to receive a benefit or advantage that is not rightfully owed."

## raw_contexts[-1].output_text

In [67]:
outline = """
Overview of Insurance Fraud:  
- what is insurance fraud look like  
- what is the impact on insurance fraud in USA  
- how many types of insurance frauds are  
- how to detect insurance fraud  
"""

In [69]:
message = "Write a blog based on the outline and make the language simple and easy to understand"

In [71]:
raw_contexts[0].question

'what is insurance fraud look like'

In [72]:
raw_contexts[0].output_text

"Insurance fraud occurs when someone knowingly lies to obtain a benefit or advantage to which they are not otherwise entitled, or when someone knowingly denies a benefit that is due and to which someone is entitled. This can be done with the intent to defraud, and it doesn't necessarily require actual monetary loss.\n\nSome examples of insurance fraud include:\n\n- Making false claims for automobile property and personal injury\n- Falsifying information on workers' compensation claims\n- Misrepresenting information on health insurance claims\n- Filing false claims for residential and commercial property damage\n\nInsurance fraud can take many forms, but it often involves making false statements or misrepresenting information to an insurer in order to receive a benefit or advantage that is not rightfully owed."

In [73]:
q_list = []
draft_dict = {}
for rc in raw_contexts:
    if rc.question not in q_list:
        q_list.append(rc.question)
        draft_dict[rc.question] = [rc.output_text]
    else:
        draft_dict[rc.question].append(rc.output_text)
    

In [75]:
draft_dict.keys()

dict_keys(['what is insurance fraud look like', 'what is the impact on insurance fraud in USA', 'how many types of insurance frauds are', 'how to detect insurance fraud'])

In [77]:
len(draft_dict['what is insurance fraud look like'])

5

In [81]:
draft_list = []
for k,v in draft_dict.items():
    draft_list.append(
        "## {k}\n{v}".format(k=k, v='\n'.join(v))
    )

In [83]:
draft = "\n".join(draft_list)

In [87]:
%%time

bw.run(request=dict(
    draft=draft,
    outline=outline,
    message=message
))

CPU times: user 38.7 ms, sys: 290 μs, total: 39 ms
Wall time: 6.23 s


In [88]:
print(bw.output_text)

**Understanding Insurance Fraud: Types, Impact, and Detection**

As a responsible and informed individual, it's essential to understand the concept of insurance fraud, its various types, and its impact on the insurance industry and the economy. In this blog, we'll delve into the world of insurance fraud, exploring what it looks like, its effects on the USA, the different types of insurance fraud, and how to detect it.

**What is Insurance Fraud?**

Insurance fraud occurs when someone intentionally deceives or misleads an insurance company to receive benefits or payments they're not entitled to. This can be done by providing false information, exaggerating claims, or staging accidents. Insurance fraud can take many forms, including auto, health, life, and property insurance fraud. It's a serious crime that affects not only insurance companies but also policyholders, as it leads to increased premiums and reduced coverage.

**The Impact of Insurance Fraud in the USA**

The impact of insur

In [92]:
model_11b_tokens = 0
for rc in raw_contexts:
    model_11b_tokens = 0
    model_11b_tokens += count_tokens(rc.system_prompt)
    model_11b_tokens += count_tokens(rc.input_text)
    model_11b_tokens += count_tokens(rc.output_text)
model_11b_tokens

10670

In [115]:
count_tokens(rc.system_prompt)

59

In [116]:
count_tokens(rc.input_text)

10266

In [117]:
count_tokens(rc.output_text)

345

In [96]:
model_11b_price = 0.00016
model_70b_price = 0.00072

In [114]:
rc.system_prompt

In [91]:
model_70b_tokens = 0
model_70b_tokens += count_tokens(bw.system_prompt)
model_70b_tokens += count_tokens(bw.input_text)
model_70b_tokens += count_tokens(bw.output_text)
model_70b_tokens

6844

In [101]:
token_batch = 1000
THB = 33

In [104]:
m11b_price_thb = ((model_11b_tokens/token_batch) * model_11b_price) * THB
m11b_price_thb

0.0563376

In [105]:
m70b_price_thb = ((model_70b_tokens/token_batch) * model_70b_price) * THB
m70b_price_thb

0.16261344