# Multi-Category Classification Prompt

In [None]:
import ollama
# Model tag passed as `model=` to every ollama.chat call in this notebook.
ollama_model = "llama3:70b-instruct"
# Last expression of the cell, so the notebook displays its return value.
# Presumably used to confirm the model above is available — TODO confirm.
ollama.list()

In [None]:
# Data load: read the patent test workbook and pull out titles and abstracts.
import pandas as pd


class myconfig:
    """Container for the file locations used by this notebook."""

    # Excel workbook holding the patent test set.
    patent_data_path = r"/home/work/lib_data/nghl/kisti/과제2/카테고리_특허_테스트.xlsx"


mcfg = myconfig()

summary_data = pd.read_excel(mcfg.patent_data_path)

# Titles are lower-cased; abstracts are taken exactly as stored in the sheet.
title = summary_data["등록명(영)"].str.lower().tolist()
summary = summary_data["abstract(영)"].tolist()

# Human-written Prompt

In [None]:
# Raw model answers for the human-written prompt, one entry per patent.
results_multi = []

# The system message is identical for every request, so it is defined once.
system_message = {"role": "system", "content": "You are an expert in the International Patent Classification(IPC). Categorize patents without further explanation and subgroup details like '7/00'"}

# Walk titles and abstracts in lockstep (both lists come from the same sheet).
for patent_title, patent_abstract in zip(title, summary):
    prompt = f"""Recommend IPC subclass codes for this item, providing probabilities that sum to 100%. Please recommend between one and four.

    Here's a short description of {patent_title}:
    {patent_abstract}

    What is '{patent_title}''s IPC subclass code?

    Format of answer should be 'G06F (50%)'
    Do not include maingroup and subgroup details. Just say the first 4 digits of the subclass.
    Answer without any further explanations.
    """

    response = ollama.chat(
        model=ollama_model,
        messages=[system_message, {'role': 'user', 'content': prompt}],
    )

    # Echo each answer as it arrives, then keep it for later evaluation.
    answer = response['message']['content']
    print(answer)
    results_multi.append(answer)

# LLM-based Prompt

In [None]:
# Instruction induction: show the model five example title/abstract -> output
# pairs and ask it to state the instruction that would produce those outputs.
# The request is repeated five times and every answer is kept in `ctgr`.
#
# NOTE(review): the prompt below limits outputs to 3 classes, yet the third
# example lists four codes. Confirm which limit is intended; the prompt text
# is left untouched here so previously collected results stay comparable.
ctgr = []

# The prompt does not depend on the iteration, so it is built once up front.
prompt = """I gave a Patent Attorney an instruction, five inputs that are 'Title of Invention-Abstract of the each Invention' pairs.
    The Patent Attorney read the instruction and infered an output for every one of the input pairs.

    The outputs should be categorized into 1 to 3 classes, depending on their relevance to the invention. Ensure that the number of categories (outputs) for each invention does not exceed 3.

    Here are the Title-Abstract pairs and outputs:

    Title of Invention: *Distributed Denial Of Service Mitigation In A Container Based Framework
    Abstract of Invention : **Abstract of Distributed Denial Of Service Mitigation In A Container Based Framework
    Output: H04L(100%)

    Title of Invention: *System And Method For Autonomous Lawn Care
    Abstract of Invention : **Abstract of System And Method For Autonomous Lawn Care
    Output: G05B(40%), A01D(30%), G05D(30%)

    Title of Invention: *Interactive Autonomous Vehicle Command Controller
    Abstract of Invention : **Abstract of Interactive Autonomous Vehicle Command Controller
    Output: G05D(40%), B60Q(30%), B60L(20%), Y02T(10%) 

    Title of Invention: *Systems, Methods And Devices For Prosthetic Heart Valve With Single Valve Leaflet
    Abstract of Invention : **Abstract of Systems, Methods And Devices For Prosthetic Heart Valve With Single Valve Leaflet
    Output: A61F(100%)

    Title of Invention: *Method For Improving Soil Strength In Excavated Soil
    Abstract of Invention : **Abstract of Method For Improving Soil Strength In Excavated Soil
    Output: A01B(50%), C09K(50%)

    The instruction was"""

# The loop index is not used; each pass sends the same request again.
for _ in range(5):
    response = ollama.chat(
        model=ollama_model,
        messages=[
            {"role": "system", "content": "You are a expert of prompt engineering. Generate proper instruction without further explanation."},
            {'role': 'user', 'content': prompt},
        ],
    )

    print(response['message']['content'])
    ctgr.append(response['message']['content'])

In [None]:
# Read the instruction workbook and keep its 'instruction_set' column as a list.
instruction_cleaned = pd.read_excel(
    '/home/work/lib_data/nghl/kisti/과제2/ctgr_instruction2.xlsx'
)
instruction_list = instruction_cleaned['instruction_set'].tolist()

In [None]:
# Build the prompts: for every instruction, one prompt per patent.
# prompt_dic maps the instruction's position in instruction_list to the list
# of prompts built from it (same order as `title` / `summary`).

prompt_dic = {}

for idx, instruction in enumerate(instruction_list):
    prompts_for_instruction = []

    # NOTE: `patent` and `abstract` stay bound at notebook scope after this
    # loop (to the last row), so later cells should not rely on them.
    for patent, abstract in zip(title, summary):
        prompts_for_instruction.append(f"""{instruction}

   Title of Invention: {patent}
   Abstract of Invention: {abstract}
   Output: """)

    prompt_dic[idx] = prompts_for_instruction

In [None]:
# Run inference: send every prepared prompt to the model, grouped by instruction.
from tqdm import tqdm

# Same system message for every request.
system_message = {"role": "system", "content": "You are a helpful Patent Attorney of this categorizing task. Categorize patents without further explanation and subgroup details like '7/00'"}

# result maps an instruction index to the list of raw model answers,
# in the same order as the prompts stored under that index in prompt_dic.
result = {}

for idx in tqdm(range(len(instruction_list))):
    answers = []

    for user_prompt in prompt_dic[idx]:
        reply = ollama.chat(
            model=ollama_model,
            messages=[system_message, {'role': 'user', 'content': user_prompt}],
        )
        answer = reply['message']['content']
        print(answer)
        answers.append(answer)

    result[idx] = answers

In [None]:
# Tabulate the answers: with orient='index' each key of `result` (an
# instruction index) becomes a row, and each position in its answer list
# becomes a column.
result_df = pd.DataFrame.from_dict(result, orient='index')

In [None]:
# LLM-based prompt: classify every patent with the induced instruction.
# Raw model answers are collected in results_multi, one entry per patent.
results_multi = []

# Iterate titles and abstracts in lockstep. The loop targets are named
# `patent` and `abstract` so the placeholders in the prompt resolve to the
# current row, not to whatever those names held before this cell ran.
for patent, abstract in zip(title, summary):
    prompt = f"""Classify the provided Title-Abstract pairs into relevant International Patent Classification (IPC) codes. 
    Assign a percentage weight to each IPC code, indicating its relevance to the invention. 
    Ensure that the total number of IPC codes for each invention does not exceed 4.

    Title of Invention: {patent}
    Abstract of Invention: {abstract}
    Output: """

    response = ollama.chat(
        model=ollama_model,
        messages=[
            {"role": "system", "content": "You are an expert in the International Patent Classification(IPC). Categorize patents without further explanation and subgroup details like '7/00'"},
            {'role': 'user', 'content': prompt}
        ]
    )

    # Echo each answer as it arrives, then keep it for later evaluation.
    print(response['message']['content'])
    results_multi.append(response['message']['content'])