In [1]:
from transformers import pipeline

In [2]:
def summarize_chat_history(conversation_history: list) -> str:
    summarizer = pipeline("summarization")
    conversation_text = " ".join(conversation_history)
    summary_result = summarizer(conversation_text, max_length=150, min_length=30, do_sample=False)
    return summary_result[0]['summary_text']

In [3]:
def sliding_window(conversation_history: list, window_size: int = 5) -> list:
    return conversation_history[-window_size:]

In [4]:
def dynamic_token_budgeting(conversation_history: list, max_tokens: int) -> list:
    token_count = 0
    selected_messages = []
    for message in reversed(conversation_history):
        message_tokens = len(message.split())
        if token_count + message_tokens <= max_tokens:
            token_count += message_tokens
            selected_messages.insert(0, message)
        else:
            break
    return selected_messages

In [5]:
def hierarchical_memory(conversation_history: list, short_term_window: int = 5) -> dict:
    if len(conversation_history) <= short_term_window:
        return {"short_term": conversation_history, "long_term": ""}
    
    short_term = conversation_history[-short_term_window:]
    long_term_history = conversation_history[:-short_term_window]
    long_term_summary = summarize_chat_history(long_term_history)
    
    return {"short_term": short_term, "long_term": long_term_summary}

In [6]:
def retrieval_by_keyword(conversation_history: list, keyword: str) -> list:
    return [msg for msg in conversation_history if keyword.lower() in msg.lower()]

In [7]:
if __name__ == '__main__':
    conversation = [
        "User: Hi, I have a problem with my order.",
        "Bot: I'm sorry to hear that, can you provide your order number?",
        "User: Sure, it's 123456.",
        "Bot: Thank you, let me look into this.",
        "User: I also have a question about your return policy.",
        "Bot: Our return policy lasts 30 days. Please let me know if you have any other questions or need further assistance.",
        "User: That helps a lot, thanks!",
        "Bot: You're welcome! Have a great day.",
        "User: I wonder if I can track my order online?",
        "Bot: Yes, you can track your order by logging into your account."
    ]
    summary = summarize_chat_history(conversation)
    print("Summarized Conversation:")
    print(summary)
    print("\n")
    sliding = sliding_window(conversation, window_size=5)
    print("Sliding Window (Last 5 Messages):")
    for msg in sliding:
        print(msg)
    print("\n")
    dynamic_history = dynamic_token_budgeting(conversation, max_tokens=30)
    print("Dynamic Token Budgeting (Max 30 Tokens):")
    for msg in dynamic_history:
        print(msg)
    print("\n")
    memory = hierarchical_memory(conversation, short_term_window=5)
    print("Hierarchical Memory:")
    print("Short Term:")
    for msg in memory['short_term']:
        print(msg)
    print("Long Term Summary:")
    print(memory['long_term'])
    print("\n")
    keyword_results = retrieval_by_keyword(conversation, keyword="order")
    print("Retrieval by Keyword ('order'):")
    for msg in keyword_results:
        print(msg)

No model was supplied, defaulted to sshleifer/distilbart-cnn-12-6 and revision a4f8f3e (https://huggingface.co/sshleifer/distilbart-cnn-12-6).
Using a pipeline without specifying a model name and revision in production is not recommended.
Device set to use mps:0
Your max_length is set to 150, but your input_length is only 133. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=66)
No model was supplied, defaulted to sshleifer/distilbart-cnn-12-6 and revision a4f8f3e (https://huggingface.co/sshleifer/distilbart-cnn-12-6).
Using a pipeline without specifying a model name and revision in production is not recommended.


Summarized Conversation:
 Bot: I'm sorry to hear that, can you provide your order number? User: Sure, it's 123456. Bot: Thank you, let me look into this . User: I also have a question about your return policy . Bot: Our return policy lasts 30 days .


Sliding Window (Last 5 Messages):
Bot: Our return policy lasts 30 days. Please let me know if you have any other questions or need further assistance.
User: That helps a lot, thanks!
Bot: You're welcome! Have a great day.
User: I wonder if I can track my order online?
Bot: Yes, you can track your order by logging into your account.


Dynamic Token Budgeting (Max 30 Tokens):
Bot: You're welcome! Have a great day.
User: I wonder if I can track my order online?
Bot: Yes, you can track your order by logging into your account.




Device set to use mps:0
Your max_length is set to 150, but your input_length is only 62. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=31)


Hierarchical Memory:
Short Term:
Bot: Our return policy lasts 30 days. Please let me know if you have any other questions or need further assistance.
User: That helps a lot, thanks!
Bot: You're welcome! Have a great day.
User: I wonder if I can track my order online?
Bot: Yes, you can track your order by logging into your account.
Long Term Summary:
 Bot: I'm sorry to hear that, can you provide your order number? User: Sure, it's 123456. Bot: Thank you, let me look into this . User: I also have a question about your return policy .


Retrieval by Keyword ('order'):
User: Hi, I have a problem with my order.
Bot: I'm sorry to hear that, can you provide your order number?
User: I wonder if I can track my order online?
Bot: Yes, you can track your order by logging into your account.
