In [1]:
%run functions.py
%run models.py

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
from datasets import load_dataset

ds = load_dataset("goendalf666/sales-conversations-instruction-customer")

In [3]:
full_ds = keep_full_convos(ds)

In [4]:
train_set, test_set = split_train_test(full_ds)

In [5]:
train_set[:5]

['Customer: Can you please explain to me how this product works? Salesman: Certainly! This product is designed to simplify your workflow by automating repetitive tasks. It uses advanced algorithms to analyze data and generate actionable insights, saving you time and effort. Customer: Im not sure if this service is worth the price. Can you explain its benefits to me? Salesman: Absolutely! This service offers numerous benefits, such as increased productivity, improved efficiency, and cost savings. It integrates seamlessly with your existing systems and provides real-time analytics, allowing you to make data-driven decisions and stay ahead of the competition. Customer: I have a limited technical background. Can you explain this feature in simpler terms? Salesman: Of course! Let me break it down for you. This feature essentially streamlines your customer support process by automating ticket management and routing. It ensures that customer inquiries are efficiently handled and assigned to t

In [6]:
test = train_set[0]

In [7]:
dict_test = split_cust_sales(test)

In [8]:
full_text_cust, full_text_sales = dict_test['full']
split_text_cust, split_text_sales = dict_test['split']

In [9]:
print("Before:", full_text_cust)
print("-------")
print("After:", summarise(full_text_cust))

Before: Customer says to salesman: Certainly! This product is designed to simplify your workflow by automating repetitive tasks. It uses advanced algorithms to analyze data and generate actionable insights, saving you time and effort.
Absolutely! This service offers numerous benefits, such as increased productivity, improved efficiency, and cost savings. It integrates seamlessly with your existing systems and provides real-time analytics, allowing you to make data-driven decisions and stay ahead of the competition.
Of course! Let me break it down for you. This feature essentially streamlines your customer support process by automating ticket management and routing. It ensures that customer inquiries are efficiently handled and assigned to the most appropriate team member, resulting in faster response times and improved customer satisfaction.
Absolutely! This product offers time savings, increased efficiency, and improved decision-making through advanced analytics. It simplifies your pr

In [10]:
split_text_cust

['Certainly! This product is designed to simplify your workflow by automating repetitive tasks. It uses advanced algorithms to analyze data and generate actionable insights, saving you time and effort.',
 'Absolutely! This service offers numerous benefits, such as increased productivity, improved efficiency, and cost savings. It integrates seamlessly with your existing systems and provides real-time analytics, allowing you to make data-driven decisions and stay ahead of the competition.',
 'Of course! Let me break it down for you. This feature essentially streamlines your customer support process by automating ticket management and routing. It ensures that customer inquiries are efficiently handled and assigned to the most appropriate team member, resulting in faster response times and improved customer satisfaction.',
 'Absolutely! This product offers time savings, increased efficiency, and improved decision-making through advanced analytics. It simplifies your processes and helps you

In [11]:
for line in split_text_cust:
    print(f"{line} | Prediction: {analyze_sentiment(line)}")

Certainly! This product is designed to simplify your workflow by automating repetitive tasks. It uses advanced algorithms to analyze data and generate actionable insights, saving you time and effort. | Prediction: neutral
Absolutely! This service offers numerous benefits, such as increased productivity, improved efficiency, and cost savings. It integrates seamlessly with your existing systems and provides real-time analytics, allowing you to make data-driven decisions and stay ahead of the competition. | Prediction: neutral
Of course! Let me break it down for you. This feature essentially streamlines your customer support process by automating ticket management and routing. It ensures that customer inquiries are efficiently handled and assigned to the most appropriate team member, resulting in faster response times and improved customer satisfaction. | Prediction: neutral
Absolutely! This product offers time savings, increased efficiency, and improved decision-making through advanced a

In [12]:
extract_keywords(full_text_cust)

'Automated repetitive tasks, advanced algorithms, data analysis, productivity, efficiency, cost savings, real-time analytics, data-driven decisions, customer support process, ticket management, customer inquiries, faster response times, customer satisfaction,'

In [13]:
train_set[:5]

['Customer: Can you please explain to me how this product works? Salesman: Certainly! This product is designed to simplify your workflow by automating repetitive tasks. It uses advanced algorithms to analyze data and generate actionable insights, saving you time and effort. Customer: Im not sure if this service is worth the price. Can you explain its benefits to me? Salesman: Absolutely! This service offers numerous benefits, such as increased productivity, improved efficiency, and cost savings. It integrates seamlessly with your existing systems and provides real-time analytics, allowing you to make data-driven decisions and stay ahead of the competition. Customer: I have a limited technical background. Can you explain this feature in simpler terms? Salesman: Of course! Let me break it down for you. This feature essentially streamlines your customer support process by automating ticket management and routing. It ensures that customer inquiries are efficiently handled and assigned to t

In [14]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.cluster import KMeans
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
import string

# Sample dataset of conversations
data = train_set

# Preprocessing function
def preprocess_text(text):
    # Tokenize text
    tokens = word_tokenize(text.lower())
    
    # Remove stopwords and punctuation
    stop_words = set(stopwords.words('english'))
    tokens = [token for token in tokens if token not in stop_words and token not in string.punctuation]
    
    # Join tokens back into a string
    processed_text = ' '.join(tokens)
    
    return processed_text

# Preprocess each conversation
processed_data = [preprocess_text(conv) for conv in data]

# TF-IDF Vectorization
vectorizer = TfidfVectorizer()
X = vectorizer.fit_transform(processed_data)

# Clustering with K-means
num_clusters = 3  # Number of clusters (you can adjust this)
kmeans = KMeans(n_clusters=num_clusters, random_state=0)
clusters = kmeans.fit_predict(X)

# Assign clusters to original data
clustered_data = pd.DataFrame({'Conversation': data, 'Cluster': clusters})

# Display the clustered data
print(clustered_data)


                                           Conversation  Cluster
0     Customer: Can you please explain to me how thi...        2
1     Customer: Im interested in your product, but I...        2
2     Customer: Hi, Im interested in learning more a...        2
3     Customer: Im interested in this new health pro...        2
4     Customer: Im looking for a financial product t...        0
...                                                 ...      ...
2723  Customer: Im interested in your financial prod...        0
2724  Customer: Im interested in your health supplem...        1
2725  Customer: Hi, Im interested in your health pro...        1
2726  Customer: Hi, Im looking for a new health supp...        1
2727  Customer: Im interested in your product, but I...        2

[2728 rows x 2 columns]


In [15]:
for i in range(10,20):
    print(clustered_data[clustered_data['Cluster'] == 1].iloc[i]['Conversation'])

Customer: Im interested in improving my overall health and wellness. Salesman: Thats great to hear! Tell me more about what specific areas youd like to focus on. Customer: I have been feeling very tired lately and Im looking for ways to boost my energy levels. Salesman: I understand how important it is to have enough energy throughout the day. Can you share any specific challenges youre facing in terms of feeling tired? Customer: Ive been experiencing back pain and Im wondering if there are any products or solutions that could help alleviate it. Salesman: Im sorry to hear about your back pain. Understanding the cause of the pain and finding the right solution is crucial. Can you provide more details about the type of pain and any activities that may worsen it? Customer: Im concerned about my weight and I want to find a sustainable way to lose some pounds. Salesman: Weight management is a common concern for many people. Its important to find a sustainable approach. Can you tell me more 

In [16]:
import random
import faker

fake = faker.Faker()

# Mocking client records
def mock_clients(num_clients):
    clients = []
    for i in range(num_clients):
        client_id = i + 1
        client_name = fake.company()
        clients.append((client_id, client_name))
    return clients
    # df_clients = pd.DataFrame(clients, columns=['id', 'name'])
    # return df_clients

# Mocking contact records
def mock_contacts(num_contacts, clients):
    contacts = []
    for i in range(num_contacts):
        contact_id = i + 1
        client_id = random.choice(clients)[0]
        contact_name = fake.name()
        phone_number = fake.phone_number()
        contacts.append((contact_id, client_id, contact_name, phone_number))
    return contacts
    # df_contacts = pd.DataFrame(contacts, columns=['id', 'client_id', 'name', 'phone_number'])
    # return df_contacts

# Mocking call records
def mock_call_records(num_calls, clients, contacts):
    call_records = []
    for i in range(num_calls):
        call_id = i + 1
        client_id = random.choice(clients)[0]
        contact_id = random.choice(contacts)[0]
        phone_number = fake.phone_number()
        call_records.append((call_id, client_id, contact_id, phone_number))
    return call_records
    # df_call_records = pd.DataFrame(call_records, columns=['id', 'client_id', 'contact_id', 'phone_number'])
    # return df_call_records

# Example usage
num_clients = 5
num_contacts = 10
num_calls = 20

clients = mock_clients(num_clients)
contacts = mock_contacts(num_contacts, clients)
call_records = mock_call_records(num_calls, clients, contacts)

In [17]:
df_clients = pd.DataFrame(clients, columns=['id', 'name'])
df_contacts = pd.DataFrame(contacts, columns=['id', 'client_id', 'name', 'phone_number'])
df_callrecords = pd.DataFrame(call_records, columns=['id', 'client_id', 'contact_id', 'phone_number'])

In [18]:
%run mock_conversations.py

In [19]:
convos['convo5']

[{'speaker': 'Alex',
  'role': 'salesman',
  'message': "Hi there! I'm Alex from TikTok's advertising team. How are you today?"},
 {'speaker': 'Emma',
  'role': 'customer',
  'message': "Hi Alex, I'm doing well, thank you. It's nice to meet you."},
 {'speaker': 'Alex',
  'role': 'salesman',
  'message': "Likewise, Emma! I'm excited to discuss how TikTok's advertising solutions can help your brand reach a wider audience and drive engagement. Could you share a bit about your business and your marketing goals?"},
 {'speaker': 'Emma',
  'role': 'customer',
  'message': "Sure! We're an e-commerce startup specializing in sustainable fashion. Our goal is to increase brand awareness and drive sales through digital marketing."},
 {'speaker': 'Alex',
  'role': 'salesman',
  'message': "That's fantastic. TikTok offers a variety of ad formats that can effectively showcase your sustainable fashion products to our engaged user base. Have you considered advertising on TikTok before?"},
 {'speaker': '

In [20]:
def extract(convo_ds):
    cust, sales = [], []
    for entry in convo_ds:
        if entry['role'] == 'customer':
            cust.append(entry['message'])
        if entry['role'] == 'salesman':
            sales.append(entry['message'])
    return cust, sales

list_cust, list_sales = extract(convos['convo5'])
full_cust, full_sales = ' '.join(list_cust), ' '.join(list_sales)

In [21]:
summarise(full_cust)

"Alex will send over a proposal for TikTok advertising to an e-commerce startup. The startup's goal is to increase brand awareness and drive sales through digital marketing. Alex will provide examples of successful ad campaigns on TikTok."

In [22]:
extract_keywords(full_cust)

'E-commerce, sustainable fashion, brand awareness, sales, digital marketing, advertising solution, TikTok advertising, budget'

In [23]:
for line in list_cust:
    print(f"{line} | Prediction: {analyze_sentiment(line)}")

Hi Alex, I'm doing well, thank you. It's nice to meet you. | Prediction: joy
Sure! We're an e-commerce startup specializing in sustainable fashion. Our goal is to increase brand awareness and drive sales through digital marketing. | Prediction: neutral
We've thought about it, but we're also exploring other platforms. We're looking for the most effective advertising solution that fits within our budget. | Prediction: neutral
Yes, that would be helpful. Can you also provide some examples of successful ad campaigns on TikTok within the fashion industry? | Prediction: neutral
Great, I look forward to seeing those. What are the next steps if we decide to move forward with TikTok advertising? | Prediction: joy
Sounds good, Alex. Please send over the proposal when it's ready. | Prediction: neutral
Thank you, Alex. I appreciate your assistance. Looking forward to reviewing the proposal! | Prediction: joy


In [24]:
for line in list_sales:
    print(f"{line} | Prediction: {analyze_sentiment(line)}")

Hi there! I'm Alex from TikTok's advertising team. How are you today? | Prediction: neutral
Likewise, Emma! I'm excited to discuss how TikTok's advertising solutions can help your brand reach a wider audience and drive engagement. Could you share a bit about your business and your marketing goals? | Prediction: joy
That's fantastic. TikTok offers a variety of ad formats that can effectively showcase your sustainable fashion products to our engaged user base. Have you considered advertising on TikTok before? | Prediction: neutral
Understood. TikTok's ad platform is flexible and can be tailored to fit different budgets. Would you be interested in receiving a personalized proposal outlining potential ad strategies and costs based on your marketing goals? | Prediction: neutral
Absolutely! Brands like H&M and Zara have successfully utilized TikTok's In-Feed Ads and Branded Hashtag Challenges to engage with their audience and drive sales. I'll gather specific case studies for you after our c

In [25]:
# Define follow-up action keywords
follow_up_keywords = ["schedule", "follow up", "set up", "arrange", "send", "discuss", "plan"]

# Function to extract follow-up actions from a conversation
def extract_follow_up_actions(convo):
    follow_up_actions = []
    for message in convo:
        for keyword in follow_up_keywords:
            if keyword in message["message"].lower():
                follow_up_actions.append({
                    "speaker": message["speaker"],
                    "role": message["role"],
                    "message": message["message"]
                })
                break  # Stop checking further keywords once a match is found
    return follow_up_actions

# Extract follow-up actions from each conversation in your convos dataset
all_follow_up_actions = {}
for convo_name, conversation in convos.items():
    follow_up_actions = extract_follow_up_actions(conversation)
    all_follow_up_actions[convo_name] = follow_up_actions

# Print the extracted follow-up actions for each conversation
for convo_name, actions in all_follow_up_actions.items():
    print(f"Conversation '{convo_name}':")
    for action in actions:
        print(f"- {action['role']} ({action['speaker']}): {action['message']}")
    print()


Conversation 'convo1':
- salesman (Alex): I'm great, thank you! I'm excited to discuss how TikTok can help your brand reach a wider audience and drive engagement. Could you tell me a bit about your business and your current marketing strategies?
- salesman (Alex): We can set up a follow-up meeting with our creative and marketing teams to discuss a customized strategy for your brand. We'll also provide you with a detailed proposal outlining our recommendations and potential costs.
- customer (Jamie): Sounds good, Alex. Let's schedule that follow-up meeting.
- salesman (Alex): Fantastic! I'll send you an email with available times for next week. Thank you for your time, Jamie. I look forward to working together to make your brand shine on TikTok!

Conversation 'convo2':
- salesman (Sarah): Costs can vary based on the influencer's reach and engagement rates. We can discuss budget options and find influencers who fit within your marketing budget while delivering impactful results.
- salesm

In [27]:
# Example of integrating semantic compatibility scoring with existing approach

from transformers import BertForSequenceClassification, BertTokenizer
import torch

# Fine-tuned BERT model and tokenizer (replace with your fine-tuned model)
model_name = 'bert-base-uncased'
tokenizer = BertTokenizer.from_pretrained(model_name)
model = BertForSequenceClassification.from_pretrained(model_name)


def find_best_action_noun_pair(actions, nouns):
    max_score = -float('inf')
    best_action_noun_pair = None
    
    for action in actions:
        for noun in nouns:
            # Prepare inputs for BERT
            input_text = f"{action} {noun}"
            inputs = tokenizer(input_text, return_tensors="pt", padding=True, truncation=True)

            # Forward pass through BERT
            with torch.no_grad():
                outputs = model(**inputs)

            # Extract logits and softmax to get probabilities
            probabilities = torch.softmax(outputs.logits, dim=-1)[0].numpy()

            # Assuming index 1 is the positive class score
            score = probabilities[1]

            # Update best pair if score is higher
            if score > max_score:
                max_score = score
                best_action_noun_pair = (action, noun)
    
    return best_action_noun_pair

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [41]:
import spacy
import en_core_web_sm

# Load the SpaCy English model
nlp = en_core_web_sm.load()

# Define follow-up keywords
follow_up_keywords = ["schedule", "follow up", "set up", "arrange", "send", "discuss", "plan"]

# Function to extract follow-up actions with nouns using SpaCy
def extract_follow_up_actions_with_nouns(convo):
    follow_up_actions = []
    for message in convo:
        doc = nlp(message["message"])
        found_action = False
        for keyword in follow_up_keywords:
            if keyword in message["message"].lower():
                found_action = True
                action_nouns = []
                for token in doc:
                    if token.dep_ == "dobj":  # Check if token is a direct object
                        action_nouns.append(token.text)
                follow_up_actions.append({
                    "speaker": message["speaker"],
                    "role": message["role"],
                    "message": message["message"],
                    "action": keyword,
                    "nouns": action_nouns,
                    "timeframe": None  # Placeholder for timeframe
                })
                break  # Stop checking further keywords once a match is found

        if found_action:
            # Extract temporal entities (e.g., dates, times, durations) from the processed text
            for ent in doc.ents:
                if ent.label_ in ["DATE", "TIME", "DURATION"]:
                    follow_up_actions[-1]["timeframe"] = ent.text
                    break  # Take the first temporal entity found

    return follow_up_actions
# Example: Extract follow-up actions with nouns from each conversation in your convos dataset
all_follow_up_actions_with_nouns = {}
for convo_name, conversation in convos.items():
    follow_up_actions = extract_follow_up_actions_with_nouns(conversation)
    all_follow_up_actions_with_nouns[convo_name] = follow_up_actions

# Print the extracted follow-up actions with nouns for each conversation
for convo_name, actions in all_follow_up_actions_with_nouns.items():
    print(f"Conversation '{convo_name}':")
    for action in actions:
        print(f"- {action['role']} ({action['speaker']}): {action['message']} | Action: {find_best_action_noun_pair([action['action']], action['nouns'])} | Timeframe: {action['timeframe']}")
    print()


Conversation 'convo1':
- salesman (Alex): I'm great, thank you! I'm excited to discuss how TikTok can help your brand reach a wider audience and drive engagement. Could you tell me a bit about your business and your current marketing strategies? | Action: ('discuss', 'bit') | Timeframe: None
- salesman (Alex): We can set up a follow-up meeting with our creative and marketing teams to discuss a customized strategy for your brand. We'll also provide you with a detailed proposal outlining our recommendations and potential costs. | Action: ('set up', 'strategy') | Timeframe: None
- customer (Jamie): Sounds good, Alex. Let's schedule that follow-up meeting. | Action: None | Timeframe: None
- salesman (Alex): Fantastic! I'll send you an email with available times for next week. Thank you for your time, Jamie. I look forward to working together to make your brand shine on TikTok! | Action: ('send', 'email') | Timeframe: next week

Conversation 'convo2':
- salesman (Sarah): Costs can vary base

In [None]:
for convo_name, actions in all_follow_up_actions_with_nouns.items():
    print(actions)

[{'speaker': 'Alex', 'role': 'salesman', 'message': "I'm great, thank you! I'm excited to discuss how TikTok can help your brand reach a wider audience and drive engagement. Could you tell me a bit about your business and your current marketing strategies?", 'action': 'discuss', 'nouns': ['you', 'engagement', 'bit'], 'timeframe': None}, {'speaker': 'Alex', 'role': 'salesman', 'message': "We can set up a follow-up meeting with our creative and marketing teams to discuss a customized strategy for your brand. We'll also provide you with a detailed proposal outlining our recommendations and potential costs.", 'action': 'set up', 'nouns': ['meeting', 'strategy', 'you', 'recommendations'], 'timeframe': None}, {'speaker': 'Jamie', 'role': 'customer', 'message': "Sounds good, Alex. Let's schedule that follow-up meeting.", 'action': 'schedule', 'nouns': [], 'timeframe': None}, {'speaker': 'Alex', 'role': 'salesman', 'message': "Fantastic! I'll send you an email with available times for next wee

In [42]:
import spacy

# Load the SpaCy English model
nlp = spacy.load("en_core_web_sm")

# Define follow-up keywords
follow_up_keywords = ["schedule", "follow up", "set up", "arrange", "send", "discuss", "plan"]

# Function to extract follow-up actions with complete entities
def extract_follow_up_actions_with_entities(convo):
    follow_up_actions = []
    for message in convo:
        doc = nlp(message["message"])
        for keyword in follow_up_keywords:
            if keyword in message["message"].lower():
                # Find the root verb and its dependencies
                for token in doc:
                    if token.lemma_ == keyword:
                        action_phrase = []
                        for child in token.children:
                            # Collect all words related to the action
                            if child.dep_ in ("dobj", "prep", "pobj", "advmod", "amod", "attr"):
                                action_phrase.append(child.text)
                                for subchild in child.children:
                                    action_phrase.append(subchild.text)
                        action_phrase = ' '.join(sorted(set(action_phrase), key=lambda x: doc.text.find(x)))
                        follow_up_actions.append({
                            "speaker": message["speaker"],
                            "role": message["role"],
                            "message": message["message"],
                            "action": keyword,
                            "entity": f"{keyword} {action_phrase}"
                        })
    return follow_up_actions

# Example: Extract follow-up actions with entities from each conversation in your convos dataset
all_follow_up_actions_with_entities = {}
for convo_name, conversation in convos.items():
    follow_up_actions = extract_follow_up_actions_with_entities(conversation)
    all_follow_up_actions_with_entities[convo_name] = follow_up_actions

# Print the extracted follow-up actions with entities for each conversation
for convo_name, actions in all_follow_up_actions_with_entities.items():
    print(f"Conversation '{convo_name}':")
    for action in actions:
        print(f"- {action['role']} ({action['speaker']}): {action['message']} | Entity: {action['entity']}")
    print()

# Function to find the best action-noun pair using BERT
def find_best_action_noun_pair(actions):
    max_score = -float('inf')
    best_action_noun_pair = None

    for action in actions:
        action_phrase = action['entity']
        # Prepare inputs for BERT
        inputs = tokenizer(action_phrase, return_tensors="pt", padding=True, truncation=True)

        # Forward pass through BERT
        with torch.no_grad():
            outputs = model(**inputs)

        # Extract logits and softmax to get probabilities
        probabilities = torch.softmax(outputs.logits, dim=-1)[0].numpy()

        # Assuming index 1 is the positive class score
        score = probabilities[1]

        # Update best pair if score is higher
        if score > max_score:
            max_score = score
            best_action_noun_pair = action_phrase

    return best_action


Conversation 'convo1':
- salesman (Alex): I'm great, thank you! I'm excited to discuss how TikTok can help your brand reach a wider audience and drive engagement. Could you tell me a bit about your business and your current marketing strategies? | Entity: discuss 
- salesman (Alex): We can set up a follow-up meeting with our creative and marketing teams to discuss a customized strategy for your brand. We'll also provide you with a detailed proposal outlining our recommendations and potential costs. | Entity: discuss a customized strategy for
- customer (Jamie): Sounds good, Alex. Let's schedule that follow-up meeting. | Entity: schedule 
- salesman (Alex): Fantastic! I'll send you an email with available times for next week. Thank you for your time, Jamie. I look forward to working together to make your brand shine on TikTok! | Entity: send an email with times

Conversation 'convo2':
- salesman (Sarah): Costs can vary based on the influencer's reach and engagement rates. We can discuss

In [72]:
import spacy
from transformers import pipeline, AutoModelForTokenClassification, AutoTokenizer

# Load the SpaCy English model
nlp = spacy.load("en_core_web_sm")

# Load the BERT model and tokenizer
model_name = "dbmdz/bert-large-cased-finetuned-conll03-english"
model = AutoModelForTokenClassification.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)
ner_pipeline = pipeline("ner", model=model, tokenizer=tokenizer)

# Define follow-up keywords
follow_up_keywords = ["schedule", "follow up", "set up", "arrange", "send", "discuss", "plan", "meet"]

# Function to extract follow-up actions with complete entities, excluding named entities
def extract_follow_up_actions_with_entities(convo):
    follow_up_actions = []
    for message in convo:
        doc = nlp(message["message"])
        entities = ner_pipeline(message["message"])
        named_entities = {entity['word'] for entity in entities if entity['entity'].startswith("B-")}
        
        for token in doc:
            if token.lemma_ in follow_up_keywords:
                action_phrase = [token.text]

                # Include direct objects, prepositional objects, and their modifiers
                for child in token.children:
                    if (child.dep_ in ("dobj", "prep", "pobj", "advmod", "amod", "attr") and 
                        child.text not in named_entities):
                        action_phrase.append(child.text)
                        for grandchild in child.children:
                            if (grandchild.dep_ in ("prep", "pobj", "advmod", "amod") and 
                                grandchild.text not in named_entities):
                                action_phrase.append(grandchild.text)

                # Combine subwords into complete entities
                complete_entity = ""
                for entity in entities:
                    if entity['word'] not in named_entities:
                        if not entity['word'].startswith("##"):
                            if complete_entity:
                                action_phrase.append(complete_entity)
                            complete_entity = entity['word']
                        else:
                            complete_entity += entity['word'][2:]
                if complete_entity:
                    action_phrase.append(complete_entity)
                
                # Join the action phrase and exclude named entities
                filtered_action_phrase = " ".join(
                    word for word in action_phrase if word not in named_entities
                )
                
                follow_up_actions.append({
                    "speaker": message["speaker"],
                    "role": message["role"],
                    "message": message["message"],
                    "action_phrase": filtered_action_phrase
                })
                break  # Stop checking further tokens once a match is found
    return follow_up_actions


# Extract follow-up actions with entities
follow_up_actions_with_entities = extract_follow_up_actions_with_entities(convos['convo5'])

# Print the extracted follow-up actions with entities
for action in follow_up_actions_with_entities:
    print(f"- {action['role']} ({action['speaker']}): {action['message']} | Action Phrase: {action['action_phrase']}")



Some weights of the model checkpoint at dbmdz/bert-large-cased-finetuned-conll03-english were not used when initializing BertForTokenClassification: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


- customer (Emma): Hi Alex, I'm doing well, thank you. It's nice to meet you. | Action Phrase: meet you Alex
- salesman (Alex): Likewise, Emma! I'm excited to discuss how TikTok's advertising solutions can help your brand reach a wider audience and drive engagement. Could you share a bit about your business and your marketing goals? | Action Phrase: discuss Emma TikTok
- salesman (Alex): I'll prepare a detailed proposal for your review, including recommended ad formats, targeting options, and budget considerations. Once you've reviewed the proposal, we can schedule a call to discuss any questions or adjustments before launching your campaign. | Action Phrase: schedule call
- customer (Emma): Sounds good, Alex. Please send over the proposal when it's ready. | Action Phrase: send proposal Alex
- salesman (Alex): Will do, Emma. I'll prioritize putting together the proposal and send it to you by the end of the week. Thank you for your time today, and I look forward to helping your sustaina

In [71]:
import spacy
from transformers import pipeline, AutoModelForTokenClassification, AutoTokenizer

# Load the SpaCy English model
nlp = spacy.load("en_core_web_sm")

# Load the BERT model and tokenizer for Named Entity Recognition (NER)
model_name = "dbmdz/bert-large-cased-finetuned-conll03-english"
model = AutoModelForTokenClassification.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)
ner_pipeline = pipeline("ner", model=model, tokenizer=tokenizer)

# Define follow-up keywords
follow_up_keywords = ["schedule", "follow up", "set up", "arrange", "send", "discuss"]

# Function to extract follow-up actions with complete entities, excluding named entities
def extract_follow_up_actions_with_entities(convo):
    follow_up_actions = []
    for message in convo:
        doc = nlp(message["message"])
        entities = ner_pipeline(message["message"])
        named_entities = {entity['word'] for entity in entities if entity['entity'].startswith("B-")}
        
        for token in doc:
            if token.lemma_ in follow_up_keywords:
                action_phrase = [token.text]

                # Include direct objects, prepositional objects, and their modifiers
                for child in token.children:
                    if (child.dep_ in ("dobj", "prep", "pobj", "advmod", "amod", "attr") and 
                        child.text not in named_entities):
                        action_phrase.append(child.text)
                        for grandchild in child.children:
                            if (grandchild.dep_ in ("prep", "pobj", "advmod", "amod") and 
                                grandchild.text not in named_entities):
                                action_phrase.append(grandchild.text)

                # Join the action phrase and exclude named entities
                filtered_action_phrase = " ".join(
                    word for word in action_phrase if word not in named_entities
                )
                
                follow_up_actions.append({
                    "speaker": message["speaker"],
                    "role": message["role"],
                    "message": message["message"],
                    "action_phrase": filtered_action_phrase
                })
                break  # Stop checking further tokens once a match is found
    return follow_up_actions

# Extract follow-up actions with entities
follow_up_actions_with_entities = extract_follow_up_actions_with_entities(convos['convo5'])

# Print the extracted follow-up actions with entities
for action in follow_up_actions_with_entities:
    print(f"- {action['role']} ({action['speaker']}): {action['message']} | Action Phrase: {action['action_phrase']}")


Some weights of the model checkpoint at dbmdz/bert-large-cased-finetuned-conll03-english were not used when initializing BertForTokenClassification: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


- salesman (Alex): Likewise, Emma! I'm excited to discuss how TikTok's advertising solutions can help your brand reach a wider audience and drive engagement. Could you share a bit about your business and your marketing goals? | Action Phrase: discuss
- salesman (Alex): I'll prepare a detailed proposal for your review, including recommended ad formats, targeting options, and budget considerations. Once you've reviewed the proposal, we can schedule a call to discuss any questions or adjustments before launching your campaign. | Action Phrase: schedule call
- customer (Emma): Sounds good, Alex. Please send over the proposal when it's ready. | Action Phrase: send proposal
- salesman (Alex): Will do, Emma. I'll prioritize putting together the proposal and send it to you by the end of the week. Thank you for your time today, and I look forward to helping your sustainable fashion brand succeed on TikTok! | Action Phrase: send it by end


In [66]:
convos['convo4']

[{'speaker': 'TikTok Sales Representative (Emily)',
  'role': 'salesman',
  'message': "Hi there! I'm Emily from TikTok's advertising team. How are you today?"},
 {'speaker': 'Potential Client (Jessica)',
  'role': 'customer',
  'message': "Hi Emily, I'm doing well, thank you. How about you?"},
 {'speaker': 'Emily',
  'role': 'salesman',
  'message': "I'm great, thanks! I'm excited to discuss how TikTok's advertising solutions can benefit your business. Could you tell me a bit about your business and your advertising goals?"},
 {'speaker': 'Jessica',
  'role': 'customer',
  'message': 'Sure! We run a small boutique specializing in handmade jewelry. Our goal is to increase online sales and build brand awareness among a younger audience.'},
 {'speaker': 'Emily',
  'role': 'salesman',
  'message': "That's fantastic. TikTok could be a great platform to showcase your unique products through engaging video content. Have you considered advertising on TikTok before?"},
 {'speaker': 'Jessica',


In [49]:
convos['convo1']

[{'speaker': 'Alex',
  'role': 'salesman',
  'message': "Hi there! My name is Alex, and I'm with TikTok's business development team. How are you doing today?"},
 {'speaker': 'Jamie',
  'role': 'customer',
  'message': "Hi Alex, I'm doing well, thank you. How about you?"},
 {'speaker': 'Alex',
  'role': 'salesman',
  'message': "I'm great, thank you! I'm excited to discuss how TikTok can help your brand reach a wider audience and drive engagement. Could you tell me a bit about your business and your current marketing strategies?"},
 {'speaker': 'Jamie',
  'role': 'customer',
  'message': "Sure! We are a fashion retail brand, and we primarily market through Instagram and Facebook. We're looking to expand our online presence and reach a younger demographic."},
 {'speaker': 'Alex',
  'role': 'salesman',
  'message': 'That sounds like a perfect fit for TikTok! Our platform has a large and engaged user base, especially among Gen Z and Millennials. Have you considered using TikTok for your br

In [88]:
import spacy
from spacy.matcher import Matcher

# Load the SpaCy English model
nlp = spacy.load("en_core_web_sm")

# Define follow-up keywords
follow_up_keywords = ["schedule", "follow", "set", "arrange", "send", "discuss"]

# Create a Matcher object
matcher = Matcher(nlp.vocab)

# Define patterns for the keywords
pattern1 = [{'LOWER': 'schedule'}]
pattern2 = [{'LOWER': 'follow'}]
pattern3 = [{'LOWER': 'set'}]
pattern4 = [{'LOWER': 'arrange'}]
pattern5 = [{'LOWER': 'send'}]
pattern6 = [{'LOWER': 'discuss'}]

# Add patterns to the matcher
matcher.add('FOLLOW_UP_KEYWORDS', [pattern1, pattern2, pattern3, pattern4, pattern5, pattern6], on_match=None)

# Function to extract follow-up actions with chunks and temporal phrases
def extract_follow_up_actions_with_chunks(convo):
    follow_up_actions = []
    for message in convo:
        doc = nlp(message["message"])
        matches = matcher(doc)
        named_entities = {ent.text for ent in doc.ents if ent.label_ in {"PERSON", "ORG"}}
        
        for match_id, start, end in matches:
            token = doc[start]
            action_phrase = {token.text}
            
            # Include direct objects, prepositional objects, and their modifiers
            for child in token.children:
                if child.dep_ in ("dobj", "prep", "pobj", "advmod", "amod", "attr", "npadvmod") and child.text not in named_entities:
                    action_phrase.add(child.text)
                    for grandchild in child.children:
                        if grandchild.dep_ in ("prep", "pobj", "advmod", "amod", "npadvmod") and grandchild.text not in named_entities:
                            action_phrase.add(grandchild.text)
            
            # Include temporal phrases
            for ent in doc.ents:
                if ent.label_ == "DATE":
                    action_phrase.add(ent.text)

            # Join the action phrase and exclude named entities
            filtered_action_phrase = " ".join(sorted(action_phrase, key=lambda x: doc.text.find(x)))
            
            follow_up_actions.append({
                "speaker": message["speaker"],
                "role": message["role"],
                "message": message["message"],
                "action_phrase": filtered_action_phrase
            })
            break  # Stop checking further tokens once a match is found
    return follow_up_actions

# Example conversation
convo = [
    {"speaker": "Customer", "role": "customer", "message": "Can we schedule a follow-up meeting next week?"},
    {"speaker": "Sales Rep", "role": "sales_rep", "message": "Sure, I will send you an email with the details."},
    {"speaker": "Customer", "role": "customer", "message": "Great, let's set up a call and send proposal."}
]

# Extract follow-up actions with chunks
follow_up_actions_with_chunks = extract_follow_up_actions_with_chunks(convo)

# Print the extracted follow-up actions with chunks
for action in follow_up_actions_with_chunks:
    print(f"- {action['role']} ({action['speaker']}): {action['message']} | Action Phrase: {action['action_phrase']}")



- customer (Customer): Can we schedule a follow-up meeting next week? | Action Phrase: schedule follow meeting next week next week
- sales_rep (Sales Rep): Sure, I will send you an email with the details. | Action Phrase: send email with
- customer (Customer): Great, let's set up a call and send proposal. | Action Phrase: set call
