In [126]:
# Imports
from bs4 import BeautifulSoup as bs
import requests
import re
import time
import random
import langchain
import langchain_community
import langchain_core
import langchain_openai
import streamlit as st
import math
import json
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

In [4]:
# Atlassian Community front page sorted by most recent posts.
# Not read by pull_forum_posts(), which builds its own paged URL.
url = 'https://community.atlassian.com/?sort=recent'
# CSS class pull_forum_posts() searches for to locate each post's title element.
post_title_class = 'atl-post-list__tile__title'
# CSS class of the <div> that pull_post_body() reads a post's body text from.
post_body_class = 'lia-message-body-content'

In [5]:
def fetch_html(url, timeout=10):
    """Download a page with an HTTP GET and return its body as text.

    Args:
        url: Address to request.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled server would block the notebook indefinitely.

    Returns:
        The response text when the server answers with status 200, otherwise
        None (after printing an error message).
    """
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as exc:
        # Connection errors and timeouts follow the same "print and return None"
        # contract as a non-200 status instead of raising into the caller.
        print(f'Error fetching HTML! ({exc})')
        return None

    if response.status_code == 200:
        return response.text

    print('Error fetching HTML!')
    return None

In [6]:
def pull_forum_posts(pages: int = 1):
    """Collect post titles and URLs from the Atlassian Community listing pages.

    Reads the module-level ``post_title_class`` to locate title elements and
    uses ``fetch_html`` to download each page.

    Args:
        pages: Number of listing pages to read, starting at page 1.

    Returns:
        A list of dicts with the keys 'title' and 'url', in page order. Pages
        that could not be downloaded and tiles without a usable link are
        skipped.

    Raises:
        TypeError: If ``pages`` is not an integer.
    """
    if not isinstance(pages, int):
        raise TypeError('The "pages" parameter must be an integer.')

    posts = []

    for page_number in range(1, pages + 1):

        url = f'https://community.atlassian.com/?sort=recent&page={page_number}'
        html = fetch_html(url)

        if html is None:
            # fetch_html returns None on failure; handing None to BeautifulSoup
            # would raise, so skip this page and keep what was collected so far.
            print(f'Skipping page {page_number}: no HTML returned.')
        else:
            soup = bs(html, 'lxml')

            for tile in soup.find_all(class_=post_title_class):
                link = tile.find('a')
                # A tile without an anchor (or an anchor without href) cannot
                # yield a URL, so it is ignored rather than crashing the pull.
                if link is None or not link.get('href'):
                    continue

                posts.append({
                    'title': link.get_text().strip(),
                    'url': 'https://community.atlassian.com' + link['href'],
                })

        # Pause between requests to avoid hammering the site.
        time.sleep(1)

    return posts


In [28]:
def pull_post_body(posts: list):
    """Download each post's page and attach its body text.

    Reads the module-level ``post_body_class`` to locate the body ``<div>``
    and uses ``fetch_html`` to download each page.

    Args:
        posts: List of dicts that each carry the keys 'title' and 'url'.

    Returns:
        A new list of dicts with the keys 'title', 'url' and 'body'. When a
        page cannot be downloaded or contains no body element, 'body' is an
        empty string so the result stays aligned with ``posts``.

    Raises:
        TypeError: If ``posts`` is not a list.
    """
    if not isinstance(posts, list):
        raise TypeError('The "posts" parameter must be a list.')

    full_post_data = []

    for post in posts:

        body = ''
        html = fetch_html(post['url'])

        if html is None:
            print(f"Could not download {post['url']}; storing an empty body.")
        else:
            body_div = bs(html, 'lxml').find('div', class_=post_body_class)
            if body_div is None:
                # find() returns None when nothing matches; calling get_text()
                # on it would abort the whole pull with AttributeError.
                print(f"No body element found at {post['url']}; storing an empty body.")
            else:
                body = body_div.get_text().strip()

        full_post_data.append({
            'title': post['title'],
            'url': post['url'],
            'body': body,
        })

        # Random 1-3 second pause between page requests.
        time.sleep(random.randint(1, 3))

    return full_post_data

In [8]:
def full_pull(pages: int = 1):
    """Run the two-stage scrape and print how long each stage took.

    Stage one gathers titles and URLs with ``pull_forum_posts``; stage two
    adds the bodies with ``pull_post_body``.

    Args:
        pages: Number of listing pages to read; passed to ``pull_forum_posts``.

    Returns:
        The list returned by ``pull_post_body``.
    """
    started_at = time.time()

    listing = pull_forum_posts(pages)
    listing_seconds = time.time() - started_at
    print(f'Pulled post titles and URLs. [{listing_seconds:.2f} seconds] \nProceeding to pull post bodies...')

    with_bodies = pull_post_body(listing)
    # Elapsed time since the start minus the first stage gives the second stage.
    bodies_seconds = time.time() - started_at - listing_seconds
    print(f'Pulled post bodies. [{bodies_seconds:.2f} seconds]\n[{time.time() - started_at:.2f} seconds]')

    return with_bodies

In [32]:
# Scrape one listing page and the body of every post on it; full_pull prints the timings.
one_page = full_pull(1)

Pulled post titles and URLs. [2.12 seconds] 
Proceeding to pull post bodies...
Pulled post bodies. [40.91 seconds]
[43.03 seconds]


In [33]:
# Display the scraped posts (dicts with 'title', 'url' and 'body') as the cell output.
one_page

[{'title': 'English trainer',
  'url': 'https://community.atlassian.com/t5/Teamwork-Lab-discussions/English-trainer/td-p/2760558',
  'body': "I'm happy to join this community"},
 {'title': 'use "-s recursive -X ours" for pull request',
  'url': 'https://community.atlassian.com/t5/Bitbucket-questions/use-quot-s-recursive-X-ours-quot-for-pull-request/qaq-p/2760552',
  'body': 'My Pull Request to a protected branch shows "You will need to resolve conflicts to be able to merge", but I locally I checked and it can be merged cleanly if I use `git merge\xa0-s recursive -X ours`.Can I use this strategy to resolve the conflict?'},
 {'title': 'Resource management: How to allocate issues on multiple users or teams in Jira?',
  'url': 'https://community.atlassian.com/t5/Jira-questions/Resource-management-How-to-allocate-issues-on-multiple-users-or/qaq-p/2760551',
  'body': "Hi,a very common use case is that companies want to do resource management on issues in Jira, and plan Initiatives or Epics m

In [3]:

# SERVICENOW (alternative target, currently disabled; uncomment to use it instead of the Asana settings below)

# url = 'https://www.servicenow.com/community/itsm/ct-p/it-service-management'
# post_title_class = 'custom-message-tile'
# post_title_avoid_class ='custom-thread-featured-flag'
# post_body_class = 'lia-message-body'
# load_button_xpath = '//*[@id="custom-loader-button"]'
# accept_cookies_button = '//*[@id="truste-consent-button"]'

# ASANA (active target)

# Note: url, post_title_class and post_body_class are also assigned by the Atlassian
# settings cell; whichever cell ran last determines the values the functions see.
# Page that fetch_posts() opens in the browser.
url = 'https://forum.asana.com/c/forum-en/integrations/9'
# Class name of the elements fetch_posts() treats as post titles.
post_title_class = 'link-top-line'
# fetch_posts() skips elements whose class attribute contains this string.
# NOTE(review): same value as the ServiceNow setting above — confirm it is meaningful for Asana.
post_title_avoid_class ='custom-thread-featured-flag'
# Class name of the element fetch_posts() reads each post's body text from.
post_body_class = 'cooked'
# The two values below are only referenced from commented-out lines in fetch_posts()
# and are identical to the ServiceNow values above.
load_button_xpath = '//*[@id="custom-loader-button"]'
accept_cookies_button = '//*[@id="truste-consent-button"]'

def fetch_posts(n):
    """Scrape up to ``n`` posts (title, URL, body) from the configured forum with Selenium.

    Reads the module-level settings ``url``, ``post_title_class``,
    ``post_title_avoid_class`` and ``post_body_class``.

    Args:
        n: Maximum number of posts to return.

    Returns:
        A list of at most ``n`` dicts with the keys 'title', 'url' and 'body'.
        'body' is an empty string for posts whose page could not be read, so
        every returned dict has the same keys.
    """
    # Number of consecutive passes without any new title before giving up.
    # Without this limit the collection loop never ends when the page offers
    # fewer than n posts.
    max_idle_passes = 3

    posts = []
    driver = webdriver.Chrome()

    try:
        driver.get(url)
        time.sleep(2)
        # load_button = driver.find_element(By.XPATH, load_button_xpath)
        # accept_cookies_button = driver.find_element(By.XPATH, accept_cookies_button)

        idle_passes = 0
        while len(posts) < n:
            try:
                # get_attribute returns None when the attribute is absent, so
                # fall back to '' before the substring test.
                tiles = [
                    tile for tile in driver.find_elements(By.CLASS_NAME, post_title_class)
                    if post_title_avoid_class not in (tile.get_attribute('class') or '')
                ]
                # Elements already converted on an earlier pass come first.
                new_tiles = tiles[len(posts):]

                if not new_tiles:
                    idle_passes += 1
                    if idle_passes >= max_idle_passes:
                        print(f'Only found {len(posts)} of {n} requested posts.')
                        break
                    # Scroll to the bottom so a lazily loading listing can add
                    # more rows, then look again.
                    driver.execute_script('window.scrollTo(0, document.body.scrollHeight);')
                    time.sleep(1)
                    continue

                idle_passes = 0
                for tile in new_tiles:
                    link = tile.find_element(By.TAG_NAME, 'a')
                    # Prefer the title attribute; use the visible text when it
                    # is missing or empty.
                    title = link.get_attribute('title') or link.text
                    posts.append({'title': title, 'url': link.get_attribute('href')})
            except Exception as e:
                print(f'Error: {e}')
                break

        selected = posts[:n]
        for post in selected:
            try:
                driver.get(post['url'])
                time.sleep(1)
                post['body'] = driver.find_element(By.CLASS_NAME, post_body_class).text
            except Exception as e:
                print(f'Error: {e}')
                # Keep the 'body' key present so later cells can index it.
                post['body'] = ''
    finally:
        # Always close the browser, even when navigation raises.
        driver.quit()

    return selected

In [4]:
# Scrape up to 20 posts from the configured forum page with Selenium.
posts = fetch_posts(20)

In [5]:
# Display the scraped posts as the cell output.
posts

[{'title': 'About the Integrations category',
  'url': 'https://forum.asana.com/t/about-the-integrations-category/15',
  'body': 'Ask all your questions about Asana integrations 83!'},
 {'title': 'Zapier Integration - Get IDs for custom field',
  'url': 'https://forum.asana.com/t/zapier-integration-get-ids-for-custom-field/900012',
  'body': 'Hi. I’m connecting Google Sheets and Asana, using Zapier. In both the Asana projec and the Google Sheet I have a mult-select field for “Assets”. I get the following error, how do I get the custom field values ID? I’m a project manager, not a developer.\nThanks!\nFailed to create a task in Asana\nThe app returned “multi_enum_values: [0]: Not a recognized ID: Asset example”.\nThe error message “multi_enum_values: [0]: Not a recognized ID: Asset example” indicates that the value “Asset examples” is not a valid ID for the custom field you are trying to update in Asana. This typically happens when the custom field expects a specific set of predefined o

In [2]:
class ScraperObjects:
    """Attribute bag: every keyword argument becomes an instance attribute."""

    def __init__(self, **kwargs):
        """Store each keyword argument on the instance under its own name."""
        for name in kwargs:
            setattr(self, name, kwargs[name])

    def __repr__(self):
        """Return the class name followed by one ``name=value`` line per attribute."""
        cls_name = self.__class__.__name__
        attr_lines = [f'{name}={val}' for name, val in self.__dict__.items()]
        return cls_name + '(\n' + ',\n'.join(attr_lines) + '\n)'

In [23]:
# Sample instance with two fields, matching the two-field repr printed in the next cell.
test = ScraperObjects(url='somewebsite.com', post_title_class='post-title')

In [24]:
# ScraperObjects defines __repr__ but no __str__, so print() shows the __repr__ text.
print(test)

ScraperObjects(
url=somewebsite.com,
post_title_class=post-title
)


In [6]:
from openai import OpenAI
from dotenv import load_dotenv
import os
import json
import pandas as pd

# NOTE(review): presumably loads settings such as the OpenAI API key from a local .env file — confirm.
load_dotenv()

def show_json(obj):
    """Render an object as parsed JSON in the notebook.

    Args:
        obj: Any object exposing ``model_dump_json()`` that returns a JSON string.
    """
    serialized = obj.model_dump_json()
    parsed = json.loads(serialized)
    display(parsed)

In [106]:
# OpenAI API client, created with no explicit arguments.
# NOTE(review): presumably reads the API key from the environment — confirm.
client = OpenAI()

# Prompt describing what counts as a Unito candidate and telling the model to
# answer through the evaluate_unito_candidates tool. It is sent as the system
# message of the chat completion request later in the notebook.
instructions = """You are an expert in integrations software and can identify when a user would benefit from using Unito.
You will use the evaluate_unito_candidates tool to indicate whether the messages in tha thread are good candidates for Unito's integration platform.
A candidate would be a message where a user is discussing a need to sync data between two tools.
Or, a user is asking for a way to automate a process between two tools.
Or, a user is looking to increase productivity by connecting two tools.
"""

# Tool (function-calling) schema passed in the `tools` list of the request.
# The model is expected to answer with a single argument, `bool_list`, holding
# one boolean per message it was given.
evaluate_unito_candidates_function = {
    "type": "function",
    "function": {
        "name": "evaluate_unito_candidates",
        "description": """Evaluates a thread of messages to determine which are good candidates for Unito integration based on specific criteria.
        Returns an array of boolean values, where each value corresponds to a message in the input array.
        If the message is a good candidate for Unito integration, the value should be true. Otherwise, it should be false.""",
        # `strict` must sit on the function definition itself (next to
        # `parameters`), not beside "type"/"function", for schema enforcement
        # to apply. Strict mode needs every property listed in `required` and
        # `additionalProperties` set to False, which the schema below satisfies.
        "strict": True,
        "parameters": {
            "type": "object",
            "properties": {
                "bool_list": {
                    "type": "array",
                    "description": "A list of boolean values corresponding to each element in the array provided by the user.",
                    "items": {
                        "type": "boolean",
                        "description": "Boolean values associated with indices from the array provided by the user."
                    }
                }
            },
            "required": ["bool_list"],
            'additionalProperties': False
        }
    }
}

# # assistant = client.beta.assistants.create(
# #   name="Unito Expert",
# #   instructions=instructions,
# #   tools=[evaluate_unito_candidates_function],
# #   model="gpt-4",

# # )

In [19]:
# NOTE(review): `assistant` is not assigned in any visible cell (the assistants.create
# call is commented out), so on a fresh kernel this line raises NameError unless the
# assistant is created or retrieved first.
show_json(assistant)

{'id': 'asst_DuhI25qpRTHwB507WNA9EVHR',
 'created_at': 1724617907,
 'description': None,
 'file_ids': [],
 'instructions': 'You are an expert in integrations software and can identify when a user would benefit from using Unito.\nYou will use the evaluate_unito_candidates tool to provide a boolean value for each message in a thread, indicating whether the post is a good candidate for Unito integration, and append it to a list.\nA candidate would be a message where a user is discussing a need to sync data between two tools. \nOr, a user is asking for a way to automate a process between two tools.\nOr, a user is looking to increase productivity by connecting two tools.\n',
 'metadata': {},
 'model': 'gpt-4',
 'name': 'Unito Expert',
 'object': 'assistant',
 'tools': [{'function': {'name': 'evaluate_unito_candidates',
    'description': 'Evaluates a thread of messages to determine which are good candidates for Unito integration based on specific criteria.\n        Returns an array of boole

In [12]:
def filter_posts_bulk(posts: list) -> list:
    """Upload every post body to a new thread and list the thread's messages.

    Uses the module-level ``client``. One user message is created per post, in
    the order of ``posts``.

    Args:
        posts: List of dicts that each carry a 'body' key.

    Returns:
        Whatever ``client.beta.threads.messages.list`` returns for the new thread.
        NOTE(review): this is presumably a paginated result object rather than
        a plain list, so the ``-> list`` annotation looks inaccurate — confirm.

    Raises:
        KeyError: If a post has no 'body' key.
    """
    thread = client.beta.threads.create()

    # Iterate the dicts directly: routing them through a DataFrame added nothing
    # and turned a missing body into NaN that was then sent to the API.
    for post in posts:
        client.beta.threads.messages.create(
            thread_id=thread.id,
            role="user",
            content=post['body']
        )

    return client.beta.threads.messages.list(
        thread_id=thread.id
    )

    # run = client.beta.threads.runs.create_and_poll(
    #     thread_id=thread.id,
    #     assistant_id=assistant.id,
    # )

    # if run.status == 'completed':
    #     messages = client.beta.threads.messages.list(
    #         thread_id=thread.id
    #     )
    #     print('Run completed.')
    # else:
    #     print(run.status)

In [20]:
def run(posts: list):
    """Return the 'body' value of every post, in the original order.

    Args:
        posts: List of dicts that each carry a 'body' key.

    Returns:
        A new list holding each post's 'body'.
    """
    return [entry['body'] for entry in posts]

In [68]:
# Scratch cell: two ways of collecting the body text of every entry in `posts`.
# Flat list with one body string per post.
bodies = [post['body'] for post in posts]
# Nested list whose single inner list starts with a placeholder string
# (despite the name, it is not empty).
empty_list = [['something']]

# Append every post body to the inner list, after the placeholder.
for post in posts:
    empty_list[0].append(post['body'])

In [58]:
# Display the nested list as the cell output.
empty_list

[['something',
  'Ask all your questions about Asana integrations 83!',
  'Hi. I’m connecting Google Sheets and Asana, using Zapier. In both the Asana projec and the Google Sheet I have a mult-select field for “Assets”. I get the following error, how do I get the custom field values ID? I’m a project manager, not a developer.\nThanks!\nFailed to create a task in Asana\nThe app returned “multi_enum_values: [0]: Not a recognized ID: Asset example”.\nThe error message “multi_enum_values: [0]: Not a recognized ID: Asset example” indicates that the value “Asset examples” is not a valid ID for the custom field you are trying to update in Asana. This typically happens when the custom field expects a specific set of predefined options (IDs) and the provided value does not match any of these options.\nTo resolve this issue, you need to ensure that the value you are passing to the custom field is a valid ID recognized by Asana. You can do this by fetching the list of valid IDs for the custom fie

In [89]:
# Module-level accumulator that evaluate_unito_candidates() appends to.
bools = []

def evaluate_unito_candidates(bool_list: list) -> None:
    """Record one result by appending it to the module-level ``bools`` list.

    Args:
        bool_list: The value to store. It is appended as a single element, so
            ``bools`` becomes a list of lists rather than a flat list.

    Returns:
        None. The function only mutates ``bools``.
    """
    bools.append(bool_list)

In [110]:
# Upper bound (exclusive) on the combined character count of the bodies in one chunk.
MAX_CHUNK_CHARS = 32000


def _chunk_bodies(bodies, max_chars):
    """Greedily group consecutive strings into chunks below ``max_chars`` characters.

    Args:
        bodies: Iterable of strings, kept in order.
        max_chars: A chunk is closed before adding a string that would bring
            its combined length to ``max_chars`` or more.

    Returns:
        A list of lists of strings. Every input string appears exactly once.
        A single string that is itself ``max_chars`` or longer gets a chunk of
        its own. Empty input yields an empty list.
    """
    chunks = []
    current = []
    current_len = 0
    for body in bodies:
        # Close the running chunk when this body would push it to the limit.
        if current and current_len + len(body) >= max_chars:
            chunks.append(current)
            current, current_len = [], 0
        current.append(body)
        current_len += len(body)
    if current:
        chunks.append(current)
    return chunks


# Actual combined size of all post bodies, kept for inspection. The number of
# chunks follows from the data instead of a hard-coded size estimate.
total = len(''.join(post['body'] for post in posts))
split_posts = _chunk_bodies([post['body'] for post in posts], MAX_CHUNK_CHARS)

In [117]:
# Display the chunked post bodies as the cell output.
split_posts

[['Ask all your questions about Asana integrations 83!',
  'Hi. I’m connecting Google Sheets and Asana, using Zapier. In both the Asana projec and the Google Sheet I have a mult-select field for “Assets”. I get the following error, how do I get the custom field values ID? I’m a project manager, not a developer.\nThanks!\nFailed to create a task in Asana\nThe app returned “multi_enum_values: [0]: Not a recognized ID: Asset example”.\nThe error message “multi_enum_values: [0]: Not a recognized ID: Asset example” indicates that the value “Asset examples” is not a valid ID for the custom field you are trying to update in Asana. This typically happens when the custom field expects a specific set of predefined options (IDs) and the provided value does not match any of these options.\nTo resolve this issue, you need to ensure that the value you are passing to the custom field is a valid ID recognized by Asana. You can do this by fetching the list of valid IDs for the custom field from Asana a

In [115]:
# Print the index of every chunk in split_posts, one per line.
for i in range(len(split_posts)):
    print(i)

0


In [118]:
# Send one request per chunk and keep every result. `response` still ends up
# holding the last completion, while `responses` keeps earlier chunks from
# being overwritten.
responses = []
for chunk in split_posts:
    response = client.chat.completions.create(
        model='gpt-4o-mini',
        messages=[
            {'role': 'system', 'content': instructions},
            {'role': 'user', 'content': f'{chunk}'}
        ],
        tools=[evaluate_unito_candidates_function],
        # Force the model to call the tool, so reading tool_calls[0] from the
        # result cannot fail because the model answered in plain text.
        tool_choice={'type': 'function', 'function': {'name': 'evaluate_unito_candidates'}}
    )
    responses.append(response)

In [127]:
# Parse the JSON arguments of the first tool call in `response` and keep its 'bool_list' entry.
# NOTE(review): `response` holds only the most recently assigned completion, so when the
# posts were split into several chunks only one chunk's verdicts are read here.
bools =json.loads(response.choices[0].message.tool_calls[0].function.arguments)['bool_list']

In [135]:
# Number of verdicts the model returned.
# NOTE(review): the recorded output is 22 although 20 posts were requested from
# fetch_posts — check that verdicts and posts line up.
len(bools)

22

In [13]:
# Upload every post body to a new thread and keep the listed messages.
messages = filter_posts_bulk(posts)

In [132]:
# zip() stops at the shorter input without complaint, so report a length
# mismatch between verdicts and posts instead of hiding it.
if len(bools) != len(posts):
    print(f'Warning: got {len(bools)} verdicts for {len(posts)} posts; unmatched items are ignored.')

# Keep the posts whose verdict is truthy. The loop variable is named
# is_candidate so it does not shadow the builtin bool.
filtered_posts = [post for post, is_candidate in zip(posts, bools) if is_candidate]

In [136]:
# Display the posts that were kept as the cell output.
filtered_posts

[{'title': 'Integration with Outlook Calendar has wrong Organizer',
  'url': 'https://forum.asana.com/t/integration-with-outlook-calendar-has-wrong-organizer/895016',
  'body': 'I’m an Microsoft 365 admin and have added admin consent in Entra to allow Asana for Outlook Calendar to talk to Microsoft 365. One staff person is testing this and when he adds a calendar item to a task it’s showing that the Organizer is my admin account.\nIt wouldn’t make sense for others to see the admin account as Organizer. Is there any way for him to set himself as Organizer rather than our Microsoft 365 Global Admin account. Can the Organizer default to the person creating the calendar item rather than the Global Admin account?'},
 {'title': 'Asana + Zapier + Google Drive',
  'url': 'https://forum.asana.com/t/asana-zapier-google-drive/887590',
  'body': 'Hello everyone, I’ve been trying to solve a problem with an automation for 2 days and I hope someone can help me.\nI want to create a zap so that when a 

In [138]:
# Token usage reported for the completion stored in `response`:
# total tokens first, then the completion tokens alone.
print(response.usage.total_tokens)
print(response.usage.completion_tokens)

3085
38


In [8]:
# Scratch experiment: draw two random booleans per pass until the second one
# has come up True 20 times, then report how many passes that took.
count, total = 0, 0

while count < 20:

    rand1 = random.randint(0,1) == 1
    rand2 = random.randint(0,1) == 1

    try:
        # The first draw only selects which word is printed.
        print('pee ' if rand1 else 'poo ')

        # The second draw either advances the counter or prints a marker.
        if not rand2:
            print('lol ')
        else:
            count += 1

        total += 1

    except Exception as e:
        print(f'Error: {e}')

print(total)

poo 
lol 
pee 
pee 
lol 
poo 
pee 
lol 
poo 
poo 
pee 
lol 
pee 
pee 
pee 
lol 
pee 
lol 
poo 
pee 
lol 
poo 
lol 
poo 
poo 
lol 
poo 
lol 
pee 
pee 
poo 
lol 
pee 
pee 
lol 
poo 
lol 
pee 
lol 
pee 
pee 
poo 
pee 
pee 
lol 
pee 
lol 
poo 
lol 
poo 
lol 
poo 
lol 
poo 
lol 
pee 
lol 
pee 
lol 
pee 
pee 
poo 
lol 
poo 
poo 
lol 
poo 
lol 
pee 
pee 
lol 
poo 
lol 
pee 
47
