In [4]:
import os
from json import loads

import requests
from bs4 import BeautifulSoup

In [5]:
# Help-center category slugs; each one is appended to
# https://help.twitter.com/en/ when the category pages are scraped.
pages = [
  "managing-your-account",
  "using-twitter",
  "safety-and-security",
  "rules-and-policies",
]

In [12]:
# scrape twitter faqs from help.twitter.com
def get_faqs():
  """Collect the article link records listed on each help-center category page.

  For every slug in the module-level ``pages`` list this fetches
  ``https://help.twitter.com/en/<slug>``, finds each
  ``div.h03__subcategory`` element and decodes the JSON stored in its
  ``data-json-str`` attribute.

  Returns:
    list: the ``results`` entries of every decoded payload, concatenated in
    page order. ``get_posts`` reads the ``'url'`` key of each entry.

  Raises:
    requests.RequestException: if a category page cannot be fetched, times
      out, or answers with an HTTP error status.
  """
  links = []

  for page in pages:
    url = f"https://help.twitter.com/en/{page}"
    # Without a timeout a stalled connection would hang the cell forever.
    response = requests.get(url, timeout=30)
    # Fail loudly on 4xx/5xx instead of silently parsing an error page.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    for subcategory in soup.find_all('div', {'class': 'h03__subcategory'}):
      # .get() tolerates blocks that lack the attribute; plain indexing
      # raised KeyError on them.
      raw_json = subcategory.get('data-json-str')
      if raw_json:
        links.extend(loads(raw_json).get('results', []))

  return links

# Gather the link records from every category page (one HTTP request per entry in `pages`).
faqs = get_faqs()

In [44]:
def get_posts(links):
  """Download each linked help article and return its content sections as text.

  Args:
    links: iterable of mappings whose ``'url'`` value is a path that gets
      appended to ``https://help.twitter.com``.

  Returns:
    list[str]: one string per kept content section, made of a
    ``=== <headline> ===`` line, a blank line, and the stripped section text.
    When a page has no ``b01__headline`` element the link's URL path is used
    as the headline.
  """
  posts = []

  for link in links:
    url = f"https://help.twitter.com{link['url']}"
    # Without a timeout a stalled connection would hang the whole scrape.
    response = requests.get(url, timeout=30)
    soup = BeautifulSoup(response.text, 'html.parser')
    sections = soup.find_all('div', {'class': 'ct01__content'})

    # The headline is looked up as an <h2> first, then as an <h1>.
    title = soup.find('h2', {'class': 'b01__headline'})
    if not title:
      title = soup.find('h1', {'class': 'b01__headline'})

    # Neither variant found: fall back to the URL path so the page's sections
    # are kept instead of crashing on `None.text`. Computed once per page.
    heading = title.text.strip() if title else link['url']

    # ignore first and last sections (presumably non-article content -- TODO confirm)
    for section in sections[1:-1]:
      posts.append(f"=== {heading} ===\n\n{section.text.strip()}")

  return posts

In [45]:
# Fetch every linked article and turn its content sections into titled text blocks.
posts = get_posts(faqs)

In [46]:
# How many article sections were scraped.
print(len(posts))

224


In [47]:
# Combined character count across every scraped post.
total = sum(len(post) for post in posts)

print(total)

237235


In [48]:
# Write all posts to data/twitter.txt, creating data/ first if it is missing
# (open() does not create parent directories, so a fresh checkout would fail).
os.makedirs('data', exist_ok=True)

# Explicit UTF-8: the scraped text may contain non-ASCII characters, and the
# platform default encoding is not guaranteed to be able to encode them.
with open('data/twitter.txt', 'w', encoding='utf-8') as f:
  for post in posts:
    # Three newlines separate consecutive posts in the output file.
    f.write(f"{post}\n\n\n")

# Reported only after the file has been flushed and closed.
print("Done!")


Done!


In [49]:
%pip install gpt-index

Collecting gpt-index
  Downloading gpt_index-0.1.0.tar.gz (40 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m40.7/40.7 kB[0m [31m11.0 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25ldone
[?25h  Getting requirements to build wheel ... [?25ldone
[?25h  Preparing metadata (pyproject.toml) ... [?25ldone
Collecting openai
  Downloading openai-0.25.0.tar.gz (44 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.9/44.9 kB[0m [31m13.4 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25ldone
[?25h  Getting requirements to build wheel ... [?25ldone
[?25h  Preparing metadata (pyproject.toml) ... [?25ldone
[?25hCollecting dataclasses-json
  Downloading dataclasses_json-0.5.7-py3-none-any.whl (25 kB)
Collecting langchain
  Downloading langchain-0.0.34-py3-none-any.whl (119 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m119.4/119.4 kB[0m [31m33.7 MB/s[0m eta [36m0:00:0

In [50]:
# OpenAI credentials. A key already exported in the environment is kept as is;
# only when none is set does the placeholder below get installed. Replace the
# placeholder locally if needed, and never commit a real key to the notebook.
import os
os.environ.setdefault('OPENAI_API_KEY', "OPENAI_API_KEY")

In [51]:
from gpt_index import GPTTreeIndex, SimpleDirectoryReader, LLMPredictor
from IPython.display import Markdown, display
from langchain import OpenAI

In [53]:
# Load the documents found under data/ (presumably every file in that
# directory, including data/twitter.txt -- confirm against the gpt_index docs).
documents = SimpleDirectoryReader('data').load_data()
# Build the tree index. The recorded output shows chunk summaries being
# generated during this step, so it presumably calls the OpenAI API and can
# be slow and billable -- TODO confirm.
index = GPTTreeIndex(documents)

Downloading:   0%|          | 0.00/0.99M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/446k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.29M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/665 [00:00<?, ?B/s]

> Building index from nodes: 18 chunks
0/184
> 0/184, summary: 
Twitter allows you to reset your password if you have forgotten it or if you want to change it. You can do this by logging in with your username and password, checking other platforms to see if you are still logged in, or by verifying what information you can. If you can't determine the valid email, you can start with your phone number. Twitter will send a code to the phone associated with your account so you can reset your password. If you receive a prompt saying more than one Twitter account is associated with a phone number, this option won't work for you.
10/184
> 10/184, summary: 
Twitter allows third-party apps to request access to perform different actions using your Twitter account. You can review and revoke access for apps by visiting the Apps and sessions section of your account settings.
20/184
> 20/184, summary: 
Twitter Toolbox is a hub where you can discover other self-serve, third-party tools created by deve

In [54]:
# Persist the built index to twitter_index.json; the next cell reloads it from this file.
index.save_to_disk('twitter_index.json')

In [55]:
# Predictor backed by OpenAI's text-davinci-003 with temperature 0.
llm_predictor = LLMPredictor(llm=OpenAI(temperature=0, model_name="text-davinci-003"))
# Reload the saved index from disk, this time with the explicit predictor attached.
new_index = GPTTreeIndex.load_from_disk('twitter_index.json', llm_predictor=llm_predictor)

In [58]:
# Ask a question against the reloaded index; the node-selection log in the
# recorded output is printed by the query call itself.
response = new_index.query("How do I delete my tweet")

> Starting query: How do I delete my tweet
>[Level 0] Selected node: [1]/[1]
>[Level 0] Node [1] Summary text:  Twitter is a social media platform that allows users to share updates, thoughts, and opinions. The platform can be used to connect with other users, as well as to access news and information. Twitter also offers a number of features, such as the ability to reset your password, that can be helpful if you forget your login information or want to change your password. Additionally, Twitter Toolbox is a hub where you can discover other self-serve, third-party tools created by developers to enhance your Twitter experience.
>[Level 1] Selected node: [3]/[3]
>[Level 1] Node [3] Summary text:  Twitter Toolbox is a hub where you can discover other self-serve, third-party tools created by developers to enhance your Twitter experience. It currently offers a selection of tools in 3 categories: Expression, Safety, and Measurement. You can add a tool by following the steps on the Twitter T

In [59]:
# Render the answer as Markdown; the f-string converts the response object to text first.
display(Markdown(f"{response}"))

To delete your tweet, log in to your Twitter account and navigate to the tweet you want to delete. Click the three dots icon next to the tweet and select "Delete" from the drop-down menu. Confirm that you want to delete the tweet and it will be removed from your account.