In [1]:
from langchain.embeddings.openai import OpenAIEmbeddings
import pinecone

  from tqdm.autonotebook import tqdm


In [2]:
from getpass import getpass

In [3]:
# Prompt for both API keys interactively so the secrets are never written into the notebook.
OPENAI_API_KEY = getpass("OpenAI API Key: ") 
PINECONE_API_KEY = getpass("Pinecone API Key: ")


In [4]:
# Pinecone environment name; passed as `environment=` to `pinecone.init` further down.
PINECONE_API_ENV = 'us-west4-gcp-free'

In [5]:
# OpenAI embeddings client with library-default settings.
# NOTE(review): `embeddings` is not referenced again in the visible cells; the later
# `embed` object is the one used for upserting and querying — confirm whether this is still needed.
embeddings = OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY)

In [6]:
# Put the current working directory on the import path so the local `loadutils`
# module can be imported (presumably loadutils.py sits next to this notebook — confirm).
import sys, os
sys.path.append(os.getcwd())

from loadutils import load_training


In [7]:
# Load the training listings through the local helper. The cells below treat the result
# as a pandas DataFrame with `id`, `name`, `tagline`, `topics`, `host` and `description` columns.
df = load_training()

In [8]:
# Shuffle every row (frac=1 keeps the full dataset). A fixed seed makes the row order,
# and therefore `docs[0]` and the upsert order, reproducible across kernel restarts.
sample = df.sample(frac=1, random_state=42)

In [9]:
len(sample)

12503

In [10]:
# Build the text that gets embedded for each listing:
# name, tagline, comma-joined topics, host and description, in that order.
topics_joined = sample['topics'].map(','.join)
sample['input'] = (
    sample['name'] + ', '
    + sample['tagline'] + ', '
    + topics_joined + ', '
    + sample['host'] + ' , '
    + sample['description']
)

In [11]:
# Name of the Pinecone index that stores the listing embeddings.
# NOTE(review): no create_index call is visible, so the index is assumed to exist already — confirm.
index_name = 'lamaidx'
# Configure the Pinecone client with the key and environment collected above, then open the index.
pinecone.init(api_key=PINECONE_API_KEY, environment=PINECONE_API_ENV) 
index = pinecone.Index(index_name)

In [12]:
# Chat model name: used for the ChatCompletion calls and passed to the token splitter as `model_name`.
model = 'gpt-3.5-turbo'

In [13]:
from langchain.text_splitter import TokenTextSplitter

In [14]:
# Token-based splitter configured from `model`: chunk size 2000 with no overlap between chunks.
splitter = TokenTextSplitter(model_name=model, chunk_size=2000, chunk_overlap=0)

In [15]:
from langchain.embeddings.openai import OpenAIEmbeddings

# Embedding client used for both the documents (upsert) and the queries (search).
# `model` holds a chat model name ('gpt-3.5-turbo'), which is not an embedding model, so an
# embedding model is named explicitly here. `text-embedding-ada-002` yields 1536-dimensional
# vectors, which matches the dimension reported by `index.describe_index_stats()`.
embed = OpenAIEmbeddings(
    model='text-embedding-ada-002',
    openai_api_key=OPENAI_API_KEY
)

In [16]:
from langchain.docstore.document import Document

def _row_to_document(row):
    """Turn one listing row into a Document: `input` is the content, the other fields are metadata."""
    meta = {
        'id': row.id,
        # Bracket access on purpose: `row.name` on a row Series is the index label, not the column.
        'name': row['name'],
        'tagline': row.tagline,
        'topics': row.topics,
        'host': row.host,
    }
    return Document(page_content=row['input'], id=row.id, metadata=meta)


docs = [_row_to_document(row) for _, row in sample.iterrows()]

In [17]:
docs[0]

Document(page_content="How to launch like Harry's, Easily create a referral program with this handy tool 💪, Web App,Email Marketing,Marketing,Tech, harrysprelaunchreferral.com , How to launch like Harry's is a side project for people to build a Harry’s prelaunch referral program in 4 simple steps.", metadata={'id': 104998, 'name': "How to launch like Harry's", 'tagline': 'Easily create a referral program with this handy tool 💪', 'topics': ['Web App', 'Email Marketing', 'Marketing', 'Tech'], 'host': 'harrysprelaunchreferral.com'})

In [36]:
# DESTRUCTIVE: `delete_all=True` with no namespace given clears the vectors out of the index
# so it can be rebuilt from scratch. Run this only when you intend to re-upsert everything.
index.delete(ids=[], delete_all=True)

{}

In [18]:
index.describe_index_stats()

{'dimension': 1536,
 'index_fullness': 0.1,
 'namespaces': {'': {'vector_count': 12503}},
 'total_vector_count': 12503}

In [38]:
from tqdm.auto import tqdm
from uuid import uuid4

batch_limit = 100  # flush to Pinecone once at least this many chunks are buffered


def _upsert_batch(batch_texts, batch_metadatas):
    """Embed a batch of chunk texts and upsert them into `index` with their metadata.

    Every vector gets a fresh random UUID as its ID, so running this again without
    clearing the index first stores the same chunks a second time under new IDs.
    Empty batches are skipped so no embedding call is made for nothing.
    """
    if not batch_texts:
        return
    ids = [str(uuid4()) for _ in batch_texts]
    embeds = embed.embed_documents(batch_texts)
    index.upsert(vectors=zip(ids, embeds, batch_metadatas))


texts = []
metadatas = []

for record in tqdm(docs):
    # first get metadata fields for this record
    metadata = record.metadata
    # now we create chunks from the record text
    record_texts = splitter.split_text(record.page_content)
    # create individual metadata dicts for each chunk; "text" keeps the chunk itself
    # so it can be read back from the index at query time
    record_metadatas = [{
        "chunk": j, "text": text, **metadata
    } for j, text in enumerate(record_texts)]
    # append these to current batches
    texts.extend(record_texts)
    metadatas.extend(record_metadatas)
    # if we have reached the batch_limit we can add texts
    if len(texts) >= batch_limit:
        _upsert_batch(texts, metadatas)
        texts = []
        metadatas = []

# flush whatever is left over after the last full batch
_upsert_batch(texts, metadatas)

  0%|          | 0/12503 [00:00<?, ?it/s]

In [39]:
index.describe_index_stats()

{'dimension': 1536,
 'index_fullness': 0.1,
 'namespaces': {'': {'vector_count': 12503}},
 'total_vector_count': 12503}

In [19]:
from langchain.vectorstores import Pinecone

# Wrap the Pinecone index as a LangChain vector store. Queries are embedded with
# `embed.embed_query`; "text" is the metadata key under which the upsert loop stored each chunk's text.
vectorstore = Pinecone(
    index, embed.embed_query, "text"
)

In [20]:
# Free-text product idea to look up similar listings for.
query = "on demand kid photoshoot"

# Retrieve the stored listings most similar to the query text.
r = vectorstore.similarity_search(
    query,  # our search query
    k=5  # return the 5 most relevant docs
)

In [21]:
r

[Document(page_content='Totspot, Shop and sell kids fashion from your smartphone, Tech, itunes.apple.com , ', metadata={'chunk': 0.0, 'host': 'itunes.apple.com', 'id': 3098.0, 'name': 'Totspot', 'tagline': 'Shop and sell kids fashion from your smartphone', 'topics': ['Tech']}),
 Document(page_content='Doctor on Demand, On demand video chat with licensed doctors, Tech, doctorondemand.com , ', metadata={'chunk': 0.0, 'host': 'doctorondemand.com', 'id': 186.0, 'name': 'Doctor on Demand', 'tagline': 'On demand video chat with licensed doctors', 'topics': ['Tech']}),
 Document(page_content='Take it, Print your best photos for free (really). Shipped worldwide., iOS,Photography,Tech, takeitapp.co , ', metadata={'chunk': 0.0, 'host': 'takeitapp.co', 'id': 27679.0, 'name': 'Take it', 'tagline': 'Print your best photos for free (really). Shipped worldwide.', 'topics': ['iOS', 'Photography', 'Tech']}),
 Document(page_content='Fiverr Faces, Create jaw-dropping portraits from your selfies, Photogra

In [22]:
# Format each retrieved listing as a few-shot example: one labelled field per line.
exs = []
for row in r:
    # Join categories with a comma (as the later examples cell does). A newline separator
    # would spill the categories over several unlabelled lines and blur the listing format.
    topics = ','.join(row.metadata['topics'])
    exs.append(f"product name: {row.metadata['name']}\ntagline: {row.metadata['tagline']}\ncategories: {topics}\ndomain: {row.metadata['host']}")

In [23]:
len(exs)

5

In [24]:
import openai

openai.api_key = OPENAI_API_KEY

# Join the formatted listings into one text block for the prompt.
examples = "\n".join(exs)

# Ask the chat model for listing suggestions, showing the retrieved listings as inspiration.
# The prompt must interpolate the joined `examples` text; interpolating the `exs` list would
# send its Python repr (brackets, quotes and escaped "\n") to the model instead.
response = openai.ChatCompletion.create(
  model=model,
  messages=[{"role": "system", "content": "You are a helpful assistant that suggests product names, taglines, categories and domain names to help list on product hunt"}, 
            {"role": "user", "content": f"here are few of the most popular listings on product hunt for inspiration:\n{examples}"},
            {"role": "user", "content": f"generate 5 listing options for idea '{query}'"}],
  temperature=0.6
)

In [25]:
print(response['choices'][0]['message']['content'])

1. Product Name: KidSnap
Tagline: Capture the perfect moment with on-demand kids photoshoots
Categories: Photography, Parenting
Domain: kidsnap.co

2. Product Name: SnapSquad
Tagline: Professional kids photography at your fingertips
Categories: Photography, Parenting
Domain: snapsquad.com

3. Product Name: Kiddo Clicks
Tagline: Quality kids photoshoots on demand
Categories: Photography, Parenting
Domain: kiddoclicks.com

4. Product Name: ChildProof
Tagline: On-demand photoshoots for your little ones
Categories: Photography, Parenting
Domain: childproofapp.com

5. Product Name: TinyTogs
Tagline: On demand, professional photoshoots for your kids
Categories: Photography, Parenting
Domain: tinytogs.co


In [26]:
# Candidate category pools for random idea generation: one "solution type" (platform / form factor)
# and a list of subject domains. The cell below samples from each list.
solution_types = ['Web App','iOS','SaaS','Android','Mac','Chrome Extensions','API','iPad','Windows','Wearables','Hardware','Apple','Browser Extensions']
domains= ['Marketing','User Experience','Messaging','Analytics','Education','Social Media','Growth Hacking','Fintech','Photography','Writing','Email','Task Management','Web3','Health & Fitness','Sales','E-Commerce','Social Network','Streaming Services','Hiring','Robots','Customer Communication','No-Code','Software Engineering','Travel','Newsletters','Email Marketing','Prototyping','Meetings','News','Music','Home','Investing','Search','Books','Calendar','Privacy','Payments','Global Nomad','Branding','SEO','Games','Notes','GitHub','Remote Work','Startup Books','Venture Capital','Sketch','Freelance','Art','Funny','Internet of Things','Maker Tools','Advertising','Notion','Icons','Spreadsheets','Augmented Reality','Crypto','Startup Lessons']

In [27]:
import random

# Draw one solution type and two distinct domains at random.
# No seed is set, so each run produces a different combination.
solution = random.sample(solution_types, 1)
domain = random.sample(domains, 2)

In [28]:
solution, domain

(['Browser Extensions'], ['Books', 'Hiring'])

In [29]:
# Pull up to 1000 stored listings (with metadata) by querying with an all-zero vector
# whose length matches the index dimension (1536).
# NOTE(review): how matches rank against a zero vector depends on the index metric, so this
# is presumably an arbitrary slice of the index rather than a uniform random sample — confirm.
random_1k = index.query(
    vector=[0]*1536,
    top_k=1000,
    include_metadata=True
)

In [30]:
# Pick 5 of the returned matches at random to use as few-shot examples.
shots = random.sample(random_1k['matches'], 5)

In [31]:
# Render each sampled match as a "categories / idea" example for the prompt.
examples = []
for match in shots:
    meta = match['metadata']
    categories = ','.join(meta['topics'])
    examples.append(f"categories: {categories}\nidea: {meta['name']}, {meta['tagline']}")

In [32]:
examples

['categories: Mac,Web App,Design Tools,Photography,Developer Tools,Tech\nidea: Squash 2 for Mac, The easiest way to compress and optimize images for the web\n\n',
 'categories: Design Tools,Productivity,Art,Marketing,Developer Tools\nidea: Fresh Folk, An Illustration library of people and objects\n\n',
 'categories: Web App,Tech\nidea: Podcat, IMDB for podcasts\n\n',
 'categories: iOS,Education,Tech,Books\nidea: Spdr, Speed Reading\n\n',
 'categories: Productivity,Meetings,Developer Tools\nidea: Cosmos Video 3.0, A meetings tool to create visibility & a sense of belonging\n\n']

In [33]:
# Combine the sampled solution type with the sampled domains into one category list.
# The list is rebuilt rather than extended in place, so re-running this cell does not
# append the domains a second time.
solution = [item for item in solution if item not in domain] + domain

In [34]:
solution

['Browser Extensions', 'Books', 'Hiring']

In [35]:
import openai

# Prompt inputs: the few-shot examples separated by blank lines, and the sampled categories.
exs = '\n\n'.join(examples)
query = ','.join(solution)
model = 'gpt-3.5-turbo'

# Conversation sent to the chat model: role description, examples, then the actual request.
system_message = {"role": "system", "content": "You are a helpful assistant that suggests new product ideas given categories"}
examples_message = {"role": "user", "content": f"here are few of the most popular categories and taglines on product hunt:\n{exs}"}
request_message = {"role": "user", "content": f"generate 1 new product idea in 20 words for:\n categories: '{query}'"}

response = openai.ChatCompletion.create(
    model=model,
    messages=[system_message, examples_message, request_message],
    temperature=0.6,
)

In [36]:
response

<OpenAIObject chat.completion id=chatcmpl-7VbMnM94nPNLpJbkipEpyp3GKSbI8 at 0x10fc2f2c0> JSON: {
  "id": "chatcmpl-7VbMnM94nPNLpJbkipEpyp3GKSbI8",
  "object": "chat.completion",
  "created": 1687766161,
  "model": "gpt-3.5-turbo-0301",
  "choices": [
    {
      "index": 0,
      "message": {
        "role": "assistant",
        "content": "Idea: HireRead, a browser extension that suggests relevant books to help hiring managers improve their recruitment and interviewing skills."
      },
      "finish_reason": "stop"
    }
  ],
  "usage": {
    "prompt_tokens": 202,
    "completion_tokens": 24,
    "total_tokens": 226
  }
}