In [1]:
import os
from crewai import Agent, Task, Process, Crew

In [2]:
from langchain_community.tools.ddg_search.tool import DuckDuckGoSearchRun


For example, replace imports like: `from langchain_core.pydantic_v1 import BaseModel`
with: `from pydantic import BaseModel`
or the v1 compatibility namespace if you are working in a code base that has not been fully upgraded to pydantic 2 yet. 	from pydantic.v1 import BaseModel

  from langchain_community.tools.ddg_search.tool import DuckDuckGoSearchRun


In [3]:
search_tool = DuckDuckGoSearchRun()

In [17]:
from pathlib import Path
notebook_dir = Path('.')
file_path = notebook_dir.absolute().parent / 'data' / 'secrets.txt'

secret_dict = {}
with open(file_path, 'r') as file:
    for line in file:
        line = line.strip()
        if '=' in line:
            key, value = line.split('=', 1)
            key = key.strip()
            value = value.strip().strip('"')
            secret_dict[key] = value


In [21]:
os.environ["SERPER_API_KEY"] = secret_dict["SERPER_KEY"]

In [None]:
from langchain.llms import Ollama
from langchain.utilities import GoogleSerperAPIWrapper
from langchain.prompts import PromptTemplate
from langchain_core.runnables import RunnableLambda

# Initialize your local Ollama LLM model
ollama_llm = Ollama(model="llama3.1:latest")

# Initialize the Google Serper API Wrapper
google_serper = GoogleSerperAPIWrapper(api_key=secret_dict["SERPER_KEY"])

# Wrap the Google Serper API Wrapper in a RunnableLambda
google_serper_runnable = RunnableLambda(lambda x: google_serper.run(x))

# Define a prompt template
prompt_template = PromptTemplate(
    input_variables=["query"],
    template="{query}"
)

# Create a RunnableSequence using the pipe operator
sequence = prompt_template | ollama_llm | google_serper_runnable

# Example query
query = "latest AI research news"

# Run the sequence
result = sequence.invoke(query)
print(result)


In [None]:
from langchain.llms import Ollama
from langchain.utilities import GoogleSerperAPIWrapper
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain_core.runnables import RunnableLambda


# Initialize your local Ollama LLM model
# ollama_llm = Ollama(model_path="path/to/your/local/ollama/model")
ollama_llm = Ollama(model="llama3.1:latest")


# Initialize the Google Serper API Wrapper
google_serper = GoogleSerperAPIWrapper(api_key=secret_dict["SERPER_KEY"])

# Wrap the Google Serper API Wrapper in a RunnableLambda
google_serper_runnable = RunnableLambda(lambda x: google_serper.run(x))

# Define a prompt template
prompt_template = PromptTemplate(
    input_variables=["query"],
    template="Search for: {query}"
)

# Create a RunnableSequence using the pipe operator
sequence = prompt_template | ollama_llm | google_serper_runnable

# Example query
query = "latest news on AI research"

# Run the sequence
result = sequence.invoke(query)
print(result)

Approach 1 

In [None]:
from langchain.agents import Tool
from langchain.utilities import GoogleSerperAPIWrapper

# Initialize SerpAPI tool with your API key
# os.environ["OPENAI_API_KEY"] = "Your Key"
os.environ["SERPER_API_KEY"] = secret_dict["SERPER_KEY"]

search = GoogleSerperAPIWrapper()

# Create tool to be used by agent
search_tool = Tool(
  name="Intermediate Answer",
  func=search.run,
  description="useful for when you need to ask with search",
)

In [None]:
explorer = Agent(
    role='Senior Researcher',
    goal='Find and explore the most exciting projects and companies in AI and machine learning in 2024',
    backstory="""You are and Expert strategist that knows how to spot
    emerging trends and companies in AI, tech and machine learning. 
    You're great at finding interesting, exciting projects in Open 
    Source/Indie Hacker space. You look for exciting, rising
    new github ai projects that get a lot of attention.
    """,
    verbose=True,
    allow_delegation=False,
    tools=[search_tool]
)
writer = Agent(
    role='Senior Technical Writer',
    goal='Write engaging and interesting blog post about latest AI projects using simple, layman vocabulary',
    backstory="""You are an Expert Writer on technical innovation, especially 
    in the field of AI and machine learning. You know how to write in 
    engaging, interesting but simple, straightforward and concise. You know 
    how to present complicated technical terms to general audience in a 
    fun way by using layman words.""",
    verbose=True,
    allow_delegation=True
)
critic = Agent(
    role='Expert Writing Critic',
    goal='Provide feedback and criticize blog post drafts. Make sure that the tone and writing style is compelling, simple and concise',
    backstory="""You are an Expert at providing feedback to the technical
    writers. You can tell when a blog text isn't concise,
    simple or engaging enough. You know how to provide helpful feedback that 
    can improve any text. You know how to make sure that text 
    stays technical and insightful by using layman terms.
    """,
    verbose=True,
    allow_delegation=True
)

In [None]:
task_report = Task(
    description="""Use and summarize scraped data from subreddit LocalLLama to
    make a detailed report on the latest rising projects in AI. Your final 
    answer MUST be a full analysis report, text only, ignore any code or 
    anything that isn't text. The report has to have bullet points and with
    5-10 exciting new AI projects and tools. Write names of every tool and 
    project. Each bullet point MUST contain 3 sentences that refer to one 
    specific ai company, product, model or anything you found on subreddit 
    LocalLLama. Use ONLY scraped data from LocalLLama to generate the report. 
    """,
    agent=explorer,
)

task_blog = Task(
    description="""Write a blog article with text only and with a short but 
    impactful headline and at least 10 paragraphs. Blog should summarize 
    the report on latest ai tools found on localLLama subreddit. Style and 
    tone should be compelling and concise, fun, technical but also use 
    layman words for the general public. Name specific new, exciting projects,
    apps and companies in AI world. Don't write "**Paragraph [number of the 
    paragraph]:**", instead start the new paragraph in a new line. Write names
    of projects and tools in BOLD.
    ALWAYS include links to projects/tools/research papers.
    """,
    agent=writer,
)

task_critique = Task(
    description="""Identify parts of the blog that aren't written concise 
    enough and rewrite and change them. Make sure that the blog has engaging 
    headline with 30 characters max, and that there are at least 10 paragraphs.
    Blog needs to be rewritten in such a way that it contains specific 
    names of models/companies/projects but also explanation of WHY a reader
    should be interested to research more. Always include links to each paper/
    project/company
    """,
    agent=critic,
)

In [None]:
# instantiate crew of agents
crew = Crew(
    agents=[explorer, writer, critic],
    tasks=[task_report, task_blog, task_critique],
    verbose=2,
    # Sequential process will have tasks executed one after the other and the outcome of the previous one is passed as extra content into this next.
    process=Process.sequential 
)

In [None]:
# Get your crew to work!
result = crew.kickoff()

print("######################")
print(result)

# Reddit

In [35]:

import praw
import pandas as pd
from datetime import datetime


In [36]:
UG = "Scraper 3.7 u/arbi11"
reddit = praw.Reddit(
            client_id=secret_dict["REDDIT_CLIENT_ID"],
            client_secret=secret_dict["REDDIT_CLIENT_SECRET"],
            user_agent=UG,
        )

In [37]:

# Create empty lists to store data
comments = []
comment_authors = []
comment_timestamps = []

# Iterate through the hot submissions in the specified subreddit
for submission in reddit.subreddit("News").hot(limit=None):  # Adjust the limit as needed
 # Iterate through submission comments
    for comment in submission.comments.list():
        # Check if the comment is an instance of MoreComments
        if isinstance(comment, praw.models.MoreComments):
            continue  # Skip MoreComments objects

        # Append comment information
        comments.append(comment.body)
        comment_authors.append(comment.author.name if comment.author else "Unknown")

        # Convert timestamp to human-readable date and time
        timestamp = datetime.utcfromtimestamp(comment.created_utc)
        comment_timestamps.append(timestamp)

In [38]:
# Create a DataFrame
df = pd.DataFrame({
    'Comment': comments,
    'Comment Author': comment_authors,
    'Comment Timestamp': comment_timestamps
})

# Save the DataFrame to a CSV file
df.to_csv('reddit_comments_dataset.csv', index=False)

# Display the DataFrame
print(df.head())

                                             Comment      Comment Author  \
0  > “If you’re a registered voter in PA, GA, NV,...       Panic_Azimuth   
1  If a democrat billionaire like Soros started t...         CrimsonR4ge   
2   But don’t hand water to people standing in line.  OSI_Hunter_Gathers   
3  I love that Cards Against Humanity is back in ...  tronaldrumptochina   
4  Donald Trump is a human toilet.\n\nNo need to ...               rnilf   

    Comment Timestamp  
0 2024-10-11 20:44:30  
1 2024-10-11 20:55:59  
2 2024-10-11 22:12:04  
3 2024-10-11 22:07:31  
4 2024-10-11 20:39:45  


In [40]:

# Create empty lists to store data
titles = []
submission_ids = []
authors = []
scores = []
upvote_ratios = []
num_comments = []
urls = []

for submission in reddit.subreddit('News').hot(limit=None):

    # Append data to lists
    titles.append(submission.title)
    submission_ids.append(submission.id)
    authors.append(submission.author)
    scores.append(submission.score)
    upvote_ratios.append(submission.upvote_ratio)
    num_comments.append(submission.num_comments)
    urls.append(submission.url)

In [41]:
# Create a DataFrame using the lists
df = pd.DataFrame({
    'Title': titles,
    'Submission ID': submission_ids,
    'Author': authors,
    'Score': scores,
    'Upvote Ratio': upvote_ratios,
    'Number of Comments': num_comments,
    'URL': urls
})

# Print or manipulate the DataFrame as needed
print(df)
# Save the DataFrame to a CSV file
df.to_csv('reddit_data.csv', index=False)

                                                 Title Submission ID  \
0     Cards Against Humanity offers payouts to new ...       1g1ixeg   
1    Dismembered remains found in freezer identifie...       1g1r3eh   
2    Montana businessman gets 2 years in prison for...       1g1odva   
3    US meteorologists face death threats as hurric...       1g1d91d   
4    15-year-old girl sentenced to 3 years for kill...       1g1mbrz   
..                                                 ...           ...   
189  Helene floodwaters trapped Tennessee plastics ...       1ftc7i6   
190    Baseball legend Pete Rose dies at the age of 83       1ft9rnu   
191      State judge strikes down Georgia abortion ban       1ft65bh   
192  'Hamas leader' in Lebanon killed by Israel was...       1ft5ym9   
193  Israel says 'localised, targeted ground raids'...       1ft9sx2   

                   Author  Score  Upvote Ratio  Number of Comments  \
0              raresaturn  18805          0.92                 43

In [1]:
import os
from crewai import Agent, Task, Process, Crew

In [6]:
from langchain.llms import Ollama

ollama_llm = Ollama(model="mistral")

In [7]:
from pathlib import Path
notebook_dir = Path('.')
file_path = notebook_dir.absolute().parent / 'data' / 'secrets.txt'

secret_dict = {}
with open(file_path, 'r') as file:
    for line in file:
        line = line.strip()
        if '=' in line:
            key, value = line.split('=', 1)
            key = key.strip()
            value = value.strip().strip('"')
            secret_dict[key] = value


In [8]:
from langchain.tools import tool

class BrowserTool():
    @tool("Scrape reddit content")
    def scrape_reddit(max_comments_per_post=5):
        """Useful to scrape a reddit content"""
        reddit = praw.Reddit(
            client_id=secret_dict["REDDIT_CLIENT_ID"],
            client_secret=secret_dict["REDDIT_CLIENT_SECRET"],
            user_agent=UG,
        )
        subreddit = reddit.subreddit("LocalLLaMA")
        scraped_data = []

        for post in subreddit.hot(limit=10):
            post_data = {"title": post.title, "url": post.url, "comments": []}

            try:
                post.comments.replace_more(limit=0)  # Load top-level comments only
                comments = post.comments.list()
                if max_comments_per_post is not None:
                    comments = comments[:5]

                for comment in comments:
                    post_data["comments"].append(comment.body)

                scraped_data.append(post_data)

            except praw.exceptions.APIException as e:
                print(f"API Exception: {e}")
                time.sleep(60)  # Sleep for 1 minute before retrying

        return scraped_data

In [9]:
explorer = Agent(
    role="Senior Researcher",
    goal="Find and explore the most exciting projects and companies in AI and machine learning in 2024",
    backstory="""You are and Expert strategist that knows how to spot emerging trends and companies in AI, tech and machine learning. 
    You're great at finding interesting, exciting projects in Open Source/Indie Hacker space. You look for exciting, rising
    new github ai projects that get a lot of attention.
    """,
    verbose=True,
    allow_delegation=False,
    tools=[BrowserTool().scrape_reddit],
    llm=ollama_llm
)

2024-10-12 06:31:29,249 - 43732 - llm.py-llm:178 - ERROR: Failed to get supported params: argument of type 'NoneType' is not iterable



[1;31mProvider List: https://docs.litellm.ai/docs/providers[0m



In [10]:
writer = Agent(
    role='Senior Technical Writer',
    goal='Write engaging and interesting blog post about latest AI projects using simple, layman vocabulary',
    backstory="""You are an Expert Writer on technical innovation, especially 
    in the field of AI and machine learning. You know how to write in 
    engaging, interesting but simple, straightforward and concise. You know 
    how to present complicated technical terms to general audience in a 
    fun way by using layman words.""",
    verbose=True,
    allow_delegation=True,
    llm=ollama_llm
)
critic = Agent(
    role='Expert Writing Critic',
    goal='Provide feedback and criticize blog post drafts. Make sure that the tone and writing style is compelling, simple and concise',
    backstory="""You are an Expert at providing feedback to the technical
    writers. You can tell when a blog text isn't concise,
    simple or engaging enough. You know how to provide helpful feedback that 
    can improve any text. You know how to make sure that text 
    stays technical and insightful by using layman terms.
    """,
    verbose=True,
    allow_delegation=True,
    llm=ollama_llm
)

2024-10-12 06:31:34,487 - 43732 - llm.py-llm:178 - ERROR: Failed to get supported params: argument of type 'NoneType' is not iterable
2024-10-12 06:31:34,490 - 43732 - llm.py-llm:178 - ERROR: Failed to get supported params: argument of type 'NoneType' is not iterable



[1;31mProvider List: https://docs.litellm.ai/docs/providers[0m


[1;31mProvider List: https://docs.litellm.ai/docs/providers[0m



In [None]:
task_report = Task(
    description="""Use and summarize scraped data from subreddit LocalLLama to
    make a detailed report on the latest rising projects in AI. Your final 
    answer MUST be a full analysis report, text only, ignore any code or 
    anything that isn't text. The report has to have bullet points and with
    5-10 exciting new AI projects and tools. Write names of every tool and 
    project. Each bullet point MUST contain 3 sentences that refer to one 
    specific ai company, product, model or anything you found on subreddit 
    LocalLLama. Use ONLY scraped data from LocalLLama to generate the report. 
    """,
    agent=explorer,
)

task_blog = Task(
    description="""Write a blog article with text only and with a short but 
    impactful headline and at least 10 paragraphs. Blog should summarize 
    the report on latest ai tools found on localLLama subreddit. Style and 
    tone should be compelling and concise, fun, technical but also use 
    layman words for the general public. Name specific new, exciting projects,
    apps and companies in AI world. Don't write "**Paragraph [number of the 
    paragraph]:**", instead start the new paragraph in a new line. Write names
    of projects and tools in BOLD.
    ALWAYS include links to projects/tools/research papers.
    """,
    agent=writer,
)

task_critique = Task(
    description="""Identify parts of the blog that aren't written concise 
    enough and rewrite and change them. Make sure that the blog has engaging 
    headline with 30 characters max, and that there are at least 10 paragraphs.
    Blog needs to be rewritten in such a way that it contains specific 
    names of models/companies/projects but also explanation of WHY a reader
    should be interested to research more. Always include links to each paper/
    project/company
    """,
    agent=critic,
)

In [None]:
# instantiate crew of agents
crew = Crew(
    agents=[explorer, writer, critic],
    tasks=[task_report, task_blog, task_critique],
    verbose=2,
    process=Process.sequential,  # Sequential process will have tasks executed one after the other and the outcome of the previous one is passed as extra content into this next.
)

# Get your crew to work!
result = crew.kickoff()

print("######################")
print(result)