In [3]:
!pip install git+https://github.com/openai/swarm.git
!pip install openai
!pip install firecrawl-py

Collecting git+https://github.com/openai/swarm.git
  Cloning https://github.com/openai/swarm.git to /tmp/pip-req-build-we93a6jb
  Running command git clone --filter=blob:none --quiet https://github.com/openai/swarm.git /tmp/pip-req-build-we93a6jb
  Resolved https://github.com/openai/swarm.git to commit 9db581cecaacea0d46a933d6453c312b034dbf47
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting pre-commit (from swarm==0.1.0)
  Downloading pre_commit-4.0.1-py2.py3-none-any.whl.metadata (1.3 kB)
Collecting instructor (from swarm==0.1.0)
  Downloading instructor-1.7.0-py3-none-any.whl.metadata (17 kB)
Collecting jiter<1,>=0.4.0 (from openai>=1.33.0->swarm==0.1.0)
  Downloading jiter-0.6.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (5.2 kB)
Collecting cfgv>=2.0.0 (from pre-commit->swarm==0.1.0)
  Downloading cfgv-3.4.0-py2.py3-none-any.wh

In [4]:
import os
from getpass import getpass

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from firecrawl import FirecrawlApp
from openai import ChatCompletion
from openai import OpenAI
from swarm import Agent, Swarm



In [14]:
# Configure API keys
# Never store the key in the notebook: reuse one that is already exported in the
# environment, otherwise prompt for it without echoing it into the cell output.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass("OpenAI API key: ")

# Load dataset
# Module-level frame read by every analysis function defined in this cell.
df = pd.read_csv("https://raw.githubusercontent.com/jaydiaz2012/AI_First_Day_6_AI_Swarm/refs/heads/main/ai%20first%20sales%20data%20-%20sales%20(1).csv")

# Analyze dataset
def analyze_dataset():
    """Build a statistical summary of the module-level ``df``.

    The ``date`` column of ``df`` is converted to datetimes in place before the
    summary is computed, so the shared frame is modified by this call.

    Returns:
        dict: three entries, each a plain ``dict`` produced by ``to_dict()``:
        ``"overview"`` (``describe(include="all")``), ``"null_values"``
        (null count per column) and ``"correlations"`` (pairwise correlations
        of the numeric columns only).
    """
    df['date'] = pd.to_datetime(df['date'])

    # Correlations are computed on the numeric columns only.
    numeric_columns = df.select_dtypes(include=np.number)

    overview = df.describe(include="all").to_dict()
    null_counts = df.isnull().sum().to_dict()
    correlation_matrix = numeric_columns.corr().to_dict()

    return {
        "overview": overview,
        "null_values": null_counts,
        "correlations": correlation_matrix,
    }

def generate_web_insights():
    """Total each engagement metric of the module-level ``df`` per ``source``.

    Returns:
        dict: result key -> pandas Series indexed by ``source`` holding the
        summed ``pageviews``, ``visits``, ``transactions`` and
        ``productClick`` columns respectively.
    """
    # Result key -> column of ``df`` that is summed for it.
    column_for_key = {
        'source_pageviews': 'pageviews',
        'source_visits': 'visits',
        'source_transactions': 'transactions',
        'source_product_click': 'productClick',
    }

    grouped_by_source = df.groupby('source')
    return {
        key: grouped_by_source[column].sum()
        for key, column in column_for_key.items()
    }

def generate_web_visuals():
    """Draw one line chart per engagement metric against the ``source`` column.

    Reads the module-level ``df`` and shows four charts (pageviews, visits,
    transactions, productClick), each on its own 10x6 figure.

    Returns:
        dict: chart key -> the matplotlib ``Figure`` that was drawn, in the
        order the charts are shown.
    """
    # (result key, column of ``df`` on the y axis, chart title), in display order.
    chart_specs = [
        ('page_views_distribution', 'pageviews', "Page Views Distribution"),
        ('visits_distribution', 'visits', "Visits Distribution"),
        ('transactions_trends', 'transactions', "Transactions Trends"),
        ('product_click_trends', 'productClick', "Product Clicks Trends"),
    ]

    visualizations = {}
    for key, column, title in chart_specs:
        fig, ax = plt.subplots(figsize=(10, 6))
        # Pass the frame by keyword: some seaborn releases accept no positional data.
        sns.lineplot(data=df, x="source", y=column, ax=ax)
        ax.set_title(title)
        # Store the figure under its key. A bare ``visualizations[key]`` lookup
        # on the empty dict raises KeyError instead of recording anything.
        visualizations[key] = fig
        plt.show()

    return visualizations

# Agent that reports the statistical summary; its only tool is analyze_dataset.
web_analytics_agent = Agent(
    name="Web Analyst Agent",
    model="gpt-4o-mini",
    instructions="""You are a web analyst agent that cleans the dataset and report on the statistical summary of the dataset""",
    functions=[analyze_dataset],
)

# Agent that turns the per-source totals into insights; its only tool is generate_web_insights.
web_insights_agent = Agent(
    name="Web Insights Agent",
    model="gpt-4o-mini",
    instructions="""You are a Web Insights Agent that provide insights about the data provided and summarize the insights from other agents, including actionable recommendations""",
    functions=[generate_web_insights]
)

# Agent that draws the charts; its only tool is generate_web_visuals.
# The keyword must be spelled ``functions`` (as on the two agents above),
# otherwise the plotting tool is not registered on this agent.
web_insights_visuals_agent = Agent(
    name="Web_Visuals_Agent",
    model="gpt-4o-mini",
    instructions="""You are a data visualization expert that create visualizations on a given datase and gives summary of the data visuals""",
    functions=[generate_web_visuals]
)

if __name__ == "__main__":
    client = Swarm()

    def _user_turn(text):
        """Wrap ``text`` as a one-message conversation sent by the user."""
        return [{"role": "user", "content": text}]

    def _final_reply(response):
        """Return the content of the last message in a Swarm response."""
        return response.messages[-1]["content"]

    # Step 1: statistical summary of the dataset.
    web_analytics_response = client.run(
        agent=web_analytics_agent,
        messages=_user_turn("Please clean and analyze my dataset."),
    )
    print("\nWeb Analytics Results:")
    print(_final_reply(web_analytics_response))

    # Step 2: insights derived from the dataset.
    web_insights_response = client.run(
        agent=web_insights_agent,
        messages=_user_turn("Please provide insights based from my dataset."),
    )
    print("\nWeb Insights Results:")
    print(_final_reply(web_insights_response))

    # Step 3: charts for the dataset.
    web_insights_visuals_response = client.run(
        agent=web_insights_visuals_agent,
        messages=_user_turn(
            "Please provide charts using Seaborn or Matplotlib using the data from the given dataset."
        ),
    )
    print("\nWeb Analytics Visuals Results:")
    print(_final_reply(web_insights_visuals_response))


# Advanced analysis via a direct OpenAI chat completion (does not go through Swarm)
def advanced_analysis(prompt, model):
    """Ask an OpenAI chat model for a free-form analysis of the module-level ``df``.

    The dataset is serialised to CSV and appended to the instruction, then sent
    as a single user message.

    Args:
        prompt: Instruction placed before the dataset. If empty or ``None``, a
            default clustering / anomaly-detection / trend instruction is used.
        model: Name of the chat model. If empty or ``None``, ``"gpt-4o-mini"``
            is used.

    Returns:
        str | None: the text of the model's reply, or ``None`` when anything
        in the request fails (the error is printed).
    """
    instruction = prompt or (
        "Perform clustering, detect anomalies, and provide predictive trends "
        "on this dataset:"
    )
    try:
        # NOTE(review): the whole frame is embedded in the prompt. For a large
        # frame this may exceed the model's context window - TODO consider
        # sending a sample or an aggregate instead.
        full_prompt = f"{instruction}\n{df.to_csv(index=False)}"

        # Build the client inside the try so a missing/invalid key is reported
        # through the same error path as a failed request.
        client = OpenAI()
        response = client.chat.completions.create(
            model=model or "gpt-4o-mini",
            messages=[
                {"role": "system", "content": "You are a data analysis assistant using Swarm."},
                {"role": "user", "content": full_prompt}
            ],
        )
        return response.choices[0].message.content
    except Exception as e:
        # Best effort: report the failure and let the caller check for None.
        print(f"Error performing advanced analysis: {e}")
        return None


Web Analytics Results:
Here's the statistical summary and insights from your dataset:

### Overview of the Dataset

- **Total Entries**: 52,721

#### Key Fields:

1. **Date**
   - Range: January 1, 2020 - September 30, 2020
   - Mean: May 19, 2020

2. **Source**
   - Unique Sources: 22
   - Most Frequent Source: Facebook (12,954 occurrences)

3. **Medium**
   - Unique Mediums: 6
   - Most Frequent Medium: CPC (16,833 occurrences)

4. **Delivery Available**
   - Unique Categories: 3
   - Most Common Category: No data (31,953 occurrences)

5. **Device Type**
   - Unique Device Types: 3
   - Most Common Device Type: Mobile (24,416 occurrences)

6. **Promo Activated**
   - Unique Categories: 2
   - Most Common Category: No (27,548 occurrences)

7. **Filter Used**
   - Unique Categories: 2
   - Most Common Category: No (37,290 occurrences)

#### Engagement Metrics:
- **Pageviews**
  - Mean: 583.76
  - Min: 0
  - Max: 34,832
  - Outliers indicated by a high standard deviation (1452.00) whic