In [97]:
import ergodic
import os
from dotenv import load_dotenv
import logging

# Configure the root logger at INFO level.
logging.basicConfig(level=logging.INFO)

# Load variables from a .env file into the process environment.
load_dotenv()

# Credentials are read from the environment so they are never hard-coded here.
username = os.getenv("ERGODIC_API_USERNAME")
password = os.getenv("ERGODIC_API_PASSWORD")
api_url = "https://ergbackendv3.azurewebsites.net"

# os.getenv returns None for unset variables; report that here instead of
# letting a None credential reach the client unnoticed.
_missing_vars = [
    var_name
    for var_name, var_value in (
        ("ERGODIC_API_USERNAME", username),
        ("ERGODIC_API_PASSWORD", password),
    )
    if not var_value
]
if _missing_vars:
    logging.warning(
        "Missing environment variables: %s", ", ".join(_missing_vars)
    )

In [None]:
from ergodic.client import ErgodicClient

# Build the API client from the backend URL and the username/password pair.
client = ErgodicClient(api_url, username, password)

In [None]:
# Description-driven extraction: no schema is passed, only a free-text
# description of what to pull out of each record.
extraction = client.extraction.extract_features(
    description="Extract the greeting and the recipient name from the text",
    data=[
        dict(text="Hello, Andre!", title="1"),
        dict(text="Hello, Zubair!", title="2"),
    ],
)

In [None]:
# Display the object returned by extract_features.
extraction

In [None]:
# Show the data model attached to the extraction
# (presumably the schema derived from the description — TODO confirm).
extraction.get_data_model()

In [107]:
from pydantic import BaseModel, Field


# Explicit schema for the greeting example.
# NOTE(review): the class docstring and the Field descriptions are presumably
# sent to the extraction service as part of the schema, so treat them as
# runtime text rather than documentation — confirm before rewording them.
class Greeting(BaseModel):
    """Extract the greeting and the recipient name from the text"""

    # Greeting phrase found in the text.
    greeting: str = Field(description="The greeting")
    # Name of the person being greeted.
    recipient: str = Field(description="The recipient name")

In [None]:
# Schema-driven extraction: the Greeting model is passed instead of a
# free-text description.
extraction = client.extraction.extract_features(
    data_model=Greeting,
    data=[
        dict(text="Hello, Andre!", title="1"),
        dict(text="Hello, Zubair!", title="2"),
    ],
)

In [None]:
# Display the object returned by extract_features.
extraction

In [110]:
# Call wait() on the extraction and keep what it returns
# (presumably blocks until the job has finished — TODO confirm).
result = extraction.wait()

In [None]:
# Display the value returned by extraction.wait().
result

In [86]:
import pandas

# NOTE(review): unpickling can execute arbitrary code; only load this file if
# it comes from a trusted source.
df = pandas.read_pickle("../data/product_sheets_mapping_curated.pkl")

# Build the request payload in one chain: keep the identifier and text
# columns, expose the identifier under the "title" key, and turn every row
# into a dict. rename() returns a new frame, so no inplace mutation or
# explicit copy is needed, and json_data is only ever bound to the final list.
json_data = (
    df[["PLID_KERNEL", "text"]]
    .rename(columns={"PLID_KERNEL": "title"})
    .to_dict(orient="records")
)

In [None]:
from pydantic import BaseModel, Field


# Explicit schema describing the product attributes of interest.
# NOTE(review): the class docstring and the Field descriptions are presumably
# sent to the extraction service as part of the schema, so treat them as
# runtime text rather than documentation — confirm before rewording them.
class ProductFeatures(BaseModel):
    """Extract features related to the product"""

    # Product name as free text.
    product_name: str = Field(description="The name of the product")
    # Maximum bandwidth as a number; the description asks for Mbps.
    max_bandwidth: float = Field(
        description="The maximum bandwidth of the product, in Mbps"
    )
    # Typed as a plain str: the allowed values are listed in the description
    # only and are not enforced by the type.
    wifi_generation: str = Field(
        description="The wifi generation of the product, can be one of: WiFi 4, WiFi 5, WiFi 6, WiFi 6E or WiFi 7"
    )
    # WiFi standard as free text.
    wifi_standard: str = Field(description="The wifi standard of the product")


# TODO: create an endpoint that receives an open-ended description and a text
# and returns a schema.
# NOTE(review): ProductFeatures, defined earlier in this cell, is not passed
# as data_model here — confirm that the description-only call is intentional.
# Only the first ten records of json_data are submitted.
extraction = client.extraction.extract_features(
    description="Extract the name, max bandwidth, wifi generation and wifi standard from the text",
    data=json_data[:10],
)

# TODO: add unsupervised extraction as well, as a two-step process:
#   1. entity recognition
#   2. extraction

In [122]:
# Call wait() on the extraction and keep what it returns
# (presumably blocks until the job has finished — TODO confirm).
# NOTE(review): `extraction` is rebound by several earlier cells, so which job
# this waits on depends on the order in which the cells were executed.
data = extraction.wait()

In [None]:
def process_plid_name(plid_name: str) -> str:
    """Reduce a PLID string to a short "kernel" identifier.

    Every occurrence of "AIR" is removed, the remainder is split on "-" and
    empty segments are discarded. The first segment is returned when it is
    longer than three characters; otherwise the first two segments are
    returned joined by "-".

    Inputs that previously raised IndexError are now handled: a lone short
    segment is returned unchanged, and an input with no segments yields "".

    Args:
        plid_name: The raw PLID string.

    Returns:
        The kernel identifier, or "" when nothing is left after cleaning.
    """
    # NOTE(review): this strips "AIR" anywhere in the string, not only a
    # leading "AIR-" prefix — confirm that is intended.
    segments = [part for part in plid_name.replace("AIR", "").split("-") if part]

    # Nothing left after removing "AIR" and the separators.
    if not segments:
        return ""

    head = segments[0]
    # A short first segment can only be extended when a second one exists.
    if len(head) > 3 or len(segments) == 1:
        return head
    return head + "-" + segments[1]

In [91]:
# Read the parquet file and add a PLID_KERNEL column computed from PLID with
# process_plid_name, in a single expression.
sales_df = pandas.read_parquet("../data/wireless_meraki.parquet").assign(
    PLID_KERNEL=lambda frame: frame["PLID"].apply(process_plid_name)
)

In [93]:
# Name of the column to aggregate.
sales = "MFG_BOOKINGS_DOLLAR_total_demand_sum"
# Sum that column within each PLID_KERNEL group.
total_sales = sales_df[sales].groupby(sales_df["PLID_KERNEL"]).sum()

In [None]:
# Display the per-kernel totals ordered from largest to smallest.
total_sales.sort_values(ascending=False)