diff --git a/docs-site/scripts/check-links.py b/docs-site/scripts/check-links.py index a5661d6e..0079a0c4 100644 --- a/docs-site/scripts/check-links.py +++ b/docs-site/scripts/check-links.py @@ -22,6 +22,12 @@ # GitHub blob URLs pointing into this repo are checked as local files REPO_BLOB_PREFIX = "https://github.com/futuresearch/everyrow-sdk/blob/main/" +# Colab URLs pointing into this repo are checked as local files +REPO_COLAB_PREFIX = ( + "https://colab.research.google.com/github/" + "futuresearch/everyrow-sdk/blob/main/" +) + # Git LFS media URLs — the correct way to link to LFS-tracked files. # These are checked as local files instead of fetching from GitHub. REPO_LFS_PREFIX = ( @@ -186,6 +192,16 @@ def check_file( ) continue + # Colab links to this repo: check the notebook exists locally. + if url_without_fragment.startswith(REPO_COLAB_PREFIX): + rel_path = url_without_fragment[len(REPO_COLAB_PREFIX) :] + if not (REPO_ROOT / rel_path).exists(): + errors.append( + f" {page_label}: file not found for {href!r}" + f" (expected {rel_path})" + ) + continue + if url_without_fragment in SKIPPED_URLS: continue diff --git a/docs-site/src/app/case-studies/[slug]/page.tsx b/docs-site/src/app/case-studies/[slug]/page.tsx index 5cb88bb8..2bdf422a 100644 --- a/docs-site/src/app/case-studies/[slug]/page.tsx +++ b/docs-site/src/app/case-studies/[slug]/page.tsx @@ -1,5 +1,6 @@ import { notFound } from "next/navigation"; import { DocsLayout } from "@/components/DocsLayout"; +import { NotebookActions } from "@/components/NotebookActions"; import { getNavigation } from "@/utils/docs"; import { getNotebookBySlug, getNotebookSlugs } from "@/utils/notebooks"; @@ -50,6 +51,7 @@ export default async function NotebookPage({ params }: PageProps) { return ( +
+ + + View source + + + + Run in Colab + + + ); +} diff --git a/docs-site/src/styles/notebook.css b/docs-site/src/styles/notebook.css index dab6185a..9c77afbe 100644 --- a/docs-site/src/styles/notebook.css +++ b/docs-site/src/styles/notebook.css @@ -1,3 +1,38 @@ +/* Notebook action buttons (View source, Run in Colab) */ +.notebook-actions { + display: flex; + gap: 0.75rem; + margin-bottom: 1rem; + justify-content: flex-end; +} + +.notebook-action-link { + display: inline-flex; + align-items: center; + gap: 0.375rem; + padding: 0.375rem 0.75rem; + font-size: 0.8125rem; + font-weight: 500; + color: var(--muted); + border: 1px solid var(--border); + border-radius: 0.375rem; + text-decoration: none; + transition: color 0.15s, border-color 0.15s, background-color 0.15s; +} + +.notebook-action-link:hover { + color: var(--foreground); + border-color: var(--foreground); + background-color: var(--code-bg); + text-decoration: none; +} + +.notebook-action-colab:hover { + color: var(--accent); + border-color: var(--accent); + background-color: var(--accent-light); +} + /* * Notebook styles - scoped to .notebook-content * Based on Jupyter's default styling but minimal diff --git a/docs/case_studies/dedupe-crm-company-records/notebook.ipynb b/docs/case_studies/dedupe-crm-company-records/notebook.ipynb index 371eec41..eda7ea44 100644 --- a/docs/case_studies/dedupe-crm-company-records/notebook.ipynb +++ b/docs/case_studies/dedupe-crm-company-records/notebook.ipynb @@ -3,7 +3,11 @@ { "cell_type": "markdown", "metadata": {}, - "source": "# How to use LLMs to deduplicate CRM Data\n\nThis notebook demonstrates how to use the everyrow SDK's `dedupe` operation to deduplicate messy CRM data using AI-powered semantic matching." + "source": [ + "# How to use LLMs to deduplicate CRM Data\n", + "\n", + "This notebook demonstrates how to use the everyrow SDK's `dedupe` operation to deduplicate messy CRM data using AI-powered semantic matching." + ] }, { "cell_type": "markdown", @@ -19,7 +23,19 @@ "execution_count": null, "metadata": {}, "outputs": [], - "source": "from datetime import datetime\nfrom textwrap import dedent\n\nimport pandas as pd\nfrom dotenv import load_dotenv\n\nfrom everyrow.ops import dedupe\n\nload_dotenv()" + "source": [ + "# !pip install everyrow\n", + "from datetime import datetime\n", + "from textwrap import dedent\n", + "\n", + "import pandas as pd\n", + "# load API key from environment/.env file or set it directly in the notebook\n", + "from dotenv import load_dotenv\n", + "load_dotenv()\n", + "# import os\n", + "# os.environ[\"EVERYROW_API_KEY\"] = \"get an API key on everyrow.io. $20 free\"\n", + "from everyrow.ops import dedupe\n" + ] }, { "cell_type": "markdown", @@ -188,7 +204,7 @@ } ], "source": [ - "data = pd.read_csv(\"../data/case_01_crm_data.csv\", engine=\"pyarrow\")\n", + "data = pd.read_csv(\"https://media.githubusercontent.com/media/futuresearch/everyrow-sdk/main/docs/data/case_01_crm_data.csv\", engine=\"pyarrow\")\n", "\n", "print(f\"Total records: {len(data)}\")\n", "data.sort_values(by=\"company_name\").head(15)" @@ -219,7 +235,18 @@ "execution_count": null, "metadata": {}, "outputs": [], - "source": "equivalence_relation = dedent(\"\"\"\n Two entries are duplicates if they include data for the same legal entity.\n\"\"\")\n\nprint(\"Deduplicating CRM data...\\n\")\n\nresult = await dedupe(\n input=data,\n equivalence_relation=equivalence_relation,\n)" + "source": [ + "equivalence_relation = dedent(\"\"\"\n", + " Two entries are duplicates if they include data for the same legal entity.\n", + "\"\"\")\n", + "\n", + "print(\"Deduplicating CRM data...\\n\")\n", + "\n", + "result = await dedupe(\n", + " input=data,\n", + " equivalence_relation=equivalence_relation,\n", + ")" + ] }, { "cell_type": "markdown", @@ -584,6 +611,9 @@ } ], "metadata": { + "everyrow": { + "description": "Python notebook cleaning 500 CRM records with inconsistent company names, missing contacts, and partial email matches. Uses everyrow's dedupe() with a plain-English equivalence relation to find and group semantic duplicates." + }, "kernelspec": { "display_name": ".venv", "language": "python", @@ -601,10 +631,7 @@ "pygments_lexer": "ipython3", "version": "3.12.11" }, - "language_version": "3.12", - "everyrow": { - "description": "Python notebook cleaning 500 CRM records with inconsistent company names, missing contacts, and partial email matches. Uses everyrow's dedupe() with a plain-English equivalence relation to find and group semantic duplicates." - } + "language_version": "3.12" }, "nbformat": 4, "nbformat_minor": 4 diff --git a/docs/case_studies/llm-powered-merging-at-scale/notebook.ipynb b/docs/case_studies/llm-powered-merging-at-scale/notebook.ipynb index 798089e4..fd29c59e 100644 --- a/docs/case_studies/llm-powered-merging-at-scale/notebook.ipynb +++ b/docs/case_studies/llm-powered-merging-at-scale/notebook.ipynb @@ -39,6 +39,12 @@ "metadata": {}, "outputs": [], "source": [ + "# !pip install everyrow\n", + "# load API key from environment/.env file or set it directly in the notebook\n", + "from dotenv import load_dotenv\n", + "load_dotenv()\n", + "# import os\n", + "# os.environ[\"EVERYROW_API_KEY\"] = \"get an API key on everyrow.io. $20 free\"\n", "import numpy as np\n", "import pandas as pd\n", "from everyrow.ops import merge\n", @@ -115,8 +121,8 @@ } ], "source": [ - "left_df = pd.read_csv(\"merge_websites_input_left_2246.csv\")\n", - "right_df = pd.read_csv(\"merge_websites_input_right_2246.csv\")\n", + "left_df = pd.read_csv(\"https://media.githubusercontent.com/media/futuresearch/everyrow-sdk/main/docs/case_studies/llm-powered-merging-at-scale/merge_websites_input_left_2246.csv\")\n", + "right_df = pd.read_csv(\"https://media.githubusercontent.com/media/futuresearch/everyrow-sdk/main/docs/case_studies/llm-powered-merging-at-scale/merge_websites_input_right_2246.csv\")\n", "\n", "print(f\"Left table: {len(left_df)} rows\")\n", "left_df.head(3)" @@ -240,8 +246,8 @@ "for n in [100, 200, 400, 800, 1600, 2246]:\n", " result = await merge(\n", " task=\"Match each person to their website(s).\",\n", - " left_table=pd.read_csv(f\"merge_websites_input_left_{n}.csv\"),\n", - " right_table=pd.read_csv(f\"merge_websites_input_right_{n}.csv\"),\n", + " left_table=pd.read_csv(f\"https://media.githubusercontent.com/media/futuresearch/everyrow-sdk/main/docs/case_studies/llm-powered-merging-at-scale/merge_websites_input_left_{n}.csv\"),\n", + " right_table=pd.read_csv(f\"https://media.githubusercontent.com/media/futuresearch/everyrow-sdk/main/docs/case_studies/llm-powered-merging-at-scale/merge_websites_input_right_{n}.csv\"),\n", " )\n", " print(f\"n={n}\")\n", " print(\"num of matched rows:\", len(result.data))\n", @@ -307,7 +313,7 @@ "metadata": {}, "outputs": [], "source": [ - "results_df = pd.read_csv(\"merge_websites_output_800.csv\")" + "results_df = pd.read_csv(\"https://media.githubusercontent.com/media/futuresearch/everyrow-sdk/main/docs/case_studies/llm-powered-merging-at-scale/merge_websites_output_800.csv\")" ] }, { diff --git a/docs/case_studies/llm-powered-screening-at-scale/notebook.ipynb b/docs/case_studies/llm-powered-screening-at-scale/notebook.ipynb index b5968aa2..4972ad37 100644 --- a/docs/case_studies/llm-powered-screening-at-scale/notebook.ipynb +++ b/docs/case_studies/llm-powered-screening-at-scale/notebook.ipynb @@ -35,7 +35,7 @@ "metadata": {}, "outputs": [], "source": [ - "from dotenv import load_dotenv\n", + "# !pip install everyrow\n", "import pandas as pd\n", "from everyrow import create_session\n", "from everyrow.ops import screen\n", @@ -43,7 +43,11 @@ "pd.set_option(\"display.max_colwidth\", None)\n", "\n", "\n", - "load_dotenv()" + "# load API key from environment/.env file or set it directly in the notebook\n", + "from dotenv import load_dotenv\n", + "load_dotenv()\n", + "# import os\n", + "# os.environ[\"EVERYROW_API_KEY\"] = \"get an API key on everyrow.io. $20 free\"\n" ] }, { @@ -217,7 +221,7 @@ } ], "source": [ - "fda_product_recalls_df = pd.read_csv(\"fda_product_recalls.csv\")\n", + "fda_product_recalls_df = pd.read_csv(\"https://media.githubusercontent.com/media/futuresearch/everyrow-sdk/main/docs/case_studies/llm-powered-screening-at-scale/fda_product_recalls.csv\")\n", "\n", "# Filter to recalls where center_classification_date is after 2021-08-01 to get a dataset with ≈10k rows\n", "fda_product_recalls_df[\"center_classification_date\"] = pd.to_datetime(fda_product_recalls_df[\"center_classification_date\"], errors=\"coerce\")\n", @@ -311,7 +315,7 @@ "metadata": {}, "outputs": [], "source": [ - "results_df = pd.read_csv(\"Screen child product recalls.csv\") # download from https://everyrow.io/sessions/df145a50-2dfd-48c6-97ed-6f82a82bca66" + "results_df = pd.read_csv(\"https://media.githubusercontent.com/media/futuresearch/everyrow-sdk/main/docs/case_studies/llm-powered-screening-at-scale/Screen%20child%20product%20recalls.csv\") # download from https://everyrow.io/sessions/df145a50-2dfd-48c6-97ed-6f82a82bca66" ] }, { diff --git a/docs/case_studies/llm-web-research-agents-at-scale/notebook.ipynb b/docs/case_studies/llm-web-research-agents-at-scale/notebook.ipynb index dfd2fac3..0077e8e5 100644 --- a/docs/case_studies/llm-web-research-agents-at-scale/notebook.ipynb +++ b/docs/case_studies/llm-web-research-agents-at-scale/notebook.ipynb @@ -39,7 +39,7 @@ "metadata": {}, "outputs": [], "source": [ - "from dotenv import load_dotenv\n", + "# !pip install everyrow\n", "import pandas as pd\n", "from pydantic import BaseModel, Field\n", "from everyrow import create_session\n", @@ -48,7 +48,11 @@ "pd.set_option(\"display.max_colwidth\", None)\n", "\n", "\n", - "load_dotenv()" + "# load API key from environment/.env file or set it directly in the notebook\n", + "from dotenv import load_dotenv\n", + "load_dotenv()\n", + "# import os\n", + "# os.environ[\"EVERYROW_API_KEY\"] = \"get an API key on everyrow.io. $20 free\"\n" ] }, { @@ -166,7 +170,7 @@ } ], "source": [ - "input_df = pd.read_csv(\"regulatory_status_results.csv\", usecols=[\"row_id\", \"trade_name\", \"ingredient\", \"applicant\", \"strength\", \"dosage_form\"])\n", + "input_df = pd.read_csv(\"https://media.githubusercontent.com/media/futuresearch/everyrow-sdk/main/docs/case_studies/llm-web-research-agents-at-scale/regulatory_status_results.csv\", usecols=[\"row_id\", \"trade_name\", \"ingredient\", \"applicant\", \"strength\", \"dosage_form\"])\n", "print(f\"{len(input_df):,} drug products\")\n", "print(f\"Columns: {list(input_df.columns)}\")\n", "input_df.head(5)" @@ -356,7 +360,7 @@ } ], "source": [ - "results_df = pd.read_csv(\"regulatory_status_results.csv\")\n", + "results_df = pd.read_csv(\"https://media.githubusercontent.com/media/futuresearch/everyrow-sdk/main/docs/case_studies/llm-web-research-agents-at-scale/regulatory_status_results.csv\")\n", "print(f\"Total rows: {len(results_df):,}\")\n", "print(f\"Rows with results: {results_df['regulatory_status'].notna().sum():,}\")\n", "print(f\"Failed rows: {results_df['regulatory_status'].isna().sum()}\")\n", @@ -848,4 +852,4 @@ }, "nbformat": 4, "nbformat_minor": 5 -} \ No newline at end of file +} diff --git a/docs/case_studies/match-software-vendors-to-requirements/notebook.ipynb b/docs/case_studies/match-software-vendors-to-requirements/notebook.ipynb index ad0e28b9..680d7a78 100644 --- a/docs/case_studies/match-software-vendors-to-requirements/notebook.ipynb +++ b/docs/case_studies/match-software-vendors-to-requirements/notebook.ipynb @@ -25,8 +25,12 @@ "metadata": {}, "outputs": [], "source": [ - "!pip install everyrow\n", - "%env EVERYROW_API_KEY=your_api_key" + "# !pip install everyrow\n", + "# load API key from environment/.env file or set it directly in the notebook\n", + "from dotenv import load_dotenv\n", + "load_dotenv()\n", + "# import os\n", + "# os.environ[\"EVERYROW_API_KEY\"] = \"get an API key on everyrow.io. $20 free\"\n" ] }, { @@ -168,7 +172,7 @@ "from everyrow.generated.models import LLMEnum\n", "\n", "# Load dataset: 438 S&P 500 companies\n", - "data = pd.read_csv(\"../data/companies.csv\")\n", + "data = pd.read_csv(\"https://media.githubusercontent.com/media/futuresearch/everyrow-sdk/main/docs/data/companies.csv\")\n", "print(f\"Dataset: {data.shape[0]} companies, {data.shape[1]} columns\")\n", "data.head()" ] diff --git a/docs/case_studies/merge-contacts-with-company-data/notebook.ipynb b/docs/case_studies/merge-contacts-with-company-data/notebook.ipynb index f6a940fc..4a284781 100644 --- a/docs/case_studies/merge-contacts-with-company-data/notebook.ipynb +++ b/docs/case_studies/merge-contacts-with-company-data/notebook.ipynb @@ -18,10 +18,13 @@ }, "outputs": [], "source": [ + "# !pip install everyrow\n", "import asyncio\n", + "# load API key from environment/.env file or set it directly in the notebook\n", "from dotenv import load_dotenv\n", "load_dotenv()\n", - "\n", + "# import os\n", + "# os.environ[\"EVERYROW_API_KEY\"] = \"get an API key on everyrow.io. $20 free\"\n", "import pandas as pd\n", "from everyrow import create_session\n", "from everyrow.ops import merge" @@ -188,7 +191,7 @@ ], "source": [ "# Contacts table\n", - "contacts_df = pd.read_csv(\"../data/crm_contacts.csv\")\n", + "contacts_df = pd.read_csv(\"https://media.githubusercontent.com/media/futuresearch/everyrow-sdk/main/docs/data/crm_contacts.csv\")\n", "\n", "print(f\"Contacts: {len(contacts_df)}\")\n", "contacts_df" @@ -359,7 +362,7 @@ ], "source": [ "# Funds/Companies table with enriched information\n", - "funds_df = pd.read_csv(\"../data/crm_funds.csv\")\n", + "funds_df = pd.read_csv(\"https://media.githubusercontent.com/media/futuresearch/everyrow-sdk/main/docs/data/crm_funds.csv\")\n", "\n", "print(f\"Funds: {len(funds_df)}\")\n", "funds_df" diff --git a/docs/case_studies/merge-overlapping-contact-lists/notebook.ipynb b/docs/case_studies/merge-overlapping-contact-lists/notebook.ipynb index 0aef71bd..8d682af7 100644 --- a/docs/case_studies/merge-overlapping-contact-lists/notebook.ipynb +++ b/docs/case_studies/merge-overlapping-contact-lists/notebook.ipynb @@ -18,10 +18,13 @@ }, "outputs": [], "source": [ + "# !pip install everyrow\n", "import asyncio\n", + "# load API key from environment/.env file or set it directly in the notebook\n", "from dotenv import load_dotenv\n", "load_dotenv()\n", - "\n", + "# import os\n", + "# os.environ[\"EVERYROW_API_KEY\"] = \"get an API key on everyrow.io. $20 free\"\n", "import pandas as pd\n", "from everyrow import create_session\n", "from everyrow.ops import merge" @@ -206,7 +209,7 @@ ], "source": [ "# List A: From a conference attendee export\n", - "list_a = pd.read_csv(\"../data/contacts_list_a.csv\").fillna(\"\")\n", + "list_a = pd.read_csv(\"https://media.githubusercontent.com/media/futuresearch/everyrow-sdk/main/docs/data/contacts_list_a.csv\").fillna(\"\")\n", "\n", "print(f\"List A: {len(list_a)} contacts\")\n", "list_a" @@ -366,7 +369,7 @@ ], "source": [ "# List B: From a research collaboration database\n", - "list_b = pd.read_csv(\"../data/contacts_list_b.csv\").fillna(\"\")\n", + "list_b = pd.read_csv(\"https://media.githubusercontent.com/media/futuresearch/everyrow-sdk/main/docs/data/contacts_list_b.csv\").fillna(\"\")\n", "\n", "print(f\"List B: {len(list_b)} contacts\")\n", "list_b" diff --git a/docs/case_studies/multi-stage-lead-qualification/notebook.ipynb b/docs/case_studies/multi-stage-lead-qualification/notebook.ipynb index 52e4fc4f..5d3fc709 100644 --- a/docs/case_studies/multi-stage-lead-qualification/notebook.ipynb +++ b/docs/case_studies/multi-stage-lead-qualification/notebook.ipynb @@ -18,10 +18,13 @@ }, "outputs": [], "source": [ + "# !pip install everyrow\n", "import asyncio\n", + "# load API key from environment/.env file or set it directly in the notebook\n", "from dotenv import load_dotenv\n", "load_dotenv()\n", - "\n", + "# import os\n", + "# os.environ[\"EVERYROW_API_KEY\"] = \"get an API key on everyrow.io. $20 free\"\n", "import pandas as pd\n", "from pydantic import BaseModel, Field\n", "from everyrow import create_session\n", @@ -200,7 +203,7 @@ } ], "source": [ - "funds_df = pd.read_csv(\"../data/investment_funds.csv\")\n", + "funds_df = pd.read_csv(\"https://media.githubusercontent.com/media/futuresearch/everyrow-sdk/main/docs/data/investment_funds.csv\")\n", "\n", "print(f\"Loaded {len(funds_df)} funds\")\n", "funds_df.head(10)" diff --git a/docs/case_studies/research-and-rank-permit-times/notebook.ipynb b/docs/case_studies/research-and-rank-permit-times/notebook.ipynb index 487c0d07..be556338 100644 --- a/docs/case_studies/research-and-rank-permit-times/notebook.ipynb +++ b/docs/case_studies/research-and-rank-permit-times/notebook.ipynb @@ -11,10 +11,13 @@ "metadata": {}, "outputs": [], "source": [ + "# !pip install everyrow\n", "import asyncio\n", + "# load API key from environment/.env file or set it directly in the notebook\n", "from dotenv import load_dotenv\n", "load_dotenv()\n", - "\n", + "# import os\n", + "# os.environ[\"EVERYROW_API_KEY\"] = \"get an API key on everyrow.io. $20 free\"\n", "import pandas as pd\n", "from everyrow import create_session\n", "from everyrow.ops import rank" @@ -150,7 +153,7 @@ } ], "source": [ - "texas_cities_df = pd.read_csv(\"../data/texas_cities.csv\")\n", + "texas_cities_df = pd.read_csv(\"https://media.githubusercontent.com/media/futuresearch/everyrow-sdk/main/docs/data/texas_cities.csv\")\n", "\n", "print(f\"Analyzing {len(texas_cities_df)} Texas cities\")\n", "texas_cities_df.head(10)" diff --git a/docs/case_studies/score-leads-from-fragmented-data/notebook.ipynb b/docs/case_studies/score-leads-from-fragmented-data/notebook.ipynb index 6441dbd5..80c2358b 100644 --- a/docs/case_studies/score-leads-from-fragmented-data/notebook.ipynb +++ b/docs/case_studies/score-leads-from-fragmented-data/notebook.ipynb @@ -18,10 +18,13 @@ }, "outputs": [], "source": [ + "# !pip install everyrow\n", "import asyncio\n", + "# load API key from environment/.env file or set it directly in the notebook\n", "from dotenv import load_dotenv\n", "load_dotenv()\n", - "\n", + "# import os\n", + "# os.environ[\"EVERYROW_API_KEY\"] = \"get an API key on everyrow.io. $20 free\"\n", "import pandas as pd\n", "from everyrow import create_session\n", "from everyrow.ops import rank" @@ -187,7 +190,7 @@ } ], "source": [ - "companies_df = pd.read_csv(\"../data/b2b_companies.csv\")\n", + "companies_df = pd.read_csv(\"https://media.githubusercontent.com/media/futuresearch/everyrow-sdk/main/docs/data/b2b_companies.csv\")\n", "\n", "print(f\"Loaded {len(companies_df)} companies\")\n", "companies_df.head(10)" diff --git a/docs/case_studies/score-leads-without-crm-history/notebook.ipynb b/docs/case_studies/score-leads-without-crm-history/notebook.ipynb index 65293d4d..658a8465 100644 --- a/docs/case_studies/score-leads-without-crm-history/notebook.ipynb +++ b/docs/case_studies/score-leads-without-crm-history/notebook.ipynb @@ -18,10 +18,13 @@ }, "outputs": [], "source": [ + "# !pip install everyrow\n", "import asyncio\n", + "# load API key from environment/.env file or set it directly in the notebook\n", "from dotenv import load_dotenv\n", "load_dotenv()\n", - "\n", + "# import os\n", + "# os.environ[\"EVERYROW_API_KEY\"] = \"get an API key on everyrow.io. $20 free\"\n", "import pandas as pd\n", "from everyrow import create_session\n", "from everyrow.ops import rank" @@ -232,7 +235,7 @@ } ], "source": [ - "firms_df = pd.read_csv(\"../data/investment_firms.csv\")\n", + "firms_df = pd.read_csv(\"https://media.githubusercontent.com/media/futuresearch/everyrow-sdk/main/docs/data/investment_firms.csv\")\n", "\n", "print(f\"Loaded {len(firms_df)} investment firms\")\n", "firms_df" diff --git a/docs/case_studies/screen-job-postings-by-criteria/notebook.ipynb b/docs/case_studies/screen-job-postings-by-criteria/notebook.ipynb index ded65391..8c7e3437 100644 --- a/docs/case_studies/screen-job-postings-by-criteria/notebook.ipynb +++ b/docs/case_studies/screen-job-postings-by-criteria/notebook.ipynb @@ -18,10 +18,13 @@ }, "outputs": [], "source": [ + "# !pip install everyrow\n", "import asyncio\n", + "# load API key from environment/.env file or set it directly in the notebook\n", "from dotenv import load_dotenv\n", "load_dotenv()\n", - "\n", + "# import os\n", + "# os.environ[\"EVERYROW_API_KEY\"] = \"get an API key on everyrow.io. $20 free\"\n", "import pandas as pd\n", "from pydantic import BaseModel, Field\n", "from everyrow import create_session\n", @@ -143,7 +146,7 @@ } ], "source": [ - "job_postings = pd.read_csv(\"../data/job_postings.csv\")\n", + "job_postings = pd.read_csv(\"https://media.githubusercontent.com/media/futuresearch/everyrow-sdk/main/docs/data/job_postings.csv\")\n", "\n", "print(f\"Loaded {len(job_postings)} job postings\")\n", "job_postings.head()" diff --git a/docs/case_studies/screen-stocks-by-investment-thesis/notebook.ipynb b/docs/case_studies/screen-stocks-by-investment-thesis/notebook.ipynb index 870a4fdf..2d7281ff 100644 --- a/docs/case_studies/screen-stocks-by-investment-thesis/notebook.ipynb +++ b/docs/case_studies/screen-stocks-by-investment-thesis/notebook.ipynb @@ -11,17 +11,19 @@ "metadata": {}, "outputs": [], "source": [ + "# !pip install everyrow\n", "# Setup\n", "import asyncio\n", "from pathlib import Path\n", "\n", "import pandas as pd\n", "from pydantic import BaseModel, Field\n", - "from dotenv import load_dotenv\n", "\n", - "# Load API key from .env\n", + "# load API key from environment/.env file or set it directly in the notebook\n", + "from dotenv import load_dotenv\n", "load_dotenv()\n", - "\n", + "# import os\n", + "# os.environ[\"EVERYROW_API_KEY\"] = \"get an API key on everyrow.io. $20 free\"\n", "from everyrow.ops import screen" ] }, @@ -158,7 +160,7 @@ ], "source": [ "# Load S&P 500 companies\n", - "stocks = pd.read_csv(\"../data/S&P 500 Companies.csv\")\n", + "stocks = pd.read_csv(\"https://media.githubusercontent.com/media/futuresearch/everyrow-sdk/main/docs/data/S%26P%20500%20Companies.csv\")\n", "print(f\"Loaded {len(stocks)} companies\")\n", "stocks.head()" ] diff --git a/docs/case_studies/screen-stocks-by-margin-sensitivity/notebook.ipynb b/docs/case_studies/screen-stocks-by-margin-sensitivity/notebook.ipynb index dab730a1..03bbcc98 100644 --- a/docs/case_studies/screen-stocks-by-margin-sensitivity/notebook.ipynb +++ b/docs/case_studies/screen-stocks-by-margin-sensitivity/notebook.ipynb @@ -11,17 +11,19 @@ "metadata": {}, "outputs": [], "source": [ + "# !pip install everyrow\n", "# Setup\n", "import asyncio\n", "from pathlib import Path\n", "\n", "import pandas as pd\n", "from pydantic import BaseModel, Field\n", - "from dotenv import load_dotenv\n", "\n", - "# Load API key from .env\n", + "# load API key from environment/.env file or set it directly in the notebook\n", + "from dotenv import load_dotenv\n", "load_dotenv()\n", - "\n", + "# import os\n", + "# os.environ[\"EVERYROW_API_KEY\"] = \"get an API key on everyrow.io. $20 free\"\n", "from everyrow.ops import screen" ] }, @@ -158,7 +160,7 @@ ], "source": [ "# Load S&P 500 companies\n", - "stocks = pd.read_csv(\"../data/S&P 500 Companies.csv\")\n", + "stocks = pd.read_csv(\"https://media.githubusercontent.com/media/futuresearch/everyrow-sdk/main/docs/data/S%26P%20500%20Companies.csv\")\n", "print(f\"Loaded {len(stocks)} companies\")\n", "stocks.head()" ] diff --git a/docs/in_progress/basic-usage/notebook.ipynb b/docs/in_progress/basic-usage/notebook.ipynb index ec09fef0..987c9480 100644 --- a/docs/in_progress/basic-usage/notebook.ipynb +++ b/docs/in_progress/basic-usage/notebook.ipynb @@ -36,16 +36,20 @@ } ], "source": [ + "# !pip install everyrow\n", "import os\n", "from textwrap import dedent\n", "\n", - "from dotenv import load_dotenv\n", "from pandas import DataFrame\n", "from pydantic import BaseModel, Field\n", "\n", "from everyrow.session import create_session\n", "\n", - "load_dotenv()" + "# load API key from environment/.env file or set it directly in the notebook\n", + "from dotenv import load_dotenv\n", + "load_dotenv()\n", + "# import os\n", + "# os.environ[\"EVERYROW_API_KEY\"] = \"get an API key on everyrow.io. $20 free\"\n" ] }, { diff --git a/docs/in_progress/dedupe-researchers-across-career-changes/notebook.ipynb b/docs/in_progress/dedupe-researchers-across-career-changes/notebook.ipynb index 1489ae15..409ae1d1 100644 --- a/docs/in_progress/dedupe-researchers-across-career-changes/notebook.ipynb +++ b/docs/in_progress/dedupe-researchers-across-career-changes/notebook.ipynb @@ -19,10 +19,13 @@ "metadata": {}, "outputs": [], "source": [ + "# !pip install everyrow\n", "import asyncio\n", + "# load API key from environment/.env file or set it directly in the notebook\n", "from dotenv import load_dotenv\n", "load_dotenv()\n", - "\n", + "# import os\n", + "# os.environ[\"EVERYROW_API_KEY\"] = \"get an API key on everyrow.io. $20 free\"\n", "import pandas as pd\n", "from everyrow import create_session\n", "from everyrow.ops import dedupe" @@ -40,7 +43,7 @@ "execution_count": null, "metadata": {}, "outputs": [], - "source": "researchers_df = pd.read_csv(\"../data/researchers.csv\")\n\n# Fill NaN values with empty strings to avoid JSON serialization issues\nresearchers_df = researchers_df.fillna(\"\")\n\nprint(f\"Researcher records: {len(researchers_df)}\")\nprint(f\"Expected unique people: ~12\")\nresearchers_df" + "source": "researchers_df = pd.read_csv(\"https://media.githubusercontent.com/media/futuresearch/everyrow-sdk/main/docs/data/researchers.csv\")\n\n# Fill NaN values with empty strings to avoid JSON serialization issues\nresearchers_df = researchers_df.fillna(\"\")\n\nprint(f\"Researcher records: {len(researchers_df)}\")\nprint(f\"Expected unique people: ~12\")\nresearchers_df" }, { "cell_type": "markdown", @@ -235,4 +238,4 @@ }, "nbformat": 4, "nbformat_minor": 4 -} \ No newline at end of file +}