diff --git a/notebooks/generative_ai/large_language_models.ipynb b/notebooks/generative_ai/large_language_models.ipynb index 08ef52b544..744706cab8 100644 --- a/notebooks/generative_ai/large_language_models.ipynb +++ b/notebooks/generative_ai/large_language_models.ipynb @@ -16,8 +16,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Prerequisites\n", - "Create session and define a BQ connection which we already created and allowlisted. " + "## Define the model" ] }, { @@ -29,33 +28,14 @@ "name": "stderr", "output_type": "stream", "text": [ - "/usr/local/google/home/garrettwu/src/bigframes/bigframes/session/__init__.py:1762: UserWarning: No explicit location is set, so using location US for the session.\n", - " return Session(context)\n" + "/usr/local/google/home/garrettwu/src/bigframes/bigframes/ml/llm.py:589: DefaultLocationWarning: No explicit location is set, so using location US for the session.\n", + " self.session = session or bpd.get_global_session()\n" ] - } - ], - "source": [ - "session = bigframes.pandas.get_global_session()\n", - "connection = f\"{session.bqclient.project}.us.bigframes-default-connection\"" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Define the model" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ + }, { "data": { "text/html": [ - "Query job 12bcd690-ca99-4001-bf26-032f50e77d62 is DONE. 0 Bytes processed. Open Job" + "Query job 675a6c8a-213b-496c-9f77-b87bf7cfa5e0 is DONE. 0 Bytes processed. Open Job" ], "text/plain": [ "" @@ -66,7 +46,7 @@ } ], "source": [ - "model = GeminiTextGenerator(session=session, connection_name=connection)" + "model = GeminiTextGenerator()" ] }, { @@ -81,7 +61,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -102,13 +82,13 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/html": [ - "Query job f8fe31c6-7d8a-4919-9492-8304a0083cca is DONE. 0 Bytes processed. Open Job" + "Query job 7967df2b-9f0f-45c8-a363-15f65891c3bf is DONE. 0 Bytes processed. Open Job" ], "text/plain": [ "" @@ -118,21 +98,17 @@ "output_type": "display_data" }, { - "data": { - "text/html": [ - "Query job 28bab71f-e218-4d92-9a50-dab41bb0c71f is DONE. 24 Bytes processed. Open Job" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/google/home/garrettwu/src/bigframes/bigframes/core/__init__.py:108: PreviewWarning: Interpreting JSON column(s) as StringDtype. This behavior may change in future versions.\n", + " warnings.warn(\n" + ] }, { "data": { "text/html": [ - "Query job 01d66b61-459f-474e-9f66-d519f9c2f23d is DONE. 6 Bytes processed. Open Job" + "Query job 9a1f57cd-98e1-4eac-a1b3-8f88d61971cd is DONE. 6 Bytes processed. Open Job" ], "text/plain": [ "" @@ -144,7 +120,7 @@ { "data": { "text/html": [ - "Query job af606ca7-4bcf-4bd1-95fd-c516542b5a4f is DONE. 5.3 kB processed. Open Job" + "Query job 2a94a2cf-7d4c-4009-a798-d7a5d6d4049d is DONE. 8.5 kB processed. Open Job" ], "text/plain": [ "" @@ -183,28 +159,28 @@ " \n", " \n", " 0\n", - " **BigQuery**\n", + " ## BigQuery: A Serverless Data Warehouse\n", "\n", - "**Definition:**\n", - "\n", - "BigQuery is a s...\n", - " null\n", + "BigQ...\n", + " [{\"category\":1,\"probability\":1,\"probability_sc...\n", " \n", " What is BigQuery?\n", " \n", " \n", " 1\n", - " **BigQuery Machine Learning (BQML)**\n", + " ## BigQuery Machine Learning (BQML)\n", "\n", - "BQML is ...\n", - " null\n", + "BQML is a...\n", + " [{\"category\":1,\"probability\":1,\"probability_sc...\n", " \n", " What is BQML?\n", " \n", " \n", " 2\n", - " BigQuery DataFrame is a Python DataFrame imple...\n", - " null\n", + " ## What is BigQuery DataFrame?\n", + "\n", + "**BigQuery Dat...\n", + " [{\"category\":1,\"probability\":1,\"probability_sc...\n", " \n", " What is BigQuery DataFrame?\n", " \n", @@ -214,20 +190,20 @@ ], "text/plain": [ " ml_generate_text_llm_result \\\n", - "0 **BigQuery**\n", + "0 ## BigQuery: A Serverless Data Warehouse\n", "\n", - "**Definition:**\n", + "BigQ... \n", + "1 ## BigQuery Machine Learning (BQML)\n", "\n", - "BigQuery is a s... \n", - "1 **BigQuery Machine Learning (BQML)**\n", + "BQML is a... \n", + "2 ## What is BigQuery DataFrame?\n", "\n", - "BQML is ... \n", - "2 BigQuery DataFrame is a Python DataFrame imple... \n", + "**BigQuery Dat... \n", "\n", - " ml_generate_text_rai_result ml_generate_text_status \\\n", - "0 null \n", - "1 null \n", - "2 null \n", + " ml_generate_text_rai_result ml_generate_text_status \\\n", + "0 [{\"category\":1,\"probability\":1,\"probability_sc... \n", + "1 [{\"category\":1,\"probability\":1,\"probability_sc... \n", + "2 [{\"category\":1,\"probability\":1,\"probability_sc... \n", "\n", " prompt \n", "0 What is BigQuery? \n", @@ -235,7 +211,7 @@ "2 What is BigQuery DataFrame? " ] }, - "execution_count": 5, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } @@ -255,16 +231,16 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "'**BigQuery**\\n\\n**Definition:**\\n\\nBigQuery is a serverless, highly scalable, cloud-based data warehouse and analytics platform offered by Google Cloud.\\n\\n**Key Features:**\\n\\n* **Massive Scalability:** Can handle large datasets (petabytes or more) with fast query execution.\\n* **Elastic:** Automatically scales compute resources based on workload requirements.\\n* **Serverless:** Users do not need to manage infrastructure or provision resources.\\n* **Flexible Data Loading:** Supports a wide range of data sources, including files, databases, and streaming data.\\n* **SQL-Based Querying:** Uses standard SQL syntax for querying and analyzing data.\\n* **Machine Learning Integration:** Provides built-in machine learning capabilities for predictive analytics and data exploration.\\n* **Real-Time Analysis:** Supports streaming data analysis and interactive dashboards.\\n* **Collaboration and Sharing:** Allows multiple users to access and analyze data in a collaborative environment.\\n* **Cost-Effective:** Pay-as-you-go pricing based on data scanned and compute resources used.\\n\\n**Applications:**\\n\\n* Data warehousing and analytics\\n* Business intelligence and reporting\\n* Data science and machine learning\\n* Data exploration and visualization\\n* Marketing analytics\\n* Fraud detection and risk management\\n\\n**Benefits:**\\n\\n* Rapid data analysis on large datasets\\n* Reduced infrastructure management overhead\\n* Increased agility and flexibility\\n* Enhanced collaboration and data sharing\\n* Cost-effective data storage and analytics'" + "\"## BigQuery: A Serverless Data Warehouse\\n\\nBigQuery is a serverless, cloud-based data warehouse that enables scalable analysis of large datasets. It's a popular choice for businesses of all sizes due to its ability to handle petabytes of data and run complex queries quickly and efficiently. Let's delve into its key features:\\n\\n**Serverless Architecture:** BigQuery eliminates the need for server management, allowing you to focus on analyzing data. Google manages the infrastructure, scaling resources up or down automatically based on your needs.\\n\\n**Scalability:** BigQuery can handle massive datasets, scaling seamlessly as your data volume grows. It automatically distributes queries across its infrastructure, ensuring fast and efficient processing.\\n\\n**SQL-like Querying:** BigQuery uses a familiar SQL-like syntax, making it easy for data analysts and developers to learn and use. This allows them to leverage their existing SQL knowledge for data exploration and analysis.\\n\\n**Cost-Effectiveness:** BigQuery offers a pay-as-you-go pricing model, meaning you only pay for the resources you use. This makes it a cost-effective solution for businesses with varying data processing needs.\\n\\n**Integration with Google Cloud:** BigQuery integrates seamlessly with other Google Cloud services like Cloud Storage, Dataflow, and Machine Learning, enabling a comprehensive data processing and analysis workflow within the Google Cloud ecosystem.\\n\\n**Security and Reliability:** BigQuery offers robust security features and high availability, ensuring data protection and reliable access.\\n\\n**Use Cases:** BigQuery finds applications in various scenarios, including:\\n\\n* **Data Warehousing:** Store and analyze large amounts of structured and semi-structured data.\\n* **Business Intelligence:** Generate insights from data for informed decision-making.\\n* **Data Analytics:** Perform complex data analysis and extract valuable patterns.\\n* **Machine Learning:** Train and deploy machine learning models on large datasets.\\n\\n**Getting Started:** To get started with BigQuery, you can create a free trial account on Google Cloud Platform and explore its features. Numerous tutorials and documentation are available to help you learn and use BigQuery effectively.\\n\\n## Additional Resources:\\n\\n* **BigQuery Documentation:** https://cloud.google.com/bigquery/docs/\\n* **BigQuery Quickstart:** https://cloud.google.com/bigquery/docs/quickstarts/quickstart-console\\n* **BigQuery Pricing:** https://cloud.google.com/bigquery/pricing\\n\\nFeel free to ask if you have any further questions about BigQuery!\"" ] }, - "execution_count": 6, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" }