Skip to content

Commit 59cbc5d

Browse files
authored
docs: use direct API for audio transcription (#2447)
Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly:

- [ ] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/python-bigquery-dataframes/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea
- [ ] Ensure the tests and linter pass
- [ ] Code coverage does not decrease (if any source code was changed)
- [ ] Appropriate docs were updated (if necessary)

Fixes #<478952827> 🦕
1 parent 84c6f88 commit 59cbc5d

File tree

1 file changed

+54
-41
lines changed

1 file changed

+54
-41
lines changed

notebooks/multimodal/multimodal_dataframe.ipynb

Lines changed: 54 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,7 @@
9292
},
9393
{
9494
"cell_type": "code",
95-
"execution_count": 3,
95+
"execution_count": 9,
9696
"metadata": {
9797
"colab": {
9898
"base_uri": "https://localhost:8080/"
@@ -1459,99 +1459,112 @@
14591459
"cell_type": "markdown",
14601460
"metadata": {},
14611461
"source": [
1462-
"### 6. Audio transcribe function"
1462+
"### 6. Audio transcribe"
14631463
]
14641464
},
14651465
{
14661466
"cell_type": "code",
1467-
"execution_count": 21,
1467+
"execution_count": 10,
14681468
"metadata": {},
1469-
"outputs": [
1470-
{
1471-
"name": "stderr",
1472-
"output_type": "stream",
1473-
"text": [
1474-
"/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/dtypes.py:959: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n",
1475-
"instead of using `db_dtypes` in the future when available in pandas\n",
1476-
"(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n",
1477-
" warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n"
1478-
]
1479-
}
1480-
],
1469+
"outputs": [],
14811470
"source": [
14821471
"audio_gcs_path = \"gs://bigframes_blob_test/audio/*\"\n",
14831472
"df = bpd.from_glob_path(audio_gcs_path, name=\"audio\")"
14841473
]
14851474
},
14861475
{
14871476
"cell_type": "code",
1488-
"execution_count": 22,
1477+
"execution_count": 11,
14891478
"metadata": {},
14901479
"outputs": [
14911480
{
14921481
"name": "stderr",
14931482
"output_type": "stream",
14941483
"text": [
1495-
"/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/dtypes.py:959: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n",
1496-
"instead of using `db_dtypes` in the future when available in pandas\n",
1497-
"(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n",
1498-
" warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n",
1499-
"/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/dtypes.py:959: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n",
1484+
"/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dtypes.py:987: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n",
15001485
"instead of using `db_dtypes` in the future when available in pandas\n",
15011486
"(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n",
15021487
" warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n"
15031488
]
15041489
},
15051490
{
15061491
"data": {
1492+
"text/html": [
1493+
"<pre>0 Now, as all books, not primarily intended as p...</pre>"
1494+
],
15071495
"text/plain": [
15081496
"0 Now, as all books, not primarily intended as p...\n",
15091497
"Name: transcribed_content, dtype: string"
15101498
]
15111499
},
1512-
"execution_count": 22,
1500+
"execution_count": 11,
15131501
"metadata": {},
15141502
"output_type": "execute_result"
15151503
}
15161504
],
15171505
"source": [
1518-
"transcribed_series = df['audio'].blob.audio_transcribe(model_name=\"gemini-2.0-flash-001\", verbose=False)\n",
1506+
"import bigframes.bigquery as bbq\n",
1507+
"import bigframes.operations as ops\n",
1508+
"\n",
1509+
"# The audio_transcribe function is a convenience wrapper around bigframes.bigquery.ai.generate.\n",
1510+
"# Here's how to perform the same operation directly:\n",
1511+
"\n",
1512+
"audio_series = df['audio']\n",
1513+
"prompt_text = (\n",
1514+
" \"**Task:** Transcribe the provided audio. **Instructions:** - Your response \"\n",
1515+
" \"must contain only the verbatim transcription of the audio. - Do not include \"\n",
1516+
" \"any introductory text, summaries, or conversational filler in your response. \"\n",
1517+
" \"The output should begin directly with the first word of the audio.\"\n",
1518+
")\n",
1519+
"\n",
1520+
"# Convert the audio series to the runtime representation required by the model.\n",
1521+
"# This involves fetching metadata and getting a signed access URL.\n",
1522+
"audio_metadata = audio_series._apply_unary_op(ops.obj_fetch_metadata_op)\n",
1523+
"audio_runtime = audio_metadata._apply_unary_op(ops.ObjGetAccessUrl(mode=\"R\"))\n",
1524+
"\n",
1525+
"transcribed_results = bbq.ai.generate(\n",
1526+
" prompt=(prompt_text, audio_runtime),\n",
1527+
" endpoint=\"gemini-2.0-flash-001\",\n",
1528+
" model_params={\"generationConfig\": {\"temperature\": 0.0}},\n",
1529+
")\n",
1530+
"\n",
1531+
"transcribed_series = transcribed_results.struct.field(\"result\").rename(\"transcribed_content\")\n",
15191532
"transcribed_series"
15201533
]
15211534
},
15221535
{
15231536
"cell_type": "code",
1524-
"execution_count": 23,
1537+
"execution_count": 12,
15251538
"metadata": {},
15261539
"outputs": [
1527-
{
1528-
"name": "stderr",
1529-
"output_type": "stream",
1530-
"text": [
1531-
"/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/dtypes.py:959: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n",
1532-
"instead of using `db_dtypes` in the future when available in pandas\n",
1533-
"(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n",
1534-
" warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n",
1535-
"/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/dtypes.py:959: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n",
1536-
"instead of using `db_dtypes` in the future when available in pandas\n",
1537-
"(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n",
1538-
" warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n"
1539-
]
1540-
},
15411540
{
15421541
"data": {
1542+
"text/html": [
1543+
"<pre>0 {'status': '', 'content': 'Now, as all books, ...</pre>"
1544+
],
15431545
"text/plain": [
15441546
"0 {'status': '', 'content': 'Now, as all books, ...\n",
15451547
"Name: transcription_results, dtype: struct<status: string, content: string>[pyarrow]"
15461548
]
15471549
},
1548-
"execution_count": 23,
1550+
"execution_count": 12,
15491551
"metadata": {},
15501552
"output_type": "execute_result"
15511553
}
15521554
],
15531555
"source": [
1554-
"transcribed_series_verbose = df['audio'].blob.audio_transcribe(model_name=\"gemini-2.0-flash-001\", verbose=True)\n",
1556+
"# To get verbose results (including status), we can extract both fields from the result struct.\n",
1557+
"transcribed_content_series = transcribed_results.struct.field(\"result\")\n",
1558+
"transcribed_status_series = transcribed_results.struct.field(\"status\")\n",
1559+
"\n",
1560+
"transcribed_series_verbose = bpd.DataFrame(\n",
1561+
" {\n",
1562+
" \"status\": transcribed_status_series,\n",
1563+
" \"content\": transcribed_content_series,\n",
1564+
" }\n",
1565+
")\n",
1566+
"# Package as a struct for consistent display\n",
1567+
"transcribed_series_verbose = bbq.struct(transcribed_series_verbose).rename(\"transcription_results\")\n",
15551568
"transcribed_series_verbose"
15561569
]
15571570
},
@@ -1657,7 +1670,7 @@
16571670
"name": "python",
16581671
"nbconvert_exporter": "python",
16591672
"pygments_lexer": "ipython3",
1660-
"version": "3.10.18"
1673+
"version": "3.13.0"
16611674
}
16621675
},
16631676
"nbformat": 4,

0 commit comments

Comments
 (0)