From 1ce7ba5c4f750960ccddde0ff115a3f628a84bc2 Mon Sep 17 00:00:00 2001 From: Aidan Feldman Date: Mon, 31 Mar 2025 14:35:15 -0400 Subject: [PATCH] add guide on brackets --- .github/workflows/notebooks.yml | 2 + _toc.yml | 3 + brackets.ipynb | 949 ++++++++++++++++++++++++++++++++ hw_1.md | 1 + 4 files changed, 955 insertions(+) create mode 100644 brackets.ipynb diff --git a/.github/workflows/notebooks.yml b/.github/workflows/notebooks.yml index 6e8bc4f2..42b488ac 100644 --- a/.github/workflows/notebooks.yml +++ b/.github/workflows/notebooks.yml @@ -19,12 +19,14 @@ jobs: - lecture_1_exercise_solution.ipynb - lecture_2.ipynb - lecture_2_exercise.ipynb + - lecture_2_exercise_2_solution.ipynb - lecture_3.ipynb - lecture_3_exercise_solution.ipynb - lecture_4.ipynb - lecture_5.ipynb - lecture_5_exercise_solution.ipynb - lecture_6.ipynb + - brackets.ipynb - curve.ipynb - extras/pandas_crash_course.ipynb # https://github.com/mamba-org/setup-micromamba?tab=readme-ov-file#about-login-shells diff --git a/_toc.yml b/_toc.yml index c06d44e7..484ff02d 100644 --- a/_toc.yml +++ b/_toc.yml @@ -12,6 +12,9 @@ chapters: - file: curve {%- endif %} - file: resources + sections: + - file: brackets + title: Brackets - url: "{{lms_url}}" title: "{{lms_name}}" - url: "{{discussions_url}}" diff --git a/brackets.ipynb b/brackets.ipynb new file mode 100644 index 00000000..12a6ed7c --- /dev/null +++ b/brackets.ipynb @@ -0,0 +1,949 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "cc8a237f-51e2-4d66-ba4f-03ecd45d0577", + "metadata": {}, + "source": [ + "# Brackets in Python and pandas\n", + "\n", + "A common source of confusion for those who are new to Python and pandas is the use of different types of brackets. Hopefully this guide can clarify them for you." 
+ ] + }, + { + "cell_type": "markdown", + "id": "8620eca0-a96b-4b1a-bbef-a94f0b5a9c33", + "metadata": {}, + "source": [ + "## Curly braces\n", + "\n", + "**`{` and `}`**\n", + "\n", + "Curly braces are used to define Python dictionaries (dicts)." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "49f8a0a4-6cce-49ab-9a1a-314ba980cab2", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "dict" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "my_dict = {\n", + " \"A\": 1,\n", + " \"B\": 2,\n", + " \"C\": 3,\n", + "}\n", + "type(my_dict)" + ] + }, + { + "cell_type": "markdown", + "id": "e6da7b5c-85c0-4290-9c71-f88763a87ba8", + "metadata": {}, + "source": [ + "## Square brackets\n", + "\n", + "**`[` and `]`**" + ] + }, + { + "cell_type": "markdown", + "id": "0ca9095e-aa8d-4322-9d39-bb9f97dbdc11", + "metadata": {}, + "source": [ + "### Lists\n", + "\n", + "Python uses square brackets to define lists." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "953b8e03-b9f9-422b-868a-225db49ffd12", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "list" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "my_list = [\"A\", \"B\", \"C\"]\n", + "type(my_list)" + ] + }, + { + "cell_type": "markdown", + "id": "d58aa5a1-860b-4a13-8c95-83f4aa167b83", + "metadata": {}, + "source": [ + "They are used to retrieve an element, by passing in an index." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "25f91c97-767b-43a1-9588-46a6835d056f", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'B'" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "my_list[1]" + ] + }, + { + "cell_type": "markdown", + "id": "7b51f822-4e33-477f-976a-6286dafb19e9", + "metadata": {}, + "source": [ + "…or a slice:" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "02890294-5655-443e-b221-2ed1a99d93a5", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['B', 'C']" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "my_list[1:]" + ] + }, + { + "cell_type": "markdown", + "id": "d5361cce-6103-4ed8-bd93-923784cc20b0", + "metadata": {}, + "source": [ + "### Dictionaries\n", + "\n", + "When you have a Python dict, you retrieve values by passing the key between square brackets." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "7a0a0084-3dab-47ad-85d7-204778bb1629", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "2" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "my_dict[\"B\"]" + ] + }, + { + "cell_type": "markdown", + "id": "52b31d55-8263-4a2f-814a-de6779a18606", + "metadata": {}, + "source": [ + "### DataFrames\n", + "\n", + "Let's create a simple DataFrame we can use for demonstration purposes. To do so, we pass a dictionary with lists of values for each column to the `pd.DataFrame` constructor." + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "914fe380-6aff-4222-bd35-2b274397c64e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
col1col2col3
0A36.32
1B48.10
2C54.90
\n", + "
" + ], + "text/plain": [ + " col1 col2 col3\n", + "0 A 3 6.32\n", + "1 B 4 8.10\n", + "2 C 5 4.90" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import pandas as pd\n", + "\n", + "# create a simple DataFrame\n", + "df = pd.DataFrame(\n", + " {\n", + " \"col1\": [\"A\", \"B\", \"C\"],\n", + " \"col2\": [3, 4, 5],\n", + " \"col3\": [6.32, 8.1, 4.9],\n", + " }\n", + ")\n", + "df" + ] + }, + { + "cell_type": "markdown", + "id": "4740bb59-db04-46a1-adde-8259c1b9c07b", + "metadata": {}, + "source": [ + "#### Single column\n", + "\n", + "To retrieve a single column, take the DataFrame variable followed by square brackets, passing in the name (label) of a column as a string." + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "7b994dae-d7d2-426c-9cea-7c656a159144", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 A\n", + "1 B\n", + "2 C\n", + "Name: col1, dtype: object" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df[\"col1\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "cab8ab78-ca4e-4289-bc01-91408706d15d", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "pandas.core.series.Series" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "type(df[\"col1\"])" + ] + }, + { + "cell_type": "markdown", + "id": "d92d825d-ac52-4478-b616-e716f1e23c6c", + "metadata": {}, + "source": [ + "#### Multiple columns\n", + "\n", + "If you pass multiple column names as a list, it will return a new DataFrame with just those columns in that order." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "17f7b6ff-801c-4978-bbbb-a6db0672382e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "list" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "columns = [\"col3\", \"col2\"]\n", + "type(columns)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "b193383b-8aff-4470-a490-ed9ee9259939", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
col3col2
06.323
18.104
24.905
\n", + "
" + ], + "text/plain": [ + " col3 col2\n", + "0 6.32 3\n", + "1 8.10 4\n", + "2 4.90 5" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df[columns]" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "df7b4b3d-40e3-4442-82e0-2a2bf1bf3ea2", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "pandas.core.frame.DataFrame" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "type(df[columns])" + ] + }, + { + "cell_type": "markdown", + "id": "b6276ee6-4873-438b-a523-baaff39f46bc", + "metadata": {}, + "source": [ + "You'll sometimes see that written all in one line. In this case, the inner and outer pairs of square brackets are serving different purposes." + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "9fa6b0db-e77e-4d44-9f45-814ef05e9a98", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
col3col2
06.323
18.104
24.905
\n", + "
" + ], + "text/plain": [ + " col3 col2\n", + "0 6.32 3\n", + "1 8.10 4\n", + "2 4.90 5" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df[[\"col3\", \"col2\"]]" + ] + }, + { + "cell_type": "markdown", + "id": "1be8ed8e-cceb-4628-8765-f91cb30e1636", + "metadata": {}, + "source": [ + "#### Boolean indexing\n", + "\n", + "See [Lecture 2](https://python-public-policy.afeld.me/en/{{school_slug}}/lecture_2.html#boolean-indexing). In this case, we're passing in a Series of `True`s and `False`s between square brackets, which tells it which rows to select." + ] + }, + { + "cell_type": "markdown", + "id": "4cc88d09-de68-4ab1-8234-ed084432c915", + "metadata": {}, + "source": [ + "## Parentheses\n", + "\n", + "Parentheses serve a number of purposes in Python. Because pandas is a Python package, it uses them too." + ] + }, + { + "cell_type": "markdown", + "id": "59dede56-a7a6-4a81-a530-7fbdfbb80972", + "metadata": {}, + "source": [ + "### Tuples\n", + "\n", + "Python has a type called a tuple, which is like a list that can't be modified." + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "dd73903a-b6c4-4f44-997a-8635f10b95be", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "tuple" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "my_tuple = (1, 2, 3)\n", + "type(my_tuple)" + ] + }, + { + "cell_type": "markdown", + "id": "c5424366-1777-45d9-8897-5df3b4097c63", + "metadata": {}, + "source": [ + "### Logical grouping\n", + "\n", + "You can use parentheses to control the [order of operations](https://www.geeksforgeeks.org/precedence-and-associativity-of-operators-in-python/)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "8550caf3-9ca3-42df-b5cb-1e7550a4a990", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.3333333333333333" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "(1 + 1) / 6" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "833c6611-aad1-4b1f-af2f-7caab3d5abb9", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "1.1666666666666667" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "1 + (1 / 6)" + ] + }, + { + "cell_type": "markdown", + "id": "c15b34e0-b242-44b4-abf7-685fd6bb22b0", + "metadata": {}, + "source": [ + "### Multi-line statements\n", + "\n", + "You can wrap Python statements in parentheses to split them into multiple lines. This also allows you to embed comments." + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "a9f2f5d4-a4b3-4077-b933-757d91010e4c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "525600" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "60 * 24 * 365" + ] + }, + { + "cell_type": "markdown", + "id": "3e4bdc01-458c-4e9d-97c8-7bf253a6ca6c", + "metadata": {}, + "source": [ + "can be rewritten as:" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "11167f0b-136f-44f4-a894-405a3ce6d0cd", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "525600" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "minutes_in_year = (\n", + " # minutes per hour\n", + " 60\n", + " # hours per day\n", + " * 24\n", + " # days per year\n", + " * 365\n", + ")\n", + "minutes_in_year" + ] + }, + { + "cell_type": "markdown", + "id": "1e1de3f9-8734-4733-959e-bbc61bfc2234", + "metadata": {}, + "source": [ + "### Functions\n", 
+ "\n", + "In Python, parentheses are used in function definitions to specify the arguments." + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "3f2c166c-dd97-4949-b38b-39879c0aba04", + "metadata": {}, + "outputs": [], + "source": [ + "def add_five(num):\n", + " return num + 5" + ] + }, + { + "cell_type": "markdown", + "id": "18447d2f-739b-4df0-8423-8fe31d5fe9c6", + "metadata": {}, + "source": [ + "Then, parentheses are used to call functions, passing in the arguments (if any)." + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "90dbfbf5-6f14-48e5-862c-fd8c4101654a", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "<function __main__.add_five(num)>" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "add_five" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "ba4416d2-076d-4c63-a49a-62b545b43bda", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "11" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "add_five(6)" + ] + }, + { + "cell_type": "markdown", + "id": "31009191-1772-4131-8e9b-b60d9efa7aa4", + "metadata": {}, + "source": [ + "### Classes\n", + "\n", + "When making a new instance of a class, you use parentheses after the class name. We saw this [above](#dataframes) with `pd.DataFrame()`." + ] + }, + { + "cell_type": "markdown", + "id": "d5eb8808-5394-4ccf-ab27-84b5ba895530", + "metadata": {}, + "source": [ + "## Angle brackets\n", + "\n", + "Angle brackets are used to do comparison." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "fc811551-62da-48ca-8f00-5299d00c5c9c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "6 > 5" + ] + }, + { + "cell_type": "markdown", + "id": "257763b1-2ea2-4785-bec8-9dcf11d60d15", + "metadata": {}, + "source": [ + "### pandas\n", + "\n", + "In pandas, that comparison can be done across all values in a Series, returning a new Series with the results." + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "eb1de26b-d2fe-43aa-8c29-b03a7eef8d51", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 6.32\n", + "1 8.10\n", + "2 4.90\n", + "Name: col3, dtype: float64" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df[\"col3\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "56af790c-408c-4363-bae0-6eca3e5bed5a", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "pandas.core.series.Series" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "type(df[\"col3\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "cdb20d81-19eb-4527-8c7c-aed94dbac013", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 True\n", + "1 False\n", + "2 True\n", + "Name: col3, dtype: bool" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df[\"col3\"] < 7" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "id": "f2d83dbf-db06-4f4d-bafc-f90be4401a09", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "pandas.core.series.Series" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "type(df[\"col3\"] < 7)" + 
] + }, + { + "cell_type": "markdown", + "id": "d77d9189-b0ef-435f-88b6-4be784db6413", + "metadata": {}, + "source": [ + "You'll often see this used in [boolean indexing](#boolean-indexing)." + ] + }, + { + "cell_type": "markdown", + "id": "97d8b954-47cc-4880-8900-b174e96295ca", + "metadata": {}, + "source": [ + "## Conclusion\n", + "\n", + "It's totally reasonable to be confused about which brackets mean what in what contexts. Be patient, it just takes time for it to sink in.\n", + "\n", + "See also: [How do I select a subset of a `DataFrame`?](https://pandas.pydata.org/docs/getting_started/intro_tutorials/03_subset_data.html)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python [conda env:python-public-policy] *", + "language": "python", + "name": "conda-env-python-public-policy-py" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.11" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/hw_1.md b/hw_1.md index a3675c2e..328e2a99 100644 --- a/hw_1.md +++ b/hw_1.md @@ -34,6 +34,7 @@ Now [turn in the assignment](assignments.md). 1. Learn about functions - [Video](https://www.youtube.com/watch?v=9Os0o3wzS_I&list=PL-osiE80TeTskrapNbzXhwoFUiLCjGgY7&index=8) - [Blog post](https://python.land/introduction-to-python/functions) +1. [Brackets in Python and pandas](brackets.ipynb) 1. Coding Style Guides - Please skim these; I don't expect you to understand and follow everything in them. The most important guidelines to pay attention to are indentation and keeping each statement on its own line. - [The Hitchhiker’s Guide to Python](https://docs.python-guide.org/writing/style/) - [PEP 8](https://www.python.org/dev/peps/pep-0008/)