From a34de8246821ef05969c5d028e7d0cf2b9132a1b Mon Sep 17 00:00:00 2001 From: Rob Brackett Date: Fri, 2 Oct 2020 03:21:01 -0700 Subject: [PATCH] Use f-strings for nicer formatting I was watching @maalvikabhat's video demo of this notebook today (nice job!) and noticed that some of the text output didn't include commas in the numbers, which would have been nice. It's pretty easy to do when using f-strings, so I went ahead and updated those, and then all the rest of the strings to use them. Python's f-strings are typically much more readable than string concatenation (`"hello " + some_variable + "!"`), interpolation (`"hello %s!" % (some_variable)`), or str.format() (`"hello {}!".format(some_variable)`) because the variables/expressions are directly inline within the string. They also offer some fancy formatting options that none of the others (except str.format) do. For example, you can add thousands separators to numbers: ```py >>> x = 6543245 >>> f"There were {x:,} results" 'There were 6,543,245 results' ``` It also doesn't hurt that f-strings are faster, CPU-wise, than all other options. 
;) --- ECHO-Sunrise.ipynb | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/ECHO-Sunrise.ipynb b/ECHO-Sunrise.ipynb index 43dde32..e85354e 100644 --- a/ECHO-Sunrise.ipynb +++ b/ECHO-Sunrise.ipynb @@ -155,7 +155,7 @@ "geo_json_data = cd_data[(cd_data[\"District\"] == select_region_widget.value)] #where ids match\n", "\n", "# Get the EPA data from the Stonybrook University database\n", - "echo_data_sql = 'select * from \"ECHO_EXPORTER\" where \"FAC_STATE\" = \\'MA\\' and \"FAC_ACTIVE_FLAG\"=\\'Y\\' '\n", + "echo_data_sql = \"\"\"select * from \"ECHO_EXPORTER\" where \"FAC_STATE\" = 'MA' and \"FAC_ACTIVE_FLAG\"='Y' \"\"\"\n", "echo_data = get_data( echo_data_sql, 'REGISTRY_ID' )\n", " \n", "if ( not echo_data.empty ):\n", @@ -166,12 +166,12 @@ " this_district_data = echo_data.loc[(echo_data[\"FAC_DERIVED_CD113\"]==float(select_region_widget.value.strip(\"Congressional District\")))]\n", "\n", " # Summarize \n", - " display(HTML(\"

There are %s facilities in Massachussets currently tracked in the ECHO database.\" %(num_facilities)))\n", - " display(HTML(\"

There are \"+ str(this_district_data.shape[0]) + \" facilities in this district.\"))\n", - " display(HTML(\"

There are \"+ str(this_district_data.loc[this_district_data[\"RCRA_FLAG\"] == \"Y\"].shape[0]) + \" facilities regulated under RCRA (hazardous waste) in this district.

\"))\n", - " display(HTML(\"

There are \"+ str(this_district_data.loc[this_district_data[\"NPDES_FLAG\"] == \"Y\"].shape[0]) + \" facilities regulated under the Clean Water Act in this district.

\"))\n", - " display(HTML(\"

There are \"+ str(this_district_data.loc[this_district_data[\"AIR_FLAG\"] == \"Y\"].shape[0]) + \" facilities regulated under the Clean Air Act in this district.

\"))\n", - " display(HTML(\"

There are \"+ str(this_district_data.loc[this_district_data[\"GHG_FLAG\"] == \"Y\"].shape[0]) + \" facilities reporting greenhouse gas emissions in this district.

\"))\n" + " display(HTML(f\"

There are {num_facilities:,} facilities in Massachussets currently tracked in the ECHO database.\"))\n", + " display(HTML(f\"

There are {this_district_data.shape[0]:,} facilities in this district.\"))\n", + " display(HTML(f\"

There are {this_district_data.loc[this_district_data['RCRA_FLAG'] == 'Y'].shape[0]:,} facilities regulated under RCRA (hazardous waste) in this district.

\"))\n", + " display(HTML(f\"

There are {this_district_data.loc[this_district_data['NPDES_FLAG'] == 'Y'].shape[0]:,} facilities regulated under the Clean Water Act in this district.

\"))\n", + " display(HTML(f\"

There are {this_district_data.loc[this_district_data['AIR_FLAG'] == 'Y'].shape[0]:,} facilities regulated under the Clean Air Act in this district.

\"))\n", + " display(HTML(f\"

There are {this_district_data.loc[this_district_data['GHG_FLAG'] == 'Y'].shape[0]:,} facilities reporting greenhouse gas emissions in this district.

\"))\n" ] }, { @@ -274,14 +274,14 @@ "plt.figure(figsize=(20,10))\n", "if ( bars is not None ):\n", " plt.bar(bars.index, bars[program.agg_col])\n", - " plt.bar(bars.index, bars[program.agg_col + \" in this District\"])\n", + " plt.bar(bars.index, bars[f\"{program.agg_col} in this District\"])\n", " plt.title(program.name)\n", " plt.xlabel( 'Year' )\n", " plt.ylabel( program.unit )\n", - " plt.legend([\"Rest of the state\", \"In %s\" %(select_region_widget.value)])\n", + " plt.legend([\"Rest of the state\", f\"In {select_region_widget.value}\"])\n", " plt.show() \n", "\n", - " bars.to_csv(\"trends-\"+program.name+\"-MA-\"+select_region_widget.value+\".csv\")" + " bars.to_csv(f\"trends-{program.name}-MA-{select_region_widget.value}.csv\")" ] }, { @@ -300,13 +300,13 @@ "source": [ "\n", "if (district_program_data is not None and district_program_data.shape[0] > 0):\n", - " district_program_data.to_csv(\"district_program_data-\"+program.name+\"-\"+select_region_widget.value+\".csv\")\n", + " district_program_data.to_csv(f\"district_program_data-{program.name}-{select_region_widget.value}.csv\")\n", " ranked = district_program_data.set_index(\"Index\")\n", " q_num = 5 if ( len( ranked ) >= 5 ) else int( len( ranked ) / 2 )\n", " ranked['quantile'] = pd.qcut(ranked[program.agg_col], 5, labels=False, duplicates=\"drop\")\n", " ranked = ranked.sort_values(by=program.agg_col, ascending=False)\n", - " ranked.to_csv(\"facilities_ranked-\"+program.name+\".csv\")\n", - " print( \"{} facilities have been ranked.\".format( len( ranked )))\n", + " ranked.to_csv(f\"facilities_ranked-{program.name}.csv\")\n", + " print(f\"{len(ranked)} facilities have been ranked.\")\n", "\n", " time = '2018' if (program.name == \"Greenhouse Gas Emissions\") else '2020'\n", " sns.set(style='whitegrid')\n", @@ -314,7 +314,7 @@ " unit = ranked[0:19].index # First 20 rows \n", " values = ranked[0:19][program.agg_col] # First 20 rows\n", " g = sns.barplot(values, unit, order=list(unit), orient=\"h\") 
\n", - " g.set_title('20 facilities with the most %s in %s from 2010-%s' %(program.name, select_region_widget.value, time))\n", + " g.set_title(f'20 facilities with the most {program.name} in {select_region_widget.value} from 2010-{time}')\n", " ax.set_xlabel(program.unit)\n", " ax.set_ylabel(\"Facility\")\n", " ax.set_yticklabels(ranked[0:19][\"FAC_NAME\"])\n",