diff --git a/ECHO-Sunrise.ipynb b/ECHO-Sunrise.ipynb index 43dde32..e85354e 100644 --- a/ECHO-Sunrise.ipynb +++ b/ECHO-Sunrise.ipynb @@ -155,7 +155,7 @@ "geo_json_data = cd_data[(cd_data[\"District\"] == select_region_widget.value)] #where ids match\n", "\n", "# Get the EPA data from the Stonybrook University database\n", - "echo_data_sql = 'select * from \"ECHO_EXPORTER\" where \"FAC_STATE\" = \\'MA\\' and \"FAC_ACTIVE_FLAG\"=\\'Y\\' '\n", + "echo_data_sql = \"\"\"select * from \"ECHO_EXPORTER\" where \"FAC_STATE\" = 'MA' and \"FAC_ACTIVE_FLAG\"='Y' \"\"\"\n", "echo_data = get_data( echo_data_sql, 'REGISTRY_ID' )\n", " \n", "if ( not echo_data.empty ):\n", @@ -166,12 +166,12 @@ " this_district_data = echo_data.loc[(echo_data[\"FAC_DERIVED_CD113\"]==float(select_region_widget.value.strip(\"Congressional District\")))]\n", "\n", " # Summarize \n", - " display(HTML(\"

There are %s facilities in Massachussets currently tracked in the ECHO database.\" %(num_facilities)))\n", - " display(HTML(\"

There are \"+ str(this_district_data.shape[0]) + \" facilities in this district.\"))\n", - " display(HTML(\"

There are \"+ str(this_district_data.loc[this_district_data[\"RCRA_FLAG\"] == \"Y\"].shape[0]) + \" facilities regulated under RCRA (hazardous waste) in this district.

\"))\n", - " display(HTML(\"

There are \"+ str(this_district_data.loc[this_district_data[\"NPDES_FLAG\"] == \"Y\"].shape[0]) + \" facilities regulated under the Clean Water Act in this district.

\"))\n", - " display(HTML(\"

There are \"+ str(this_district_data.loc[this_district_data[\"AIR_FLAG\"] == \"Y\"].shape[0]) + \" facilities regulated under the Clean Air Act in this district.

\"))\n", - " display(HTML(\"

There are \"+ str(this_district_data.loc[this_district_data[\"GHG_FLAG\"] == \"Y\"].shape[0]) + \" facilities reporting greenhouse gas emissions in this district.

\"))\n" + " display(HTML(f\"

There are {num_facilities:,} facilities in Massachusetts currently tracked in the ECHO database.\"))\n", + " display(HTML(f\"

There are {this_district_data.shape[0]:,} facilities in this district.\"))\n", + " display(HTML(f\"

There are {this_district_data.loc[this_district_data['RCRA_FLAG'] == 'Y'].shape[0]:,} facilities regulated under RCRA (hazardous waste) in this district.

\"))\n", + " display(HTML(f\"

There are {this_district_data.loc[this_district_data['NPDES_FLAG'] == 'Y'].shape[0]:,} facilities regulated under the Clean Water Act in this district.

\"))\n", + " display(HTML(f\"

There are {this_district_data.loc[this_district_data['AIR_FLAG'] == 'Y'].shape[0]:,} facilities regulated under the Clean Air Act in this district.

\"))\n", + " display(HTML(f\"

There are {this_district_data.loc[this_district_data['GHG_FLAG'] == 'Y'].shape[0]:,} facilities reporting greenhouse gas emissions in this district.

\"))\n" ] }, { @@ -274,14 +274,14 @@ "plt.figure(figsize=(20,10))\n", "if ( bars is not None ):\n", " plt.bar(bars.index, bars[program.agg_col])\n", - " plt.bar(bars.index, bars[program.agg_col + \" in this District\"])\n", + " plt.bar(bars.index, bars[f\"{program.agg_col} in this District\"])\n", " plt.title(program.name)\n", " plt.xlabel( 'Year' )\n", " plt.ylabel( program.unit )\n", - " plt.legend([\"Rest of the state\", \"In %s\" %(select_region_widget.value)])\n", + " plt.legend([\"Rest of the state\", f\"In {select_region_widget.value}\"])\n", " plt.show() \n", "\n", - " bars.to_csv(\"trends-\"+program.name+\"-MA-\"+select_region_widget.value+\".csv\")" + " bars.to_csv(f\"trends-{program.name}-MA-{select_region_widget.value}.csv\")" ] }, { @@ -300,13 +300,13 @@ "source": [ "\n", "if (district_program_data is not None and district_program_data.shape[0] > 0):\n", - " district_program_data.to_csv(\"district_program_data-\"+program.name+\"-\"+select_region_widget.value+\".csv\")\n", + " district_program_data.to_csv(f\"district_program_data-{program.name}-{select_region_widget.value}.csv\")\n", " ranked = district_program_data.set_index(\"Index\")\n", " q_num = 5 if ( len( ranked ) >= 5 ) else int( len( ranked ) / 2 )\n", " ranked['quantile'] = pd.qcut(ranked[program.agg_col], 5, labels=False, duplicates=\"drop\")\n", " ranked = ranked.sort_values(by=program.agg_col, ascending=False)\n", - " ranked.to_csv(\"facilities_ranked-\"+program.name+\".csv\")\n", - " print( \"{} facilities have been ranked.\".format( len( ranked )))\n", + " ranked.to_csv(f\"facilities_ranked-{program.name}.csv\")\n", + " print(f\"{len(ranked)} facilities have been ranked.\")\n", "\n", " time = '2018' if (program.name == \"Greenhouse Gas Emissions\") else '2020'\n", " sns.set(style='whitegrid')\n", @@ -314,7 +314,7 @@ " unit = ranked[0:19].index # First 20 rows \n", " values = ranked[0:19][program.agg_col] # First 20 rows\n", " g = sns.barplot(values, unit, order=list(unit), orient=\"h\") 
\n", - " g.set_title('20 facilities with the most %s in %s from 2010-%s' %(program.name, select_region_widget.value, time))\n", + " g.set_title(f'20 facilities with the most {program.name} in {select_region_widget.value} from 2010-{time}')\n", " ax.set_xlabel(program.unit)\n", " ax.set_ylabel(\"Facility\")\n", " ax.set_yticklabels(ranked[0:19][\"FAC_NAME\"])\n",