# Exploring Eco topics with Python | TEST NOTEBOOK - Carbon Polluters Exploration
---

**<font color='red'>FYI: Internet access is required for Section E.</font>**

---

## A.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Load the source table from a CSV file located in the working directory.
raw_data = pd.read_csv("CarbonPollutersExp_DATA.csv")

In [None]:
# Work on an independent (deep) copy so that raw_data stays untouched.
df_prep = raw_data.copy(deep=True)

## B.

In [None]:
# Display the (rows, columns) dimensions of the working copy.
df_prep.shape

## C.

In [None]:
# Show every column label next to its integer position; the positions are
# what the positional column drop in the following cell refers to.
[(position, label) for position, label in enumerate(df_prep.columns)]

In [None]:
# Remove the columns sitting at positions 10, 22, 23 and 24 (see the
# enumerated column listing for which labels those are).
df_prep.drop(df_prep.columns[[10, 22, 23, 24]], axis="columns", inplace=True)

In [None]:
# Inspect the dtype pandas inferred for each remaining column.
df_prep.dtypes

In [None]:
# Shorten the column labels by removing the "TOTAL REPORTED EMISSIONS, "
# text wherever it occurs; labels without it map to themselves.
df_prep.rename(
    columns={
        label: label.replace("TOTAL REPORTED EMISSIONS, ", "")
        for label in df_prep.columns
    },
    inplace=True,
)

In [None]:
# Strip the thousands separators (",") from every column from "2011" onward.
# DataFrame.replace(..., regex=True) rewrites only string cells, so missing or
# already-numeric values are left alone instead of raising AttributeError the
# way a per-cell str.replace does. It also avoids DataFrame.applymap, which is
# deprecated since pandas 2.1.
year_cols = df_prep.loc[:, "2011":].columns
df_prep[year_cols] = df_prep[year_cols].replace(",", "", regex=True)

In [None]:
# Cells that consist exactly of the "---" placeholder become NaN, anywhere in
# the frame.
df_prep.replace(to_replace="---", value=np.nan, inplace=True)

In [None]:
# Convert every column from "2011" onward to a numeric dtype.
# Assign by column labels rather than through `df.loc[:, cols] = ...`: since
# pandas 2.0 the .loc form writes the new values into the existing columns in
# place, so they would keep dtype `object` even though each value was parsed.
year_cols = df_prep.loc[:, "2011":].columns
df_prep[year_cols] = df_prep[year_cols].apply(pd.to_numeric)

In [None]:
# Summary statistics for the columns pandas treats as numeric.
df_prep.describe()

In [None]:
# Line plot of the last eleven columns: transposing puts the columns on the
# x-axis and draws one line per row. The legend is suppressed.
df_prep[df_prep.columns[-11:]].transpose().plot(legend=False)

In [None]:
# True only if every cell in the last eleven columns is either missing or
# strictly greater than 25000.
(df_prep.iloc[:, -11:].isna() | df_prep.iloc[:, -11:].gt(25000)).to_numpy().all()

In [None]:
# Row-wise total over the reporting-year columns "2011" through "2021".
# The slice is bounded at "2021" (the same range the stacked bar chart in
# section F uses) so that re-running this cell does not fold an already
# existing "Cumulative" column, or any column added later, into the sum.
df_prep["Cumulative"] = df_prep.loc[:, "2011":"2021"].sum(axis=1)

In [None]:
# Grand total: the sum of the "Cumulative" column over all rows.
df_prep["Cumulative"].sum()

## D.

In [None]:
import geopandas as gpd

In [None]:
# Build one point geometry per row from the LONGITUDE / LATITUDE columns.
# The same columns are later passed to folium as plain lat/lon pairs, so the
# coordinates are declared as WGS 84 (EPSG:4326) here; without a CRS the frame
# cannot be reprojected or reliably combined with CRS-aware layers.
# NOTE(review): df_prep is deliberately passed as-is (not copied) because a
# later cell expects a "geometry" column on df_prep - confirm that this
# side effect still happens with the installed geopandas version.
geo_df = gpd.GeoDataFrame(
    df_prep,
    geometry=gpd.points_from_xy(df_prep["LONGITUDE"], df_prep["LATITUDE"]),
    crs="EPSG:4326",
)

In [None]:
# Static plot of the points, coloured by the "Cumulative" column ("cool" colormap).
geo_df.plot(column="Cumulative", cmap="cool")

In [None]:
# Load the low-resolution Natural Earth dataset bundled with geopandas.
# NOTE(review): `gpd.datasets` is deprecated in geopandas 0.14 and removed in
# 1.0 - confirm the installed version still provides it.
base_map = gpd.read_file(gpd.datasets.get_path("naturalearth_lowres")) 

In [None]:
# Draw the base map on its own as a quick visual check.
base_map.plot()

In [None]:
# Display the geometry stored in the row labelled 4 of the base map.
base_map.loc[4, "geometry"]

In [None]:
# Show the coordinate reference system attached to the base map.
base_map.crs

In [None]:
# One 12x8 figure with a single Axes: base map first, then the points drawn
# on top and coloured by "Cumulative" with the reversed "autumn" colormap.
fig = plt.figure(figsize=(12, 8))
ax = fig.subplots()
base_map.plot(ax=ax)
geo_df.plot(column="Cumulative", cmap="autumn_r", ax=ax)
plt.show()

## E.

In [None]:
import folium

In [None]:
# Read the state boundary shapefile from the local cb_2021_us_state_20m folder.
US_state_boundaries = gpd.read_file("cb_2021_us_state_20m/cb_2021_us_state_20m.shp")

In [None]:
# Interactive map of the state boundaries with default settings.
US_state_boundaries.explore()

In [None]:
# Interactive base layer: state polygons filled in "gainsboro", no tooltip,
# outlined more heavily (and unfilled) while highlighted.
base_layer = US_state_boundaries.explore(
    location=[39, -97],
    width="50%",
    height="50%",
    zoom_start=4,
    min_zoom=3,
    tooltip=False,
    style_kwds={"fillOpacity": 1, "weight": 1, "fillColor": "gainsboro"},
    highlight_kwds={"fillOpacity": 0, "weight": 3},
)

# Add the points to the same map as unfilled circles coloured by "Cumulative";
# clicking a circle opens a popup with the FACILITY and Cumulative values.
geo_df.explore(
    m=base_layer,
    column="Cumulative",
    marker_type="circle",
    marker_kwds={"radius": 4828, "fill": False},
    cmap="autumn_r",
    popup=["FACILITY", "Cumulative"],
    tooltip=False,
)

# Last expression of the cell: renders the combined map.
base_layer

In [None]:
from folium.plugins import HeatMap

# One [latitude, longitude, weight] triple per row, weighted by "Cumulative".
heatmap_points = [
    list(triple)
    for triple in zip(geo_df["LATITUDE"], geo_df["LONGITUDE"], geo_df["Cumulative"])
]

# Fresh folium map with the same view settings as the state map above,
# on "cartodbpositron" tiles, with the heat layer added to it.
map_layer = folium.Map(
    location=[39, -97],
    tiles="cartodbpositron",
    width="50%",
    height="50%",
    zoom_start=4,
    min_zoom=3,
)
HeatMap(heatmap_points, radius=15, blur=5).add_to(map_layer)

map_layer

## F.

In [None]:
# Independent (deep) copy of the prepared table for the trend analysis.
df_trends = df_prep.copy(deep=True)

In [None]:
# Drop the point-geometry column when it is present on the copy.
# NOTE(review): nothing in this notebook adds "geometry" to df_prep
# explicitly; it presumably arrives as a side effect of building the
# GeoDataFrame from df_prep, which depends on the geopandas/pandas versions.
# errors="ignore" keeps the cell working when the column is absent, where a
# plain `del` would raise KeyError.
df_trends.drop(columns="geometry", errors="ignore", inplace=True)

In [None]:
# Index the rows by (FACILITY, STATE); both columns move into a MultiIndex.
df_trends = df_trends.set_index(["FACILITY", "STATE"])

In [None]:
# Remove every column from "GHGRP ID" through "PARENT COMPANIES" (label
# slices are inclusive at both ends). Only the labels of that slice are
# passed to drop(), rather than the sliced DataFrame itself, so the call no
# longer relies on a DataFrame iterating over its column names.
df_trends.drop(
    columns=df_trends.loc[:, "GHGRP ID":"PARENT COMPANIES"].columns,
    inplace=True,
)

In [None]:
# Horizontal stacked bars: rows ordered by "Cumulative" (ascending), one
# stacked segment per column from "2011" through "2021".
(
    df_trends.sort_values("Cumulative")
    .loc[:, "2011":"2021"]
    .plot.barh(stacked=True, figsize=(20, 15), legend=False, fontsize=8)
)

In [None]:
# The twelve STATE labels with the largest summed "Cumulative", largest first.
worst12states = (
    df_trends.groupby("STATE")["Cumulative"]
    .sum()
    .sort_values(ascending=False)
    .head(12)
    .index
)

In [None]:
# Copy of the trend table without the "Cumulative" total column.
df_timeseries = df_trends.drop("Cumulative", axis="columns").copy()

In [None]:
# Preview one state: select the rows whose STATE level is "PA", then
# transpose so the original columns become the rows.
df_timeseries.xs("PA", level="STATE").transpose()

In [None]:
# 4x3 grid of panels sharing one y-axis: one panel per state in
# worst12states, each drawing that state's rows as separate lines.
fig, axes = plt.subplots(4, 3, figsize=(25, 20), sharey=True)
for state, ax in zip(worst12states, axes.ravel()):
    state_lines = df_timeseries.xs(state, level="STATE").T
    state_lines.plot(ax=ax)
    ax.set_title(state)
    ax.legend(loc="best", fontsize="medium", labelspacing=0.25)

# Apply identical axis labels and tick layout to every panel at once.
column_labels = df_timeseries.columns
plt.setp(
    axes,
    xlabel="Reporting Year",
    ylabel="CO2e metric tons per Reporting Year",
    xticks=range(len(column_labels)),
    xticklabels=column_labels,
    yticks=range(0, 550001, 25000),
)
plt.tight_layout()
plt.show()

---
Copyright © 2023 Rho Zeta AI Ltd. All rights reserved.