## Analytics: Green vs. Non-Green Manufacturing Efficiency

In [None]:
# Read the green and non-green efficiency tables as Spark DataFrames.
# `spark` is not defined in this file; presumably it is the session object
# injected by the notebook runtime.
df_green, df_not_green = (
    spark.sql(query)
    for query in (
        "SELECT * FROM hive_metastore.default.greenefficiency",
        "SELECT * FROM hive_metastore.default.notgreenefficiency",
    )
)

In [None]:
# Convert both Spark DataFrames to pandas (green first, then non-green) so the
# later cells can work with them locally.
df_green_pd, df_not_green_pd = (
    spark_frame.toPandas() for spark_frame in (df_green, df_not_green)
)

In [None]:
# Order both frames by the 'TS' column; the sorted copies replace the
# unsorted ones under the same names.
df_green_pd, df_not_green_pd = (
    frame.sort_values(by='TS') for frame in (df_green_pd, df_not_green_pd)
)

In [None]:
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.linear_model import LinearRegression

def _fractional_ordinal(timestamps):
    """Return day ordinals for *timestamps* with the time of day as a fraction.

    A bare ``toordinal()`` discards the time of day, so every sample taken on
    the same calendar day would get the same regressor value and the fitted
    trend would be stepped (or flat for single-day data). Adding the elapsed
    fraction of the day keeps sub-daily resolution. For date-only or midnight
    values the result equals ``toordinal()`` (as floats).
    """
    ts = pd.to_datetime(timestamps)
    day_number = ts.map(lambda value: value.toordinal())
    seconds_into_day = (ts - ts.dt.normalize()).dt.total_seconds()
    return day_number + seconds_into_day / 86400.0


def _add_regression_line(frame):
    """Fit Efficiency against time and store the fitted values in *frame*.

    Adds two columns to *frame* in place: 'TS_num' (numeric time regressor
    derived from 'TS') and 'RegLine' (the fitted value for each row).
    Returns the fitted ``LinearRegression`` model.
    """
    frame['TS_num'] = _fractional_ordinal(frame['TS'])
    model = LinearRegression().fit(frame[['TS_num']], frame['Efficiency'])
    frame['RegLine'] = model.predict(frame[['TS_num']])
    return model


def _plot_efficiency(ax, frame, title, color):
    """Draw the efficiency series and its regression line on *ax*."""
    ax.plot(frame['TS'], frame['Efficiency'], marker='o', linestyle='-', color=color, label='Efficiency')
    ax.plot(frame['TS'], frame['RegLine'], linestyle='--', color='red', label='Regression Line')
    ax.set_title(title, fontsize=14)
    ax.set_xlabel('Timestamp', fontsize=12)
    ax.set_ylabel('Efficiency', fontsize=12)
    ax.tick_params(axis='x', rotation=45)
    # Same fixed y-range and tick density on every panel.
    ax.set_ylim(0, 0.3)
    ax.yaxis.set_major_locator(plt.MaxNLocator(nbins=10))
    ax.grid(False)
    ax.legend()


# Fit one linear trend per data set; the models stay available by name.
reg_green = _add_regression_line(df_green_pd)
reg_not_green = _add_regression_line(df_not_green_pd)

# Two stacked panels: green energy on top, non-green energy below.
fig, axes = plt.subplots(nrows=2, ncols=1, figsize=(10, 8))

_plot_efficiency(axes[0], df_green_pd, 'Efficiency Over Time (Green Energy)', '#00B76A')
_plot_efficiency(axes[1], df_not_green_pd, 'Efficiency Over Time (Non-Green Energy)', '#8A60FF')

plt.tight_layout()
plt.show()
