# PRODUCT-BASED MONTHLY PREDICTION AND TRACKING OF PLASTIC PACKAGING WEIGHT

**CEU Capstone Project**

*Ersan Kucukoglu*

In [0]:
import os
# Build the package-index URL with credentials read from the "artifactory" secret
# scope via dbutils.secrets, so no username or password is hard-coded here.
# NOTE(review): both secrets are interpolated verbatim into the URL; if either can
# contain URL-reserved characters (e.g. "@", ":" or "/") it would presumably need
# percent-encoding first — confirm against the stored values.
os.environ["PIP_INDEX_URL"] = f"""https://{dbutils.secrets.get(scope = "artifactory", key = "username")}:{dbutils.secrets.get(scope = "artifactory", key = "password")}@schwarzit.jfrog.io/artifactory/api/pypi/pypi/simple"""
# Install pinned versions of the extra packages from that index.
%pip install openpyxl==3.0.9 plotly==5.5.0 dash==2.4.1  --index-url=$PIP_INDEX_URL

- Import the packages

In [0]:
import pandas as pd
import plotly.express as px
import datetime
import plotly.graph_objects as go
import pyspark
import pyspark.sql.functions as f
from pyspark.sql import SparkSession
from pyspark.conf import SparkConf
from pyspark import SparkContext
# Spark session used for the table reads in this notebook; getOrCreate() returns
# the already-running session when one exists instead of building a new one.
ersan_spark = SparkSession.builder.getOrCreate()

In [0]:
# Read the predicted-plastic table through Spark and bring it into pandas in one step.
df = ersan_spark.table("db_ersan.df_predicted_plastic").toPandas()

In [0]:
# Count of distinct item names in `df` (the db_ersan.df_predicted_plastic data
# when the cells are run top to bottom).
df['ITEM_NAME'].nunique()

In [0]:

# Load the monthly table into pandas, parse DATE as datetimes and use it as the
# index. Built as a single chain so re-running the cell always starts from the table.
df = (
    ersan_spark.table("db_ersan.df_monthly")
    .toPandas()
    .assign(DATE=lambda frame: pd.to_datetime(frame["DATE"]))
    .set_index("DATE")
)
df.head()

Unnamed: 0_level_0,ITEM_NUMBER,ITEM_NAME,WG_NAME,YEAR,MONTH,TOTAL_SALES_QTY,TOTAL_PRE_PLASTIC_T,TOTAL_NEW_PLASTIC_T,REDUCTION_T
DATE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2022-06-01,1709,Bruehwurst Stapelpack QS SK24,Kühlung,2022,6,12754.0,0.102797,0.090171,0.012626
2022-05-01,1709,Bruehwurst Stapelpack QS SK24,Kühlung,2022,5,46258.0,0.372839,0.327044,0.045795
2022-04-01,1709,Bruehwurst Stapelpack QS SK24,Kühlung,2022,4,73219.0,0.590145,0.517658,0.072487
2022-03-01,1709,Bruehwurst Stapelpack QS SK24,Kühlung,2022,3,56854.0,0.458243,0.401958,0.056285
2022-02-01,1709,Bruehwurst Stapelpack QS SK24,Kühlung,2022,2,50579.0,0.407667,0.357594,0.050073


In [0]:
# Count of distinct item names in `df` (the db_ersan.df_monthly data when the
# cells are run top to bottom).
df['ITEM_NAME'].nunique()

In [0]:
# Monthly sum of TOTAL_NEW_PLASTIC_T for rows with YEAR >= 2021.
# DATE is the index of `df`, so groupby('DATE') groups on the index and
# reset_index() turns it back into a column for the .dt accessors used below.
df_21_22 = (
    df[df.YEAR >= 2021]
    .groupby('DATE')
    .agg(TOTAL_PLASTIC_PACK_T=('TOTAL_NEW_PLASTIC_T', 'sum'))
    .reset_index()
)

# One line per calendar year, plotted against the month number.
# x and color are passed as derived Series rather than column names, which is why
# the `labels` mapping addresses them by their argument names 'x' and 'color'.
fig = px.line(
    df_21_22,
    x=df_21_22.DATE.dt.month,
    y='TOTAL_PLASTIC_PACK_T',
    color=df_21_22.DATE.dt.year,
    title="The amount of plastic packaged products in tons on a monthly basis <br><sup>compared to 2021</sup>",
    labels={
        'x': 'MONTHS',
        # Spelling fixed ("TONES" -> "TONS") to agree with the chart title.
        'TOTAL_PLASTIC_PACK_T': 'TOTAL PLASTIC PACKAGING IN TONS',
        'color': 'Year',
    },
    height=500,
    template='plotly_dark',
)

# A single unified hover box per month; hovertemplate=None drops the per-trace
# template that px.line generated.
fig.update_layout(showlegend=True, hovermode="x unified")
fig.update_traces(cliponaxis=False, mode="markers+lines", hovertemplate=None)

fig.show()


In [0]:
# Year being reported. It is read once and reused for both the row filter and the
# figure title, so the title can no longer name a different year than the data
# shown (the title previously hard-coded "2022").
report_year = datetime.datetime.now().year

# Per-item sums of the PRE and NEW plastic columns for the reporting year.
# NOTE(review): PRE/NEW presumably mean packaging weight before/after optimisation —
# confirm against the table definition.
reduction_df = (
    df.loc[df.YEAR == report_year]
    .groupby("ITEM_NAME")
    .agg(
        YEARLY_PRE_PLASTIC_PACK_T=('TOTAL_PRE_PLASTIC_T', 'sum'),
        YEARLY_NEW_PLASTIC_PACK_T=('TOTAL_NEW_PLASTIC_T', 'sum'),
    )
    .reset_index()
)

# Percentage by which NEW is lower than PRE for each item.
reduction_ratio = (1 - reduction_df.YEARLY_NEW_PLASTIC_PACK_T / reduction_df.YEARLY_PRE_PLASTIC_PACK_T) * 100
reduction_df.loc[:, 'REDUCTION'] = reduction_ratio

# Indicator: the headline number is the summed NEW weight; the delta underneath is
# the absolute difference to the summed PRE weight (relative=False).
fig = go.Figure()
fig.add_trace(go.Indicator(
    mode="number+delta",
    number={'suffix': " t"},
    value=reduction_df['YEARLY_NEW_PLASTIC_PACK_T'].sum(),
    title={"text": f"Total Plastic Packaging Material Weight in {report_year}<br><span style='font-size:0.8em;color:grey'>How many tons of plastic packaging were saved by optimizing plastic packaged products ?</span><br><span style='font-size:0.8em;color:grey'>"},
    delta={'reference': reduction_df['YEARLY_PRE_PLASTIC_PACK_T'].sum(), 'relative': False, 'position': "bottom"},
    domain={'x': [0, 1], 'y': [0, 1]},
))

fig.show()

In [0]:
# Read the clock once so both year filters are derived from the same value.
current_year = datetime.datetime.now().year

# Current-year rows and the last month for which they contain data.
current_rows = df[df['YEAR'] == current_year]
last_month_with_data = current_rows['MONTH'].max()

# Total NEW plastic weight for the current year (all months present in the data).
curr_year = current_rows['TOTAL_NEW_PLASTIC_T'].sum()
print(curr_year)

# Previous-year total over the same months. The cut-off is the last month that has
# current-year data rather than the wall-clock month, so both totals cover the same
# period even when the table lags behind or runs ahead of today's date.
# If there are no current-year rows the cut-off is NaN, no previous-year row matches,
# and both totals are 0.
prev_year = df[(df['YEAR'] == current_year - 1) & (df['MONTH'] <= last_month_with_data)]['TOTAL_NEW_PLASTIC_T'].sum()
print(prev_year)

# Gauge showing previous-year minus current-year weight for the matching months.
fig = go.Figure(go.Indicator(
    mode="gauge+number",
    number={'suffix': " T"},
    value=prev_year - curr_year,
    title={'text': "How many tons of plastic packaging were saved compared to last year?"},
    domain={'x': [0, 1], 'y': [0, 1]},
))

fig.show()