<a href="https://colab.research.google.com/github/Umamrozaq/streamlit-echarts-demo/blob/master/Untitled0.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
%matplotlib inline
import seaborn as sns

from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, BaggingRegressor
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import r2_score
from xgboost import XGBRegressor
from google.colab import files


In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive (Colab only); the dataset is read from
# a path under this mount point.
drive.mount('/content/drive')

In [None]:
# Location of the DataCo supply-chain CSV inside the mounted Google Drive.
DATASET_PATH = '/content/drive/MyDrive/supply chain forecast/dataset/DataCoSupplyChainDataset.csv'

# Read the CSV using the ISO-8859-1 encoding and preview the first rows.
dataco_supply_chain = pd.read_csv(DATASET_PATH, encoding='ISO-8859-1')
dataco_supply_chain.head()

## Data Preparation & Cleaning

In [None]:
# Normalise the column names: upper-case them and replace spaces with
# underscores (e.g. 'some name' -> 'SOME_NAME').
# The stray `dtype='category'` assignment that used to sit here was removed:
# it only created an unused global and had no effect on the DataFrame.
dataco_supply_chain.columns = (
    dataco_supply_chain.columns
    .str.upper()
    .str.replace(' ', '_')
)
dataco_supply_chain.head()

In [None]:
# Inspect column dtypes and non-null counts after the column renaming.
dataco_supply_chain.info()

In [None]:
# Keep only the columns used by the rest of the notebook.
SELECTED_COLUMNS = [
    'ORDER_DATE_(DATEORDERS)',
    'CATEGORY_NAME', 'CATEGORY_ID',
    'ORDER_ITEM_QUANTITY',
    'ORDER_REGION',
    'ORDER_STATUS',
    'PRODUCT_NAME', 'PRODUCT_CARD_ID',
    'DAYS_FOR_SHIPPING_(REAL)', 'DAYS_FOR_SHIPMENT_(SCHEDULED)',
]

# .copy() turns the selection into an independent DataFrame. Without it the
# later cells assign new columns to a slice of the original frame, which
# triggers pandas' SettingWithCopyWarning.
dataco_supply_chain = dataco_supply_chain[SELECTED_COLUMNS].copy()

In [None]:
# Preview the first rows of the reduced column set.
dataco_supply_chain.head()

In [None]:
# Re-check dtypes and non-null counts after the column selection.
dataco_supply_chain.info()

In [None]:
# Parse the order-date column into pandas datetimes, then confirm the new dtype.
order_date_parsed = pd.to_datetime(dataco_supply_chain['ORDER_DATE_(DATEORDERS)'])
dataco_supply_chain['ORDER_DATE_(DATEORDERS)'] = order_date_parsed
dataco_supply_chain.info()

## Data Transformation

In [None]:
# Derive calendar features from the order timestamp, then drop the raw column.
order_timestamp = dataco_supply_chain['ORDER_DATE_(DATEORDERS)']

# New column name -> attribute of the pandas `.dt` accessor that produces it.
calendar_parts = {
    'ORDER_YEAR': 'year',
    'ORDER_MONTH': 'month',
    'ORDER_DAY': 'day',
    'ORDER_WEEKDAY': 'weekday',  # Monday=0 ... Sunday=6
    'ORDER_DATE': 'date',        # plain datetime.date objects (time of day dropped)
}
for feature_name, dt_attribute in calendar_parts.items():
    dataco_supply_chain[feature_name] = getattr(order_timestamp.dt, dt_attribute)

dataco_supply_chain.drop(columns=['ORDER_DATE_(DATEORDERS)'], inplace=True)
dataco_supply_chain.info()

## Exploratory Data Analysis

In [None]:
# Summary statistics of the numeric columns, rounded to two decimals.
# `datetime_is_numeric=True` was dropped: the keyword was removed in pandas 2.0
# (passing it raises TypeError there), and it had no effect here anyway because
# the datetime column was already dropped and ORDER_DATE holds plain
# datetime.date objects.
dataco_supply_chain.describe().round(2)

In [None]:
# Distribution of ORDER_ITEM_QUANTITY.
# sns.histplot (axes-level) replaces sns.displot: displot is a figure-level
# function that opens its own figure, so the 10x6 figure created here was left
# empty and the figsize was ignored.
plt.figure(figsize=(10, 6))
sns.histplot(dataco_supply_chain['ORDER_ITEM_QUANTITY'])
plt.title('Distribution of Order Item Quantity')
plt.show()

# Total ordered quantity per day.
plt.figure(figsize=(12, 6))
dataco_supply_chain.groupby('ORDER_DATE')['ORDER_ITEM_QUANTITY'].sum().plot()
plt.title('Total Orders Over Time')
plt.ylabel('Total Order Quantity')
plt.show()

# Row counts per region, category and order status (one horizontal bar chart each).
count_plots = (
    ('ORDER_REGION', 'Distribution of Orders by Region', (12, 6)),
    ('CATEGORY_NAME', 'Distribution of Orders by Category', (12, 6)),
    ('ORDER_STATUS', 'Distribution of Order Status', (10, 6)),
)
for count_column, count_title, count_figsize in count_plots:
    plt.figure(figsize=count_figsize)
    sns.countplot(data=dataco_supply_chain, y=count_column)
    plt.title(count_title)
    plt.show()

# Real vs scheduled shipping days per region, drawn on the same axes.
# The scheduled bars are semi-transparent so the real bars stay visible underneath.
plt.figure(figsize=(10, 6))
sns.barplot(data=dataco_supply_chain, x='ORDER_REGION', y='DAYS_FOR_SHIPPING_(REAL)', color='blue', label='Real Shipping Days')
sns.barplot(data=dataco_supply_chain, x='ORDER_REGION', y='DAYS_FOR_SHIPMENT_(SCHEDULED)', color='red', alpha=0.5, label='Scheduled Shipping Days')
plt.title('Average Days for Real vs Scheduled Shipping by Region')
plt.legend()
plt.xticks(rotation=45)
plt.show()

# Correlation heatmap of the numeric columns.
plt.figure(figsize=(12, 8))
sns.heatmap(dataco_supply_chain.corr(numeric_only=True), annot=True, cmap='coolwarm')
plt.title('Correlation Heatmap')
plt.show()

In [None]:
# Total quantity ordered per calendar day, as a two-column frame
# (ORDER_DATE, ORDER_ITEM_QUANTITY) that later cells reuse.
daily_orders = (
    dataco_supply_chain
    .groupby('ORDER_DATE')['ORDER_ITEM_QUANTITY']
    .sum()
    .reset_index()
)

fig, ax = plt.subplots(figsize=(12, 8))
sns.lineplot(x='ORDER_DATE', y='ORDER_ITEM_QUANTITY', data=daily_orders, ax=ax)
ax.set_title('Total Order Quantity Over Time')
ax.set_xlabel('Date')
ax.set_ylabel('Total Quantity')
plt.show()

In [None]:
# Daily totals used by the following cells. No date cut-off is applied at the
# moment: the frame is a plain copy of daily_orders, and the cut-off filter
# below is left disabled.
filtered_daily_orders = daily_orders.copy()
#filtered_daily_orders = daily_orders[(daily_orders['ORDER_DATE'] <= pd.to_datetime('2017-07-01'))]

fig, ax = plt.subplots(figsize=(12, 8))
sns.lineplot(x='ORDER_DATE', y='ORDER_ITEM_QUANTITY', data=filtered_daily_orders, ax=ax)
ax.set_title('Total Order Quantity Over Time')
ax.set_xlabel('Date')
ax.set_ylabel('Total Quantity')
plt.show()

In [None]:
# Box plot of the per-day order totals, used to eyeball outliers.
fig, ax = plt.subplots(figsize=(10, 6))
sns.boxplot(x=filtered_daily_orders['ORDER_ITEM_QUANTITY'], ax=ax)
ax.set_title('Boxplot of Daily Order Quantities')
ax.set_xlabel('Total Order Quantity')
plt.show()

In [None]:
# Remove outlier days from the daily totals using the 1.5 * IQR rule.
daily_quantity = filtered_daily_orders['ORDER_ITEM_QUANTITY']

# First and third quartile, and the inter-quartile range between them.
Q1, Q3 = daily_quantity.quantile([0.25, 0.75])
IQR = Q3 - Q1

# Days outside [Q1 - 1.5*IQR, Q3 + 1.5*IQR] are treated as outliers.
lower_bound = Q1 - 1.5 * IQR
upper_bound = Q3 + 1.5 * IQR

# Keep the days whose total lies within the bounds (both ends inclusive).
within_bounds = daily_quantity.between(lower_bound, upper_bound)
cleaned_daily_orders = filtered_daily_orders[within_bounds]

# Time series after outlier removal.
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(cleaned_daily_orders['ORDER_DATE'], cleaned_daily_orders['ORDER_ITEM_QUANTITY'])
ax.set_title('Total Orders Over Time (Without Outliers)')
ax.set_ylabel('Total Order Quantity')
plt.show()

# Box plot of the cleaned daily totals.
fig, ax = plt.subplots(figsize=(10, 6))
sns.boxplot(x=cleaned_daily_orders['ORDER_ITEM_QUANTITY'], ax=ax)
ax.set_title('Boxplot of Daily Order Quantities')
ax.set_xlabel('Total Order Quantity')
plt.show()

In [None]:
# Add month and week period labels to the cleaned daily totals.
# assign() returns a new frame, so the filtered slice is never modified;
# each keyword is evaluated in order, so the period columns see the
# already-converted ORDER_DATE.
cleaned_daily_orders = cleaned_daily_orders.assign(
    ORDER_DATE=lambda frame: pd.to_datetime(frame['ORDER_DATE']),
    YEAR_MONTH=lambda frame: frame['ORDER_DATE'].dt.to_period('M'),
    YEAR_WEEK=lambda frame: frame['ORDER_DATE'].dt.to_period('W'),
)
cleaned_daily_orders

In [None]:
# Check the dtypes of the cleaned daily frame, including the new period columns.
cleaned_daily_orders.info()

In [None]:
# Sum the cleaned daily quantities per month and per week.
quantity_by = cleaned_daily_orders.groupby
monthly_orders = quantity_by('YEAR_MONTH')['ORDER_ITEM_QUANTITY'].sum()
weekly_orders = quantity_by('YEAR_WEEK')['ORDER_ITEM_QUANTITY'].sum()

# One line chart per aggregation level: (series, title, x-axis label).
aggregated_views = (
    (monthly_orders, 'Total Orders by Month', 'Month'),
    (weekly_orders, 'Total Orders by Week', 'Week'),
)
for aggregated_series, chart_title, x_label in aggregated_views:
    plt.figure(figsize=(15, 6))
    aggregated_series.plot(kind='line', marker='o')
    plt.title(chart_title)
    plt.ylabel('Total Order Quantity')
    plt.xlabel(x_label)
    plt.grid(True)
    plt.show()

In [None]:
# Summary of the weekly totals Series (index type, non-null count, dtype).
weekly_orders.info()

## Demand forecasting

Demand forecasting adalah proses peramalan atau prediksi permintaan (demand) suatu produk atau layanan di masa depan. Tujuan dari demand forecasting adalah untuk membantu perusahaan atau organisasi dalam perencanaan produksi, persediaan, distribusi, dan manajemen sumber daya lainnya dengan lebih efisien. Dengan informasi yang akurat tentang permintaan di masa depan, perusahaan dapat menghindari overstock atau understock barang, mengoptimalkan produksi, dan meningkatkan kepuasan pelanggan.

In [None]:
from prophet import Prophet

In [None]:
from sklearn.metrics import mean_absolute_error, mean_squared_error
import numpy as np

# Convert the weekly Series into the two-column frame Prophet expects:
# 'ds' (timestamp) and 'y' (value to forecast).
weekly_orders_df = weekly_orders.reset_index()
weekly_orders_df.columns = ['ds', 'y']

# Periods -> timestamps. to_timestamp() defaults to the START of each period,
# so every 'ds' is the Monday that opens its Monday-Sunday week.
weekly_orders_df['ds'] = weekly_orders_df['ds'].dt.to_timestamp()

# Chronological split: first 80% of the weeks for training, last 20% for testing.
split_point = int(len(weekly_orders_df) * 0.80)
train = weekly_orders_df.iloc[:split_point]
test = weekly_orders_df.iloc[split_point:]

# Initialize and fit the Prophet model on the training weeks only.
weekly_model = Prophet()
weekly_model.fit(train)

# Predict on exactly the dates present in the data (train + test).
# The previous make_future_dataframe(periods=len(test), freq='W-SUN') generated
# Sunday dates for the hold-out weeks while the real test rows are stamped on
# Mondays: predictions were scored one day off, and a later merge on 'ds' could
# not match any test row. Using the real dates also stays aligned if a week is
# missing from the data.
weekly_future = weekly_orders_df[['ds']].copy()

# One forecast row per week, in the same order as weekly_orders_df.
weekly_forecast = weekly_model.predict(weekly_future)

In [None]:
# Split the forecast positionally: the first `split_point` rows are taken as
# the training-period predictions, the remainder as the test-period predictions.
predicted = weekly_forecast['yhat']
y_pred_train = predicted.iloc[:split_point]
y_pred_test = predicted.iloc[split_point:]

# Training errors.
mae_train = mean_absolute_error(train['y'], y_pred_train)
mse_train = mean_squared_error(train['y'], y_pred_train)
rmse_train = np.sqrt(mse_train)

# Test errors.
mae_test = mean_absolute_error(test['y'], y_pred_test)
mse_test = mean_squared_error(test['y'], y_pred_test)
rmse_test = np.sqrt(mse_test)

print(f"Training MAE: {mae_train}")
print(f"Training MSE: {mse_train}")
print(f"Training RMSE: {rmse_train}")

print(f"\nTesting MAE: {mae_test}")
print(f"Testing MSE: {mse_test}")
print(f"Testing RMSE: {rmse_test}")

In [None]:
# Prophet's built-in forecast plot, relabelled through its axes.
weekly_fig1 = weekly_model.plot(weekly_forecast)
forecast_ax = weekly_fig1.gca()
forecast_ax.set_title('Weekly Orders Forecast')
forecast_ax.set_xlabel('Date')
forecast_ax.set_ylabel('Order Quantity')
plt.show()

# Break the forecast down into the components fitted by the model.
weekly_fig2 = weekly_model.plot_components(weekly_forecast)

In [None]:
# Actual vs predicted weekly orders, for both the training and the test period.
# Each entry: (x values, y values, legend label, colour, line style).
curves = (
    (train['ds'], train['y'], 'Training Data', 'blue', '-'),
    (test['ds'], test['y'], 'Actual Test Data', 'orange', '-'),
    (train['ds'], y_pred_train, 'Predicted Training Data', 'red', '--'),
    (test['ds'], y_pred_test, 'Predicted Test Data', 'green', '--'),
)

plt.figure(figsize=(15, 6))
for x_values, y_values, curve_label, curve_color, curve_style in curves:
    plt.plot(x_values, y_values, label=curve_label, color=curve_color, linestyle=curve_style)
plt.title('Actual vs Predicted Weekly Orders')
plt.xlabel('Date')
plt.ylabel('Order Quantity')
plt.legend()
plt.grid(True)
plt.show()

In [None]:
# Summary statistics of the weekly demand frame fed to Prophet.
weekly_orders_df.describe()

## Inventory management

In [None]:
import matplotlib.pyplot as plt

# Inventory parameters (previously inline magic numbers).
SERVICE_LEVEL_Z = 1.65     # safety-stock multiplier; presumably the z-score for a ~95% service level - TODO confirm
LEAD_TIME_WEEKS = 1        # was the hard-coded 1 in np.sqrt(1) and `avg * 1`; presumably the lead time in weeks - TODO confirm
ROLLING_WINDOW_WEEKS = 7   # number of consecutive weekly rows in each rolling window

# Attach the actual demand 'y' to the forecast rows by date.
# weekly_orders_df holds the train and test rows together, so one left merge
# replaces the former two merges plus `weekly_forecast['y'].fillna(..., inplace=True)`.
# That chained in-place fill is deprecated in pandas 2.x and does nothing under
# copy-on-write. Dropping any existing 'y' first keeps the cell re-runnable.
weekly_forecast = (
    weekly_forecast
    .drop(columns='y', errors='ignore')
    .merge(weekly_orders_df[['ds', 'y']], on='ds', how='left')
)

# Rolling statistics of actual demand; the first ROLLING_WINDOW_WEEKS - 1 rows are NaN.
actual_rolling = weekly_forecast['y'].rolling(window=ROLLING_WINDOW_WEEKS)

# Rolling standard deviation of actual demand.
weekly_forecast['std_dev'] = actual_rolling.std()

# Safety stock = multiplier * demand std-dev * sqrt(lead time).
weekly_forecast['safety_stock'] = SERVICE_LEVEL_Z * weekly_forecast['std_dev'] * np.sqrt(LEAD_TIME_WEEKS)

# Rolling mean of actual demand.
weekly_forecast['avg_weekly_demand'] = actual_rolling.mean()

# Reorder point = average demand over the lead time + safety stock.
weekly_forecast['reorder_point'] = (weekly_forecast['avg_weekly_demand'] * LEAD_TIME_WEEKS) + weekly_forecast['safety_stock']

# Plotting
plt.figure(figsize=(15, 7))
plt.plot(weekly_forecast['ds'], weekly_forecast['y'], label='Actual Demand', color='blue')
plt.plot(weekly_forecast['ds'], weekly_forecast['yhat'], label='Predicted Demand', color='green')
plt.plot(weekly_forecast['ds'], weekly_forecast['safety_stock'], label='Safety Stock', color='red', linestyle='--')
plt.plot(weekly_forecast['ds'], weekly_forecast['reorder_point'], label='Reorder Point', color='orange', linestyle='--')
plt.legend()
plt.title('Actual vs Predicted Demand with Safety Stock and Reorder Point')
plt.xlabel('Date')
plt.ylabel('Order Quantity')
plt.show()

In [None]:
# Safety stock and reorder point derived from the FORECASTED demand (yhat),
# using the same formulas as for actual demand.
yhat_rolling = weekly_forecast['yhat'].rolling(window=7)

# Rolling standard deviation of forecasted demand.
weekly_forecast['forecasted_std_dev'] = yhat_rolling.std()

# Safety stock for forecasted demand.
forecasted_sigma = weekly_forecast['forecasted_std_dev']
weekly_forecast['forecasted_safety_stock'] = 1.65 * forecasted_sigma * np.sqrt(1)

# Rolling mean of forecasted demand.
weekly_forecast['forecasted_avg_weekly_demand'] = yhat_rolling.mean()

# Reorder point for forecasted demand.
weekly_forecast['forecasted_reorder_point'] = (
    (weekly_forecast['forecasted_avg_weekly_demand'] * 1)
    + weekly_forecast['forecasted_safety_stock']
)

# Each entry: (column, legend label, colour, line style).
inventory_curves = (
    ('y', 'Actual Demand', 'blue', '-'),
    ('yhat', 'Predicted Demand', 'green', '-'),
    ('safety_stock', 'Safety Stock (Actual)', 'red', '--'),
    ('forecasted_safety_stock', 'Safety Stock (Forecasted)', 'purple', '--'),
    ('reorder_point', 'Reorder Point (Actual)', 'orange', '--'),
    ('forecasted_reorder_point', 'Reorder Point (Forecasted)', 'pink', '--'),
)

plt.figure(figsize=(15, 7))
for curve_column, curve_label, curve_color, curve_style in inventory_curves:
    plt.plot(weekly_forecast['ds'], weekly_forecast[curve_column],
             label=curve_label, color=curve_color, linestyle=curve_style)
plt.legend()
plt.title('Actual vs Predicted Demand with Safety Stock and Reorder Point')
plt.xlabel('Date')
plt.ylabel('Order Quantity')
plt.show()

## Evaluation

In [None]:
from sklearn.metrics import r2_score

# Illustrative example with hard-coded actual and predicted values
# (these numbers are not taken from the forecast above).
actual_data = [10, 20, 30, 40, 50]
predicted_data = [12, 18, 28, 38, 52]

# R-squared (coefficient of determination) of the predictions.
r2 = r2_score(y_true=actual_data, y_pred=predicted_data)

print(f"R-squared: {r2}")


In [None]:
# Import libraries
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

# Classification demo on scikit-learn's bundled Iris dataset.
iris = datasets.load_iris()
X, y = iris.data, iris.target  # feature matrix, class labels

# Hold out 20% of the samples for testing; the seed fixes the split.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Linear-kernel SVM classifier (other kernels: 'rbf', 'poly', ...).
# fit() returns the estimator itself, so construction and training are chained.
clf = SVC(kernel='linear').fit(X_train, y_train)

# Predict the held-out samples and report the share predicted correctly.
y_pred = clf.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy}")


In [None]:
from sklearn.metrics import confusion_matrix

# Confusion matrix of the true test labels against the predicted ones,
# printed under a heading.
confusion = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:", confusion, sep="\n")

In [None]:
from sklearn.metrics import precision_score, recall_score, f1_score

# Precision, recall and F1 for the multiclass predictions; 'weighted' averages
# the per-class scores weighted by each class's number of true samples.
weighted_average = {'average': 'weighted'}

precision = precision_score(y_test, y_pred, **weighted_average)
print(f"Precision: {precision}")

recall = recall_score(y_test, y_pred, **weighted_average)
print(f"Recall: {recall}")

f1 = f1_score(y_test, y_pred, **weighted_average)
print(f"F1 Score: {f1}")

In [None]:
# Import libraries
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor  # Mengganti SVM dengan DecisionTreeRegressor
from sklearn.metrics import mean_squared_error  # Menggunakan mean_squared_error untuk regresi

# Regression demo on the Iris dataset: the integer class labels are used as
# the numeric regression target.
iris = datasets.load_iris()
X, y = iris.data, iris.target  # feature matrix, target values

# Hold out 20% of the samples for testing; the seed fixes the split.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Decision-tree regressor with a fixed seed; fit() returns the estimator itself.
regressor = DecisionTreeRegressor(random_state=42).fit(X_train, y_train)

# Predict the held-out samples and score them with the mean squared error.
y_pred = regressor.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
print(f"Mean Squared Error: {mse}")


## Deployment

In [None]:
# Install Streamlit into the notebook runtime (no version is pinned).
!pip install streamlit

In [None]:
import streamlit as st
import pandas as pd
from statsmodels.tsa.arima.model import ARIMA

def main():
    """Render a page with a title and a CSV uploader.

    When a file has been uploaded it is read with pandas and displayed;
    until then only the title and the uploader widget are shown.
    """
    st.title("Aplikasi Peramalan Sederhana")

    # Optional upload widget; returns None until the user selects a file.
    uploaded_file = st.file_uploader("Unggah file CSV", type="csv")
    if uploaded_file is None:
        return

    # Show the uploaded data.
    data = pd.read_csv(uploaded_file)
    st.write(data)



In [None]:
%%writefile app.py

import streamlit as st

def main():
    """Render the title and a welcome message of the demo Streamlit app."""
    st.title("Aplikasi Streamlit Sederhana")
    st.write("Selamat datang di aplikasi Streamlit!")

# Run the app when the file is executed as a script (e.g. `streamlit run app.py`).
if __name__ == "__main__":
    main()

In [None]:
# Start the Streamlit app in the background; stdout and stderr are redirected
# to /content/logs.txt.
!streamlit run app.py &>/content/logs.txt &

In [9]:
# Install pyngrok (no version is pinned) and import its ngrok wrapper.
!pip install pyngrok
from pyngrok import ngrok



In [17]:


import os

# Port the Streamlit server listens on. `streamlit run app.py` is started
# without a port option, so it uses Streamlit's default, 8501. The previous
# value (80) pointed the tunnel at a port the app is not running on.
port = 8501

# ngrok only opens tunnels for a verified account with an authtoken (the
# previous run failed with ERR_NGROK_4018). The token is read from the
# NGROK_AUTHTOKEN environment variable so it is never stored in the notebook.
ngrok_authtoken = os.environ.get("NGROK_AUTHTOKEN")
if ngrok_authtoken:
    ngrok.set_auth_token(ngrok_authtoken)

# Open the tunnel and print the public URL.
public_url = ngrok.connect(port)
print('Streamlit app is live at:', public_url)


ERROR:pyngrok.process.ngrok:t=2023-12-21T05:28:18+0000 lvl=eror msg="failed to reconnect session" obj=tunnels.session obj=csess id=4a2771669103 err="authentication failed: Usage of ngrok requires a verified account and authtoken.\n\nSign up for an account: https://dashboard.ngrok.com/signup\nInstall your authtoken: https://dashboard.ngrok.com/get-started/your-authtoken\r\n\r\nERR_NGROK_4018\r\n"
ERROR:pyngrok.process.ngrok:t=2023-12-21T05:28:18+0000 lvl=eror msg="session closing" obj=tunnels.session err="authentication failed: Usage of ngrok requires a verified account and authtoken.\n\nSign up for an account: https://dashboard.ngrok.com/signup\nInstall your authtoken: https://dashboard.ngrok.com/get-started/your-authtoken\r\n\r\nERR_NGROK_4018\r\n"
ERROR:pyngrok.process.ngrok:t=2023-12-21T05:28:18+0000 lvl=eror msg="terminating with error" obj=app err="authentication failed: Usage of ngrok requires a verified account and authtoken.\n\nSign up for an account: https://dashboard.ngrok.co

PyngrokNgrokError: ignored