In [1]:
!pip install streamlit babel



In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sn
import streamlit as st
from babel.numbers import format_currency

In [10]:
# Load the merged order-level dataset from the working directory.
# NOTE(review): the preview shows an "Unnamed: 0" column, which suggests the
# CSV was written together with its index - confirm whether it is needed.
data = pd.read_csv(filepath_or_buffer="maindatautama.csv")
# Preview the first five rows.
data.head(n=5)

Unnamed: 0,order_id,order_item_id_x,product_id_x,seller_id_x,shipping_limit_date_x,price_x,freight_value_x,payment_sequential,payment_type,payment_installments,...,freight_value_y,product_category_name,product_name_lenght,product_description_lenght,product_photos_qty,product_weight_g,product_length_cm,product_height_cm,product_width_cm,product_category_name_english
0,00010242fe8c5a6d1ba2dd792cb16214,1,4244733e06e7ecb4970a6e2683c13e61,48436dade18ac8b2bce089ec2a041202,2017-09-19 09:45:35,58.9,13.29,1.0,credit_card,2.0,...,13.29,cool_stuff,58.0,598.0,4.0,650.0,28.0,9.0,14.0,cool_stuff
1,00018f77f2f0320c557190d7a144bdd3,1,e5f2d52b802189ee658865ca93d83a8f,dd7ddc04e1b6c2c614352b383efe2d36,2017-05-03 11:05:13,239.9,19.93,1.0,credit_card,3.0,...,19.93,pet_shop,56.0,239.0,2.0,30000.0,50.0,30.0,40.0,pet_shop
2,000229ec398224ef6ca0657da4fc703e,1,c777355d18b72b67abbeef9df44fd0fd,5b51032eddd242adc84c38acab88f23d,2018-01-18 14:48:30,199.0,17.87,1.0,credit_card,5.0,...,17.87,moveis_decoracao,59.0,695.0,2.0,3050.0,33.0,13.0,33.0,furniture_decor
3,00024acbcdf0a6daa1e931b038114c75,1,7634da152a4610f1595efa32f14722fc,9d7a1d34a5052409006425275ba1c2b4,2018-08-15 10:10:18,12.99,12.79,1.0,credit_card,2.0,...,12.79,perfumaria,42.0,480.0,1.0,200.0,16.0,10.0,15.0,perfumery
4,00042b26cf59d7ce69dfabb4e55b4fd9,1,ac6c3623068f30de03045865e4e10089,df560393f3a51e74553ab94004ba5c87,2017-02-13 13:57:51,199.9,18.14,1.0,credit_card,3.0,...,18.14,ferramentas_jardim,59.0,409.0,1.0,3750.0,35.0,40.0,30.0,garden_tools


In [11]:
# Print each column's dtype and non-null count for the loaded frame.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 98363 entries, 0 to 98362
Data columns (total 42 columns):
 #   Column                         Non-Null Count  Dtype  
---  ------                         --------------  -----  
 0   order_id                       98363 non-null  object 
 1   order_item_id_x                98363 non-null  int64  
 2   product_id_x                   98363 non-null  object 
 3   seller_id_x                    98363 non-null  object 
 4   shipping_limit_date_x          98363 non-null  object 
 5   price_x                        98363 non-null  float64
 6   freight_value_x                98363 non-null  float64
 7   payment_sequential             98363 non-null  float64
 8   payment_type                   98363 non-null  object 
 9   payment_installments           98363 non-null  float64
 10  payment_value                  98363 non-null  float64
 11  review_id                      98363 non-null  object 
 12  review_score                   98363 non-null 

In [21]:
def create_daily_orders_df(df):
    """Aggregate orders per calendar day.

    Resamples ``df`` with a daily rule on ``order_approved_at`` and returns a
    frame with one row per day and three columns:

    * ``order_approved_at`` - the day bucket,
    * ``order_count`` - number of distinct ``order_id`` values,
    * ``revenue`` - sum of ``price_x``.
    """
    per_day = df.resample(rule='D', on='order_approved_at')
    totals = per_day.agg({"order_id": "nunique", "price_x": "sum"})
    return totals.reset_index().rename(
        columns={"order_id": "order_count", "price_x": "revenue"}
    )

def create_sum_product_df(df):
    """Total ``price_x`` per ``product_category_name``, largest total first.

    Returns a DataFrame with the columns ``product_category_name`` and
    ``price_x`` (the summed value) on a fresh integer index.
    """
    revenue_by_category = df.groupby("product_category_name")["price_x"].sum()
    ranked = revenue_by_category.sort_values(ascending=False)
    return ranked.reset_index()

def create_review(df):
    """Count distinct customers for each value of the ``gender`` column.

    Returns a DataFrame with the columns ``gender`` and ``customer_count``
    (number of unique ``customer_id`` values in that group).

    NOTE(review): the function name suggests a review summary, but the
    grouping key is ``gender`` - confirm that this column exists in the
    input and that this is the intended aggregation.
    """
    customers_per_group = df.groupby(by="gender")["customer_id"].nunique()
    summary = customers_per_group.reset_index()
    return summary.rename(columns={"customer_id": "customer_count"})
    
def create_bystate_df(df):
    """Count distinct customers for each value of the ``state`` column.

    Returns a DataFrame with the columns ``state`` and ``customer_count``
    (number of unique ``customer_id`` values in that state).

    NOTE(review): assumes the input has a column literally named ``state`` -
    confirm against the dataset's schema.
    """
    customers_per_state = df.groupby(by="state")["customer_id"].nunique()
    summary = customers_per_state.reset_index()
    return summary.rename(columns={"customer_id": "customer_count"})

In [23]:
def create_rfm_df(df):
    """Build a recency / frequency / monetary table with one row per customer.

    Output columns:

    * ``customer_id``
    * ``frequency`` - number of distinct ``order_id`` values,
    * ``monetary`` - sum of ``price_x``,
    * ``recency`` - whole days between the customer's latest
      ``order_approved_at`` date and the latest such date in ``df``.
    """
    per_customer = df.groupby(by="customer_id", as_index=False).agg(
        max_order_timestamp=("order_approved_at", "max"),  # latest order per customer
        frequency=("order_id", "nunique"),
        monetary=("price_x", "sum"),
    )

    # Compare calendar dates only, so the time of day does not affect recency.
    reference_day = df["order_approved_at"].dt.date.max()
    last_order_day = per_customer["max_order_timestamp"].dt.date
    per_customer["recency"] = last_order_day.map(
        lambda day: (reference_day - day).days
    )

    return per_customer.drop(columns="max_order_timestamp")

In [27]:
# Columns that must hold real timestamps before any time-based processing.
datetime_columns = ["order_approved_at", "order_estimated_delivery_date"]

# Convert first, so the sort below orders rows chronologically instead of
# comparing the raw text values.
for column in datetime_columns:
    data[column] = pd.to_datetime(data[column])

data.sort_values(by="order_approved_at", inplace=True)
# drop=True discards the old row labels instead of inserting them as an extra
# column, which also keeps this cell safe to re-run.
data.reset_index(drop=True, inplace=True)

In [28]:
# Bounds of the date picker: the earliest and latest approval timestamps.
min_date = data["order_approved_at"].min()
max_date = data["order_approved_at"].max()

with st.sidebar:
    # Show the company logo.
    st.image("https://github.com/dicodingacademy/assets/raw/main/logo.png")

    # Read the selected range from the date picker.
    selected_range = st.date_input(
        label='Rentang Waktu',min_value=min_date,
        max_value=max_date,
        value=[min_date, max_date]
    )

# While the user has picked only the first date of a range, the widget returns
# a one-element tuple; unpacking it into two names directly would raise
# ValueError, so treat that state as a single-day range.
if len(selected_range) == 2:
    start_date, end_date = selected_range
else:
    start_date = end_date = selected_range[0]

2025-01-29 20:24:04.627 
  command:

    streamlit run C:\Users\LENOVO\AppData\Local\Programs\Python\Python313\Lib\site-packages\ipykernel_launcher.py [ARGUMENTS]
