In [3]:
from google.colab import drive

# Directory inside the Colab VM where Google Drive is attached.
DRIVE_MOUNT_POINT = '/content/drive'

# Attach Google Drive so later cells can read files stored under it.
drive.mount(DRIVE_MOUNT_POINT)


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [4]:
import pandas as pd

# Path of the training CSV on the mounted Google Drive.
TRAIN_CSV_PATH = '/content/drive/MyDrive/Dibimbing/train.csv'

# Read the dataset into a DataFrame.
df = pd.read_csv(TRAIN_CSV_PATH)

# Show the first 5 rows as this cell's output.
df.head()


Unnamed: 0,Row ID,Order ID,Order Date,Ship Date,Ship Mode,Customer ID,Customer Name,Segment,Country,City,State,Postal Code,Region,Product ID,Category,Sub-Category,Product Name,Sales
0,1,CA-2017-152156,08/11/2017,11/11/2017,Second Class,CG-12520,Claire Gute,Consumer,United States,Henderson,Kentucky,42420.0,South,FUR-BO-10001798,Furniture,Bookcases,Bush Somerset Collection Bookcase,261.96
1,2,CA-2017-152156,08/11/2017,11/11/2017,Second Class,CG-12520,Claire Gute,Consumer,United States,Henderson,Kentucky,42420.0,South,FUR-CH-10000454,Furniture,Chairs,"Hon Deluxe Fabric Upholstered Stacking Chairs,...",731.94
2,3,CA-2017-138688,12/06/2017,16/06/2017,Second Class,DV-13045,Darrin Van Huff,Corporate,United States,Los Angeles,California,90036.0,West,OFF-LA-10000240,Office Supplies,Labels,Self-Adhesive Address Labels for Typewriters b...,14.62
3,4,US-2016-108966,11/10/2016,18/10/2016,Standard Class,SO-20335,Sean O'Donnell,Consumer,United States,Fort Lauderdale,Florida,33311.0,South,FUR-TA-10000577,Furniture,Tables,Bretford CR4500 Series Slim Rectangular Table,957.5775
4,5,US-2016-108966,11/10/2016,18/10/2016,Standard Class,SO-20335,Sean O'Donnell,Consumer,United States,Fort Lauderdale,Florida,33311.0,South,OFF-ST-10000760,Office Supplies,Storage,Eldon Fold 'N Roll Cart System,22.368


Langkah 1: Membuat Script Streamlit (app.py)

In [5]:
%%writefile app.py
"""Streamlit dashboard for quick exploratory analysis of an uploaded CSV.

The user uploads a CSV file; the app then shows a preview of the data, an
optional shape summary, a histogram of a chosen numeric column and, when the
corresponding columns exist, a pie chart of ``Category`` counts and a bar
chart of total ``Sales`` per ``Segment``.
"""
import streamlit as st
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns


def _render_figure(fig):
    """Draw a matplotlib figure in the app and then release it.

    Streamlit re-executes this script on every widget interaction, and
    figures created through ``plt.subplots()`` stay registered with pyplot
    until they are closed, so each figure is closed right after rendering
    to keep memory from growing with every rerun.
    """
    st.pyplot(fig)
    plt.close(fig)


st.set_page_config(layout="wide")
st.title("📊 EDA Superstore Dashboard")

# Upload CSV file
uploaded_file = st.file_uploader("📂 Upload Superstore Dataset (.csv)", type=["csv"])

if uploaded_file is not None:
    try:
        df = pd.read_csv(uploaded_file)
    except (pd.errors.ParserError, pd.errors.EmptyDataError, UnicodeDecodeError) as exc:
        # Show a readable message instead of a raw traceback for bad uploads.
        st.error(f"Could not read the uploaded file as CSV: {exc}")
        st.stop()

    # Show a preview table
    st.subheader("Preview Dataset")
    st.dataframe(df.head())

    # Checkbox toggling the shape summary
    if st.checkbox("Show dataset shape"):
        st.write(f"Rows: {df.shape[0]}, Columns: {df.shape[1]}")

    # Selectbox: choose a numeric column
    numeric_cols = df.select_dtypes(include='number').columns.tolist()
    if numeric_cols:
        selected_col = st.selectbox("Select a numeric column to analyze", numeric_cols)

        # Histogram
        st.subheader(f"Histogram of {selected_col}")
        bins = st.slider("Bins", 5, 50, 20)
        # Drop missing values explicitly; some matplotlib/numpy versions
        # cannot derive a bin range when the data contains NaN.
        values = df[selected_col].dropna()
        fig, ax = plt.subplots()
        ax.hist(values, bins=bins, color="skyblue")
        ax.set_xlabel(str(selected_col))
        ax.set_ylabel("Count")
        _render_figure(fig)
    else:
        # With no options the selectbox yields None, which cannot index df.
        st.warning("The uploaded file has no numeric columns to plot.")

    # Pie chart of categories
    if "Category" in df.columns:
        st.subheader("📎 Product Category Distribution")
        cat_counts = df["Category"].value_counts()
        fig2, ax2 = plt.subplots()
        ax2.pie(cat_counts, labels=cat_counts.index, autopct='%1.1f%%', startangle=90)
        ax2.axis('equal')  # keep the pie circular
        _render_figure(fig2)

    # Bar plot of total sales per segment
    if "Segment" in df.columns and "Sales" in df.columns:
        st.subheader("💼 Sales by Segment")
        # Sum with pandas before plotting: each segment then has a single
        # value, so no error bars are drawn and the deprecated ``ci``
        # argument of seaborn.barplot is not needed. ``sort=False`` keeps
        # segments in order of first appearance.
        segment_sales = df.groupby("Segment", sort=False, as_index=False)["Sales"].sum()
        fig3, ax3 = plt.subplots()
        sns.barplot(x="Segment", y="Sales", data=segment_sales, ax=ax3)
        _render_figure(fig3)


Writing app.py
