In [10]:
import pandas as pd
import numpy as np

# Load the segmented transactions dataset.
# The path is relative to the notebook's working directory.
df = pd.read_csv("df_segmented.csv")

# Preview the first 5 rows.
print(df.head())

# Column names, dtypes and non-null counts.
# DataFrame.info() writes its report to stdout itself and returns None,
# so wrapping it in print() would emit a stray "None" line after the report.
df.info()

# Count of missing values per column.
print(df.isnull().sum())

# Summary statistics for every column (numeric and non-numeric alike).
print(df.describe(include='all'))

   customerid productid  quantity      price      transactiondate  \
0      109318         C         7  80.079844  2023-12-26 12:32:00   
1      993229         C         4  75.195229  2023-08-05 00:00:00   
2      579675         A         8  31.528816  2024-03-11 18:51:00   
3      799826         D         5  98.880218  2023-10-27 22:00:00   
4      121413         A         7  93.188512  2023-12-22 11:38:00   

  paymentmethod                                      storelocation  \
0          Cash          176 Andrew Cliffs\r\nBaileyfort, HI 93354   
1          Cash  11635 William Well Suite 809\r\nEast Kara, MT ...   
2          Cash  910 Mendez Ville Suite 909\r\nPort Lauraland, ...   
3        PayPal  87522 Sharon Corners Suite 500\r\nLake Tammy, ...   
4          Cash  0070 Michelle Island Suite 143\r\nHoland, VA 8...   

  productcategory  discountapplied  totalamount  year  month  day   weekday  \
0           Books        18.677100   455.862764  2023     12   26   Tuesday   
1     

In [11]:
# Segment profiling summary: one row per segment, built as a single chain.
#   CustomerCount            - number of distinct customer ids in the segment
#   TotalRevenue             - sum of the 'revenue' column
#   TotalTransactions        - number of rows with a non-null customer id
#   AvgRevenuePerTransaction - TotalRevenue / TotalTransactions
# The result is ordered from the highest-revenue segment to the lowest.
profiling = (
    df.groupby('Segment')
    .agg(
        CustomerCount=('customerid', 'nunique'),
        TotalRevenue=('revenue', 'sum'),
        TotalTransactions=('customerid', 'count'),
    )
    .reset_index()
    .assign(
        AvgRevenuePerTransaction=lambda summary: (
            summary['TotalRevenue'] / summary['TotalTransactions']
        )
    )
    .sort_values(by='TotalRevenue', ascending=False)
)

# Display the table with the monetary columns formatted using an "Rp" prefix.
currency_formats = {
    'TotalRevenue': 'Rp{:,.0f}',
    'AvgRevenuePerTransaction': 'Rp{:,.2f}',
}
profiling.style.format(currency_formats)


Unnamed: 0,Segment,CustomerCount,TotalRevenue,TotalTransactions,AvgRevenuePerTransaction
0,Big Spenders,13303,"Rp5,939,901",13303,Rp446.51
2,Frequent Buyers,22083,"Rp5,908,984",23791,Rp248.37
6,Recent Customers,14808,"Rp3,667,746",14808,Rp247.69
1,Champions,7315,"Rp3,613,594",9787,Rp369.22
4,Loyal Customers,16135,"Rp2,966,605",16740,Rp177.22
5,Others,11850,"Rp1,498,558",11850,Rp126.46
3,Lost,9721,"Rp1,238,107",9721,Rp127.36


### 🧮 Segment Profiling Summary

Berikut adalah rangkuman profil tiap segmen berdasarkan:
- Jumlah pelanggan unik (CustomerCount)
- Total pendapatan (TotalRevenue)
- Jumlah transaksi (TotalTransactions)
- Rata-rata revenue per transaksi (AvgRevenuePerTransaction)

Ini berguna untuk mengetahui segmen mana yang paling menguntungkan dan memiliki pelanggan paling aktif.


In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Resolve the segment column name from the frame itself.
# The profiling cell earlier in this notebook groups by 'Segment' (capital S),
# so hard-coding the lowercase 'segment' here would raise a KeyError on that
# data. Accept either spelling so the cell runs regardless of which one the
# CSV uses.
segment_col = next((name for name in ('Segment', 'segment') if name in df.columns), None)
if segment_col is None:
    raise KeyError("Expected a 'Segment' (or 'segment') column in df")

# Make sure 'transactiondate' is a datetime, then derive a 'YYYY-MM' label
# used as the monthly bucket on the x-axis.
df['transactiondate'] = pd.to_datetime(df['transactiondate'])
df['year_month'] = df['transactiondate'].dt.to_period('M').astype(str)

# Count transactions (rows) per segment per month.
monthly_segment = (
    df.groupby(['year_month', segment_col])
    .size()
    .reset_index(name='transaction_count')
)

# Line chart: one line per segment, one point per month.
plt.figure(figsize=(14, 6))
sns.lineplot(
    data=monthly_segment,
    x='year_month',
    y='transaction_count',
    hue=segment_col,
    marker='o',
)
plt.title('📊 Jumlah Transaksi per Segment per Bulan')
plt.xlabel('Bulan')
plt.ylabel('Jumlah Transaksi')
plt.xticks(rotation=45)
plt.grid(True)
plt.tight_layout()
plt.legend(title='Segment')
plt.show()
