In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
from google.colab import drive

In [4]:
# Mount Google Drive into the Colab filesystem; the CSV files read below live under /content/drive.
drive.mount('/content/drive')

Mounted at /content/drive


In [5]:
# Folder on the mounted Google Drive that holds the cleaned CSV files.
# Kept in one place so the long absolute path is not repeated per file.
DATA_DIR = '/content/drive/MyDrive/MSIB Bitlabs Data Analytics for Business/PBL/data_cleaning'

# Transactions: both timestamp columns are parsed at load time so the .dt accessor
# can be used on them directly in the cells below.
df = pd.read_csv(
    f'{DATA_DIR}/transaction.csv',
    parse_dates=['transaction_created_datetime', 'transaction_updated_datetime'],
)

# Promotion table (one row per dpt_promotion_id according to the .info() output below).
promotion = pd.read_csv(f'{DATA_DIR}/promotion.csv')

In [64]:
# User/company table; used further down to look up the accounts that belong to the
# second connected component of the buyer-seller graph.
user = pd.read_csv('/content/drive/MyDrive/MSIB Bitlabs Data Analytics for Business/PBL/data_cleaning/user.csv')

In [6]:
# Column names, dtypes and non-null counts of the transaction table.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 50000 entries, 0 to 49999
Data columns (total 9 columns):
 #   Column                        Non-Null Count  Dtype         
---  ------                        --------------  -----         
 0   dpt_id                        50000 non-null  object        
 1   dpt_promotion_id              50000 non-null  object        
 2   buyer_id                      50000 non-null  object        
 3   seller_id                     50000 non-null  object        
 4   transaction_amount            50000 non-null  float64       
 5   payment_method_name           50000 non-null  object        
 6   payment_provider_name         50000 non-null  object        
 7   transaction_created_datetime  50000 non-null  datetime64[ns]
 8   transaction_updated_datetime  50000 non-null  datetime64[ns]
dtypes: datetime64[ns](2), float64(1), object(6)
memory usage: 3.4+ MB


In [7]:
# Column names, dtypes and non-null counts of the promotion table.
promotion.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 728 entries, 0 to 727
Data columns (total 4 columns):
 #   Column                             Non-Null Count  Dtype  
---  ------                             --------------  -----  
 0   dpt_promotion_id                   728 non-null    object 
 1   promotion_code                     728 non-null    object 
 2   promotion_name                     728 non-null    object 
 3   transaction_promo_cashback_amount  728 non-null    float64
dtypes: float64(1), object(3)
memory usage: 22.9+ KB


# Transaction Amount Over Time

In [12]:
from plotly.subplots import make_subplots
import plotly.express as px

# Calendar day of every transaction; shared grouping key for both daily series.
transaction_day = df["transaction_created_datetime"].dt.date.rename("date")

# Daily aggregate: summed transaction amount.
df_daily_amount = (
    df.groupby(transaction_day)["transaction_amount"]
    .sum()
    .reset_index(name="total_transaction_amount")
)

# Daily aggregate: number of transactions.
df_daily_frequency = (
    df.groupby(transaction_day)["dpt_id"]
    .count()
    .reset_index(name="transaction_count")
)

# Upper bound for the amount axis, expressed in whole billions (rounded up).
max_transaction_amount = df_daily_amount["total_transaction_amount"].max()
max_tick_value = int(max_transaction_amount // 1e9) + 1
billion_steps = range(max_tick_value + 1)

# Two stacked panels that share the date axis.
fig = make_subplots(
    rows=2,
    cols=1,
    shared_xaxes=True,
    vertical_spacing=0.1,
    subplot_titles=("Transaction Amounts Over Time", "Transaction Frequency Over Time"),
)

# Row 1: daily amount, row 2: daily count. px.line builds the trace, which is then
# moved into the subplot grid.
panels = [
    (df_daily_amount, "total_transaction_amount"),
    (df_daily_frequency, "transaction_count"),
]
for panel_row, (daily_frame, value_column) in enumerate(panels, start=1):
    line_trace = px.line(daily_frame, x="date", y=value_column).data[0]
    fig.add_trace(line_trace, row=panel_row, col=1)

# Label the amount axis in billions: 0B, 1B, ..., up to max_tick_value.
fig.update_yaxes(
    tickvals=[step * 1e9 for step in billion_steps],
    ticktext=[f"{step}B" for step in billion_steps],
    row=1, col=1,
)

fig.update_layout(height=1200)

fig.show()


In [14]:
# Transactions created on 2023-04-14, largest amounts first.
day_to_inspect = pd.to_datetime("2023-04-14").date()
created_that_day = df["transaction_created_datetime"].dt.date == day_to_inspect
df[created_that_day].sort_values(by='transaction_amount', ascending=False)

Unnamed: 0,dpt_id,dpt_promotion_id,buyer_id,seller_id,transaction_amount,payment_method_name,payment_provider_name,transaction_created_datetime,transaction_updated_datetime
4042,f620ec3ab2a4092ec1ea1d0913f0cc8b99d5ac448d77ee...,no promotion,dd9a9195c84e334de861047a82118fd24ada1030d0688b...,dd9a9195c84e334de861047a82118fd24ada1030d0688b...,2.014010e+10,MITRA_PEMBAYARAN_DIGITAL,TOKOPEDIA,2023-04-14 07:54:50.944876,2023-04-14 07:54:50.944876
12688,6c77b1e4e131e305e61acee3cac674ec2fbe2d7e37c815...,no promotion,6443c61ffcea2faaf7590a73f723f63d0c1998afe0a151...,5d2233f5a1a6435891142442fac09a77809d0c16496f07...,5.000014e+08,BANK_TRANSFER,BCA_MANUAL,2023-04-14 16:23:33.385429,2023-04-14 16:28:10.052749
30575,dcc071b5779bff5b136a6402a75b09a8e0e9584150cab2...,no promotion,6443c61ffcea2faaf7590a73f723f63d0c1998afe0a151...,5d2233f5a1a6435891142442fac09a77809d0c16496f07...,5.000008e+08,BANK_TRANSFER,BCA_MANUAL,2023-04-14 15:42:45.045786,2023-04-14 15:57:11.770049
13044,9247e3cb2cfb65fb8b02ed598b3e4796301216a16b287a...,no promotion,9448fc3cb0eb59d4823d289082b8df56a945b710b80295...,5d2233f5a1a6435891142442fac09a77809d0c16496f07...,2.000008e+08,BANK_TRANSFER,BCA_MANUAL,2023-04-14 17:03:47.482674,2023-04-14 18:32:52.867833
37742,a7beac17193f1df7e86650e646b5662b38449d68a1c86c...,no promotion,6443c61ffcea2faaf7590a73f723f63d0c1998afe0a151...,5d2233f5a1a6435891142442fac09a77809d0c16496f07...,1.000002e+08,BANK_TRANSFER,BCA_MANUAL,2023-04-14 20:40:22.125522,2023-04-14 21:02:32.676436
...,...,...,...,...,...,...,...,...,...
25485,b67675b9a719cd3d16a1611e985e8f7d408d0526d7d4f7...,no promotion,2155a0b3ec4ef3cb18f0890eab8177381c7e96dad96297...,2155a0b3ec4ef3cb18f0890eab8177381c7e96dad96297...,1.014500e+04,MITRA_PEMBAYARAN_DIGITAL,BLIBLI,2023-04-14 10:47:07.613207,2023-04-14 10:50:51.564429
3922,d2deae4dbb3723f9af6d49310e452eb074176409b2cc03...,no promotion,10f3200ad77826457a7b33726d1ec95de21ee6400745aa...,10f3200ad77826457a7b33726d1ec95de21ee6400745aa...,1.014500e+04,MITRA_PEMBAYARAN_DIGITAL,BLIBLI,2023-04-14 21:14:29.256850,2023-04-14 21:15:39.006497
29673,3ddbd6002ca761ed54e6fa924f433b2eabe366678e0993...,no promotion,2155a0b3ec4ef3cb18f0890eab8177381c7e96dad96297...,2155a0b3ec4ef3cb18f0890eab8177381c7e96dad96297...,1.014500e+04,MITRA_PEMBAYARAN_DIGITAL,BLIBLI,2023-04-14 05:13:29.191225,2023-04-14 05:14:49.177713
33517,af42ee98921f8c3fddf33a4f57254811a11a807ca4aa4c...,no promotion,10f3200ad77826457a7b33726d1ec95de21ee6400745aa...,10f3200ad77826457a7b33726d1ec95de21ee6400745aa...,1.014500e+04,MITRA_PEMBAYARAN_DIGITAL,BLIBLI,2023-04-14 17:23:35.546880,2023-04-14 17:31:56.446888


In [58]:
# Summary statistics of the transactions created on 2023-04-14.
day_to_inspect = pd.to_datetime("2023-04-14").date()
created_that_day = df["transaction_created_datetime"].dt.date == day_to_inspect
df[created_that_day].describe()

Unnamed: 0,transaction_amount,transaction_created_datetime,transaction_updated_datetime
count,110.0,110,110
mean,206871000.0,2023-04-14 12:37:03.046355200,2023-04-15 17:22:08.116631040
min,10145.0,2023-04-14 00:22:54.065165,2023-04-14 01:19:51.998980
25%,24958.9,2023-04-14 09:16:31.683661056,2023-04-14 10:47:19.347904512
50%,650074.2,2023-04-14 12:41:45.581100544,2023-04-14 13:38:43.455645184
75%,11689580.0,2023-04-14 16:25:14.631651072,2023-04-14 17:29:44.734825984
max,20140100000.0,2023-04-14 23:28:20.493883,2023-08-07 07:54:00.615037
std,1919359000.0,,


Terjadi lonjakan total nilai transaksi harian yang luar biasa pada tanggal 14 April 2023. Ini dipengaruhi oleh satu self transaction yang tidak normal senilai sebelas digit.

In [56]:
# Transactions created on 2023-06-18, largest amounts first.
day_to_inspect = pd.to_datetime("2023-06-18").date()
created_that_day = df["transaction_created_datetime"].dt.date == day_to_inspect
df[created_that_day].sort_values(by='transaction_amount', ascending=False)

Unnamed: 0,dpt_id,dpt_promotion_id,buyer_id,seller_id,transaction_amount,payment_method_name,payment_provider_name,transaction_created_datetime,transaction_updated_datetime
49985,64df847059e06e478a733fcedeed863c182bc59e9c2b86...,no promotion,81046b351fb34308b3d2352537f2b7bfb512ec1ca217e8...,5d2233f5a1a6435891142442fac09a77809d0c16496f07...,4.009250e+08,MITRA_PEMBAYARAN_DIGITAL,BLIBLI,2023-06-18 13:25:16.226593,2023-06-18 13:27:48.435377
34286,b2774e4b2d7df5b85be485477d7ad65c8c91f35adbe5f8...,no promotion,06aa8c62e26b49cfe99f34d583c7c197756cbe35878bca...,5d2233f5a1a6435891142442fac09a77809d0c16496f07...,4.000000e+08,MITRA_PEMBAYARAN_DIGITAL,BLIBLI,2023-06-18 00:18:47.499499,2023-06-18 00:18:47.499499
12228,bf4ba82cc3b5d61d39955227ba4d74eaec1413980cfcc0...,no promotion,06aa8c62e26b49cfe99f34d583c7c197756cbe35878bca...,5d2233f5a1a6435891142442fac09a77809d0c16496f07...,4.000000e+08,MITRA_PEMBAYARAN_DIGITAL,BLIBLI,2023-06-18 00:18:06.851828,2023-06-18 00:18:06.851828
18422,219577d6b39d3621786cd3e2ccef54287e4d16431997e3...,no promotion,adf024dab63cd741acb845cf75ac28a836cfba564df266...,5d2233f5a1a6435891142442fac09a77809d0c16496f07...,3.045000e+08,MITRA_PEMBAYARAN_DIGITAL,BLIBLI,2023-06-18 08:41:17.603668,2023-06-26 11:38:52.459164
36806,c718f4fbb7b02795ee4d6b9d7864525c0be2307fa3622d...,promotion-292891995,e9f023c7e1d62ff3380b2069f758d5433733c1503ae135...,5d2233f5a1a6435891142442fac09a77809d0c16496f07...,2.840803e+08,CREDIT_CARD,JCB,2023-06-18 15:46:27.407844,2023-06-18 15:48:39.684352
...,...,...,...,...,...,...,...,...,...
45417,42f73a1b123368c3f2f0988a2de2ef8c158efb34d9c69f...,no promotion,a77ddf7b007c7f27700a9db82956ccbd89f950a2a854af...,a77ddf7b007c7f27700a9db82956ccbd89f950a2a854af...,1.999550e+04,MITRA_PEMBAYARAN_DIGITAL,BLIBLI,2023-06-18 11:16:02.262846,2023-06-18 11:16:32.169144
35480,a52c89ae506fb709ef82910743d645e001939de28efec3...,no promotion,6dd310a1d631e1a82bde42de81b16a8964bf40c69343d4...,5d2233f5a1a6435891142442fac09a77809d0c16496f07...,1.680028e+04,MITRA_PEMBAYARAN_DIGITAL,BLIBLI,2023-06-18 17:15:21.592219,2023-06-19 11:07:35.567032
46985,b5ba45090cd0d4dede506feaedfa40ac8fea9cf2a85350...,no promotion,0bb440f2ae8461ca7b424f9b0efddbb2a1993e07a6d629...,0bb440f2ae8461ca7b424f9b0efddbb2a1993e07a6d629...,1.500000e+04,MITRA_PEMBAYARAN_DIGITAL,BLIBLI,2023-06-18 14:44:07.392724,2023-06-18 19:08:42.147922
36718,d7a5e8d69986f54fecadad268555350d4d7bef1be4412d...,no promotion,984f600688711226255556d914b02b3a3fdcbe9d367b76...,5d2233f5a1a6435891142442fac09a77809d0c16496f07...,1.497930e+04,MITRA_PEMBAYARAN_DIGITAL,BLIBLI,2023-06-18 03:22:27.613550,2023-06-18 03:23:11.242410


In [57]:
# Summary statistics of the transactions created on 2023-06-18.
day_to_inspect = pd.to_datetime("2023-06-18").date()
created_that_day = df["transaction_created_datetime"].dt.date == day_to_inspect
df[created_that_day].describe()

Unnamed: 0,transaction_amount,transaction_created_datetime,transaction_updated_datetime
count,266.0,266,266
mean,42071300.0,2023-06-18 14:34:53.640132352,2023-06-21 20:06:21.122921984
min,10150.0,2023-06-18 00:02:16.163369,2023-06-18 00:02:16.163369
25%,1009400.0,2023-06-18 10:28:46.134938368,2023-06-18 11:03:47.690789888
50%,10000010.0,2023-06-18 14:27:47.777505536,2023-06-18 14:58:44.111866880
75%,99999010.0,2023-06-18 19:58:41.200546816,2023-06-18 20:43:00.643092992
max,400925000.0,2023-06-18 23:51:21.176713,2023-12-01 08:18:18.818069
std,65532020.0,,


# Buyer-Seller Network Graph

In [15]:
import networkx as nx
import plotly.graph_objects as go

# Keep only transactions between two different parties. A self transaction
# (buyer_id == seller_id) would only contribute a self-loop to the graph.
df_filtered = df[df['buyer_id'] != df['seller_id']]

# Edge list: one (buyer_id, seller_id) row per remaining transaction.
edge_list = df_filtered[['buyer_id', 'seller_id']]

# Build the graph. With no create_using argument, from_pandas_edgelist returns an
# undirected nx.Graph, so repeated transactions between the same pair become one edge
# and the buyer -> seller direction is not kept.
G = nx.from_pandas_edgelist(edge_list, source='buyer_id', target='seller_id')

In [59]:
# Number of distinct ids (buyers and sellers combined) that appear as nodes in the graph.
G.number_of_nodes()

6843

### Menemukan subgraph terbesar

In [16]:
# 1. Collect every connected component (a set of node ids) of the graph.
connected_components = list(nx.connected_components(G))

# 2. Pair each component's subgraph view with its node count, keeping the order in
#    which NetworkX yielded the components.
subgraphs_info = [
    (G.subgraph(member_nodes), len(member_nodes))
    for member_nodes in connected_components
]

# 3. Print one line per component with its size.
for position, (component_view, component_size) in enumerate(subgraphs_info, start=1):
    print(f"Subgraf {position}: {component_size} node(s)")

Subgraf 1: 6591 node(s)
Subgraf 2: 14 node(s)
Subgraf 3: 2 node(s)
Subgraf 4: 2 node(s)
Subgraf 5: 2 node(s)
Subgraf 6: 2 node(s)
Subgraf 7: 2 node(s)
Subgraf 8: 2 node(s)
Subgraf 9: 2 node(s)
Subgraf 10: 2 node(s)
Subgraf 11: 3 node(s)
Subgraf 12: 2 node(s)
Subgraf 13: 2 node(s)
Subgraf 14: 2 node(s)
Subgraf 15: 2 node(s)
Subgraf 16: 3 node(s)
Subgraf 17: 3 node(s)
Subgraf 18: 4 node(s)
Subgraf 19: 2 node(s)
Subgraf 20: 2 node(s)
Subgraf 21: 2 node(s)
Subgraf 22: 2 node(s)
Subgraf 23: 2 node(s)
Subgraf 24: 2 node(s)
Subgraf 25: 2 node(s)
Subgraf 26: 2 node(s)
Subgraf 27: 2 node(s)
Subgraf 28: 3 node(s)
Subgraf 29: 4 node(s)
Subgraf 30: 3 node(s)
Subgraf 31: 2 node(s)
Subgraf 32: 2 node(s)
Subgraf 33: 2 node(s)
Subgraf 34: 3 node(s)
Subgraf 35: 2 node(s)
Subgraf 36: 3 node(s)
Subgraf 37: 2 node(s)
Subgraf 38: 2 node(s)
Subgraf 39: 2 node(s)
Subgraf 40: 2 node(s)
Subgraf 41: 2 node(s)
Subgraf 42: 2 node(s)
Subgraf 43: 3 node(s)
Subgraf 44: 2 node(s)
Subgraf 45: 2 node(s)
Subgraf 46: 2 n

### Analisis subgraf kedua terbesar (14 node)

In [66]:
# Pick the second-largest connected component by node count. subgraphs_info is in
# NetworkX iteration order, which is not sorted by size, so indexing it with [1]
# directly only works by coincidence.
components_by_size = sorted(subgraphs_info, key=lambda info: info[1], reverse=True)
subgraph_2_nodes = list(components_by_size[1][0].nodes())

# Keep only transactions whose buyer AND seller both belong to that component.
df_subgraph_2 = df_filtered[
    (df_filtered['buyer_id'].isin(subgraph_2_nodes)) &
    (df_filtered['seller_id'].isin(subgraph_2_nodes))
]

print(f"Total transaksi di subgraf ke-2: {len(df_subgraph_2)}")
df_subgraph_2  # Display the matching transactions


Total transaksi di subgraf ke-2: 32


Unnamed: 0,dpt_id,dpt_promotion_id,buyer_id,seller_id,transaction_amount,payment_method_name,payment_provider_name,transaction_created_datetime,transaction_updated_datetime
339,e6d837da7080c7df14a4c8b0c8bdc294f5263c585342ca...,no promotion,985cafd6d0d1c62748bbcfe8be780c09de7ce6b80a6d67...,cc5931492ffb69621da4f4b6d075380baf7d46e7e7af28...,1524200.0,MITRA_PEMBAYARAN_DIGITAL,TOKOPEDIA,2023-02-23 07:19:19.907320,2023-02-23 07:19:19.907320
630,9f27af1d4d2d605815634d7fad22deb4f4ccce16696bda...,no promotion,985cafd6d0d1c62748bbcfe8be780c09de7ce6b80a6d67...,cc5931492ffb69621da4f4b6d075380baf7d46e7e7af28...,1524200.0,MITRA_PEMBAYARAN_DIGITAL,TOKOPEDIA,2023-05-09 08:34:00.447857,2023-05-09 08:34:00.447857
2491,43247d31554ebc6e0575cc29f03afd02eda4e0349da881...,no promotion,985cafd6d0d1c62748bbcfe8be780c09de7ce6b80a6d67...,cc5931492ffb69621da4f4b6d075380baf7d46e7e7af28...,1524400.0,MITRA_PEMBAYARAN_DIGITAL,TOKOPEDIA,2023-01-31 06:48:38.812312,2023-01-31 06:48:38.812312
2619,af24acc6f50fdd509517574b3a84676d5a1f12841bd753...,no promotion,985cafd6d0d1c62748bbcfe8be780c09de7ce6b80a6d67...,cc5931492ffb69621da4f4b6d075380baf7d46e7e7af28...,827800.0,MITRA_PEMBAYARAN_DIGITAL,TOKOPEDIA,2023-07-03 07:16:01.877409,2023-07-03 07:16:01.877409
3793,831256c4476a1bdf902bc032b145d251598e37ee5eb5fb...,no promotion,cf436a102ec954a16e2b828b5e3936118ab200419f5a06...,04ada8d5a21b3ee301e6841345139acf0be91492b6382b...,1526200.0,MITRA_PEMBAYARAN_DIGITAL,TOKOPEDIA,2023-06-08 06:48:19.719933,2023-06-08 06:48:19.719933
3897,f4684977d6fa44ba411c279f49f420b0bc4da88d6ecb1c...,no promotion,cf436a102ec954a16e2b828b5e3936118ab200419f5a06...,93628e8e3030e3e1d4f0c4288991fac02621374a426317...,827000.0,MITRA_PEMBAYARAN_DIGITAL,TOKOPEDIA,2023-10-17 11:34:46.072276,2023-10-17 11:34:46.072276
10178,be1a5d1154beb3423973eb4e708001f3d1f04714b9849a...,no promotion,985cafd6d0d1c62748bbcfe8be780c09de7ce6b80a6d67...,28c382af6c6f5e60263c2d6e28589b2af8cd05a79d813d...,821000.0,MITRA_PEMBAYARAN_DIGITAL,TOKOPEDIA,2023-12-09 20:34:45.286478,2023-12-09 20:34:52.365826
11682,6295da80767053a9e852cf30f62589b225ba2fac277a5d...,no promotion,93628e8e3030e3e1d4f0c4288991fac02621374a426317...,388dbd8594db84a0485ad0085b7b2cceef013e10ca3e72...,1505000.0,MITRA_PEMBAYARAN_DIGITAL,TOKOPEDIA,2023-05-29 11:45:38.049412,2023-05-29 11:45:48.839777
12997,7b9b610330651cb2aeeda06232c2012f542f8af40d7e49...,no promotion,cc5931492ffb69621da4f4b6d075380baf7d46e7e7af28...,93628e8e3030e3e1d4f0c4288991fac02621374a426317...,823000.0,MITRA_PEMBAYARAN_DIGITAL,TOKOPEDIA,2023-12-25 19:48:44.966280,2023-12-25 19:48:52.841891
17563,01be7b8798a7484b8cdaef07936d145761cbf96bfdf53f...,no promotion,cf436a102ec954a16e2b828b5e3936118ab200419f5a06...,7a17d8962fed802f30e0e68fd36767d4128caac3c6ca30...,1522100.0,MITRA_PEMBAYARAN_DIGITAL,TOKOPEDIA,2023-03-17 06:53:24.045996,2023-03-17 06:53:24.045996


In [69]:
# Pick the second-largest connected component by node count. subgraphs_info is in
# NetworkX iteration order, which is not sorted by size, so indexing it with [1]
# directly only works by coincidence.
components_by_size = sorted(subgraphs_info, key=lambda info: info[1], reverse=True)
subgraph_2_nodes = list(components_by_size[1][0].nodes())

# Rows of the user table whose company_id is one of the nodes in that component.
user_filtered = user[user['company_id'].isin(subgraph_2_nodes)]

user_filtered


Unnamed: 0,company_id,company_kyc_status_name,company_kyb_status_name,company_type_group,company_phone_verified_flag,company_email_verified_flag,user_fraud_flag,testing_account_flag,blacklist_account_flag,package_active_name,company_registered_datetime
286,985cafd6d0d1c62748bbcfe8be780c09de7ce6b80a6d67...,VALIDASI_BERHASIL,BELUM_VALIDASI,PERORANGAN,0.0,1.0,0.0,0.0,0.0,FREE,2021-08-09 17:33:55
762,6e0fe0280311be18ab37e2ac10675354464e6448e38e4e...,VALIDASI_BERHASIL,BELUM_VALIDASI,PERORANGAN,0.0,1.0,0.0,0.0,0.0,FREE,2021-08-09 18:49:55
1384,93628e8e3030e3e1d4f0c4288991fac02621374a426317...,VALIDASI_BERHASIL,BELUM_VALIDASI,PERORANGAN,1.0,1.0,0.0,0.0,0.0,FREE,2021-12-21 20:28:25
1533,cf436a102ec954a16e2b828b5e3936118ab200419f5a06...,VALIDASI_BERHASIL,BELUM_VALIDASI,PERORANGAN,0.0,1.0,0.0,0.0,0.0,FREE,2021-08-19 23:36:07
1828,73a350935a9c1f4f2bf477a961388cf316eb2282ec50e6...,VALIDASI_BERHASIL,BELUM_VALIDASI,PERORANGAN,0.0,1.0,0.0,0.0,0.0,FREE,2021-08-20 01:20:38
1992,28c382af6c6f5e60263c2d6e28589b2af8cd05a79d813d...,VALIDASI_BERHASIL,BELUM_VALIDASI,PERORANGAN,0.0,1.0,0.0,0.0,0.0,FREE,2021-08-20 01:14:09
2013,0fe7c714dcccfbf4b9ed7a6d51bb7e0f1f23244d87e74b...,VALIDASI_BERHASIL,BELUM_VALIDASI,PERORANGAN,0.0,1.0,0.0,0.0,0.0,FREE,2021-08-09 21:55:31
2818,04ada8d5a21b3ee301e6841345139acf0be91492b6382b...,VALIDASI_BERHASIL,BELUM_VALIDASI,PERORANGAN,1.0,1.0,0.0,0.0,0.0,FREE,2021-07-07 21:49:47
3358,cc5931492ffb69621da4f4b6d075380baf7d46e7e7af28...,VALIDASI_BERHASIL,BELUM_VALIDASI,PERORANGAN,0.0,1.0,0.0,0.0,0.0,FREE,2021-08-20 00:59:09
3831,7810cf1bfee064d584ddf8d5e087b846cb8a461fc85714...,VALIDASI_BERHASIL,BELUM_VALIDASI,PERORANGAN,1.0,1.0,0.0,0.0,0.0,FREE,2021-07-08 00:30:17


### Membuat graph terbesar dan graf-graf selain graf terbesar

In [18]:
# Node set of the connected component with the most nodes.
largest_component = max(nx.connected_components(G), key=len)

# Subgraph view restricted to that component.
G_largest = G.subgraph(largest_component)

# Subgraph view over every node that is NOT in the largest component.
nodes_outside_largest = [node for node in G.nodes if node not in largest_component]
G_outside_largest = G.subgraph(nodes_outside_largest)


In [60]:
# Node count of the largest connected component.
G_largest.number_of_nodes()

6591

In [63]:
# Node count of everything outside the largest connected component.
G_outside_largest.number_of_nodes()

252

### Menemukan central node dari subgraph terbesar

In [19]:
# Degree centrality of every node in the largest component.
degree_centrality = nx.degree_centrality(G_largest)

# Five nodes with the highest degree centrality, as (node, score) pairs.
sorted_degree_centrality = sorted(
    degree_centrality.items(),
    key=lambda node_score: node_score[1],
    reverse=True,
)[:5]
sorted_degree_centrality

[('5d2233f5a1a6435891142442fac09a77809d0c16496f07b2575c3a451fbd7f60',
  0.9644916540212443),
 ('b4c5286fbf6443dd4df37457f3ef23a2b71253b1c86452f824e8856befba3483',
  0.0019726858877086493),
 ('ed654f3687ca1ef5da46e0d2988c906c02a740fa1fb1bf68f0d3725ac35e2201',
  0.0012139605462822458),
 ('50e77d9e9f3063dc30825b05047d3ef4e9a9e21ae4bc18be445a8a8230451bbc',
  0.0009104704097116843),
 ('053819be0c911b698bbd253728ba9b9a85779f2d9a5a39503b29f9f01cd0aa0e',
  0.0007587253414264037)]

In [20]:
# Node with the highest degree centrality (first one encountered on ties).
central_node_dc = max(degree_centrality.items(), key=lambda node_score: node_score[1])[0]
central_node_dc

'5d2233f5a1a6435891142442fac09a77809d0c16496f07b2575c3a451fbd7f60'

In [21]:
# Eigenvector centrality of every node in the largest component.
eigenvector_centrality = nx.eigenvector_centrality(G_largest, max_iter=1000, tol=1e-6)

# Five nodes with the highest eigenvector centrality, as (node, score) pairs.
sorted_eigenvector_centrality = sorted(
    eigenvector_centrality.items(),
    key=lambda node_score: node_score[1],
    reverse=True,
)[:5]
sorted_eigenvector_centrality

[('5d2233f5a1a6435891142442fac09a77809d0c16496f07b2575c3a451fbd7f60',
  0.7070334662403112),
 ('ed654f3687ca1ef5da46e0d2988c906c02a740fa1fb1bf68f0d3725ac35e2201',
  0.009212491316665976),
 ('8764fbf59745f1235c38abb5c858dcd2a4b4b511f725c251932baedbf63d0ef2',
  0.009208123368957181),
 ('b4c5286fbf6443dd4df37457f3ef23a2b71253b1c86452f824e8856befba3483',
  0.009112641835375121),
 ('02611e2fdd7d730bddbd654baf24f03a739704bcb34c010c3cb54d3069087eea',
  0.009099900559392152)]

In [22]:
# Node with the highest eigenvector centrality. The ranking key must come from
# eigenvector_centrality itself; using degree_centrality.get here would just return
# the degree-centrality winner a second time.
central_node_ec = max(eigenvector_centrality, key=eigenvector_centrality.get)
central_node_ec

'5d2233f5a1a6435891142442fac09a77809d0c16496f07b2575c3a451fbd7f60'

### Visualisasi Network Subgraph Terbesar

In [29]:
from plotly.subplots import make_subplots
import plotly.graph_objects as go

# Helper that turns a graph into Plotly traces
def create_graph_traces(G, title, layout_seed=42):
    """Build the Plotly scatter traces (edges and nodes) for drawing a NetworkX graph.

    Parameters
    ----------
    G : networkx graph
        Graph to draw. An empty graph yields two empty traces.
    title : str
        Not used when building the traces; kept so existing calls keep working.
    layout_seed : int, default 42
        Seed passed to ``nx.spring_layout`` so node positions are reproducible.

    Returns
    -------
    list
        ``[edge_trace, node_trace]``, both ``go.Scatter`` objects.
    """
    node_size = 10  # every marker uses the same size; the central node stands out by color

    pos = nx.spring_layout(G, seed=layout_seed)  # node -> (x, y)

    # Edge coordinates: each edge is a two-point segment followed by None, so Plotly
    # breaks the line between consecutive edges.
    edge_x = []
    edge_y = []
    for source, target in G.edges():
        x0, y0 = pos[source]
        x1, y1 = pos[target]
        edge_x.extend([x0, x1, None])
        edge_y.extend([y0, y1, None])

    edge_trace = go.Scatter(
        x=edge_x, y=edge_y,
        line=dict(width=0.5, color='#888'),
        hoverinfo='none',
        mode='lines'
    )

    # Number of neighbors per node, computed once and reused below.
    degrees = {node: len(G.adj[node]) for node in G.nodes()}

    # Node with the most neighbors (first one on ties); None when the graph is empty.
    central_node = max(degrees, key=degrees.get, default=None)

    node_x = []
    node_y = []
    node_text = []
    node_sizes = []
    node_colors = []
    for node, degree in degrees.items():
        x, y = pos[node]
        node_x.append(x)
        node_y.append(y)
        node_text.append(f"Node: {node}<br>Connections: {degree}")
        node_sizes.append(node_size)
        # Central node is drawn in red; every other node is colored by its degree.
        node_colors.append('red' if node == central_node else degree)

    node_trace = go.Scatter(
        x=node_x, y=node_y,
        mode='markers',
        hoverinfo='text',
        marker=dict(
            showscale=True,
            colorscale='Viridis',
            size=node_sizes,
            color=node_colors,
            colorbar=dict(
                thickness=15,
                # title.side replaces the deprecated flat `titleside` attribute.
                title=dict(text='Node Degree', side='right'),
                xanchor='left'
            ),
            line_width=2
        ),
        text=node_text
    )

    return [edge_trace, node_trace]

In [30]:
# Traces for the largest component and for everything outside it; the layout seed is
# spelled out (it equals the function default) so both layouts are visibly reproducible.
traces_largest = create_graph_traces(G=G_largest, title="Largest Subgraph", layout_seed=42)
traces_outside = create_graph_traces(G=G_outside_largest, title="Outside Largest Subgraph", layout_seed=42)

In [35]:
# Figure for the largest subgraph, built directly from its edge and node traces.
fig_largest = go.Figure(data=traces_largest)

# Layout: title, no legend, closest-point hover, tight margins and no grid lines.
# title_font_size (layout.title.font.size) replaces the deprecated `titlefont_size`.
fig_largest.update_layout(
    title_text="Visualization of Largest Subgraph",
    title_font_size=16,
    showlegend=False,
    hovermode="closest",
    margin=dict(b=0, l=0, r=0, t=40),
    xaxis=dict(showgrid=False, zeroline=False),
    yaxis=dict(showgrid=False, zeroline=False),
    height=800  # taller canvas for better visibility
)

# Show the figure for the largest subgraph
fig_largest.show()


In [37]:
# Figure for the nodes outside the largest subgraph, built directly from their traces.
fig_outside = go.Figure(data=traces_outside)

# Layout: title, no legend, closest-point hover, tight margins and no grid lines.
# title_font_size (layout.title.font.size) replaces the deprecated `titlefont_size`.
fig_outside.update_layout(
    title_text="Visualization of Outside Largest Subgraph",
    title_font_size=16,
    showlegend=False,
    hovermode="closest",
    margin=dict(b=0, l=0, r=0, t=40),
    xaxis=dict(showgrid=False, zeroline=False),
    yaxis=dict(showgrid=False, zeroline=False),
    height=800  # taller canvas for better visibility
)

# Show the figure for the outside largest subgraph
fig_outside.show()


# Promotional Activities

In [38]:
# Membaca data transaksi dan promosi
transaction_data = df
promotion_data = promotion

# Hanya menyertakan transaksi dengan promosi
promo_transactions = transaction_data[transaction_data["dpt_promotion_id"] != "no promotion"]

In [39]:
# Share of all transactions that used a promotion, in percent.
promo_share_pct = len(promo_transactions) / len(transaction_data) * 100
print(f"Persentase transaksi menggunakan promo hanya {promo_share_pct:.2f}%")

Persentase transaksi menggunakan promo hanya 1.87%


### Aktivitas promosi harian

In [41]:
# Analisis aktivitas promosi
promo_usage_by_day = promo_transactions.groupby(promo_transactions["transaction_created_datetime"].dt.date).size().reset_index(name="promo_count")
promo_usage_by_day.columns = ["date", "promo_count"]

# Visualisasi aktivitas promosi
fig_promo_activity = px.line(
    promo_usage_by_day,
    x="date",
    y="promo_count",
    title="Aktivitas Promosi Harian",
    labels={"date": "Tanggal", "promo_count": "Jumlah Transaksi Menggunakan Promo"},
    template="plotly_white"
)
fig_promo_activity.show()

### Jumlah buyer berdasarkan banyaknya penggunaan promo

In [47]:
# Kelompokkan berdasarkan buyer_id, lalu hitung jumlah transaksi yang dilakukan setiap buyer
buyer_promo_usage = promo_transactions.groupby('buyer_id').size().reset_index(name='promo_usage')

# Menghitung jumlah buyer berdasarkan banyaknya transaksi promo yang dilakukan
usage_summary = buyer_promo_usage['promo_usage'].value_counts().reset_index()
usage_summary.columns = ['promo_usage', 'buyer_count']

# Menyortir hasil
usage_summary = usage_summary.sort_values(by='promo_usage', ascending=False)

# Menampilkan hasil
print("Jumlah Buyer Berdasarkan Banyaknya Penggunaan Promo:")
for _, row in usage_summary.iterrows():
    print(f"Buyer menggunakan promo sebanyak {row['promo_usage']} kali: {row['buyer_count']}     ({row['buyer_count']/len(buyer_promo_usage)*100:.2f}%)")


Jumlah Buyer Berdasarkan Banyaknya Penggunaan Promo:
Buyer menggunakan promo sebanyak 8 kali: 1     (0.14%)
Buyer menggunakan promo sebanyak 6 kali: 2     (0.27%)
Buyer menggunakan promo sebanyak 5 kali: 2     (0.27%)
Buyer menggunakan promo sebanyak 4 kali: 7     (0.96%)
Buyer menggunakan promo sebanyak 3 kali: 26     (3.56%)
Buyer menggunakan promo sebanyak 2 kali: 109     (14.93%)
Buyer menggunakan promo sebanyak 1 kali: 583     (79.86%)


In [49]:
# Membuat bar chart
fig_promo_abuse = px.bar(
    buyer_promo_usage.value_counts("promo_usage").reset_index(name="count"),
    x="promo_usage",
    y="count",
    title="Distribusi Penggunaan Promosi per Pengguna",
    labels={"promo_usage": "Jumlah Penggunaan Promosi", "count": "Jumlah Pengguna"},
    template="plotly_white"
)

# Tambahkan label jumlah di setiap bar
fig_promo_abuse.update_traces(
    texttemplate='%{y}',  # Menampilkan nilai di setiap bar
    textposition='outside'  # Posisi teks di atas bar
)

# Tampilkan grafik
fig_promo_abuse.show()


### Jumlah buyer berdasarkan banyaknya penggunaan promo yang sama

In [52]:
# Kelompokkan berdasarkan buyer_id dan dpt_promotion_id, lalu hitung jumlah transaksi
buyer_promotion_counts = (
    promo_transactions.groupby(['buyer_id', 'dpt_promotion_id'])
    .size()
    .reset_index(name='promo_transaction_count')
)

# Menghitung jumlah pasangan buyer-promo berdasarkan berapa kali kode promo digunakan
usage_summary = buyer_promotion_counts['promo_transaction_count'].value_counts().reset_index()
usage_summary.columns = ['promo_transaction_count', 'pair_count']

# Menyortir hasil
usage_summary = usage_summary.sort_values(by='promo_transaction_count', ascending=False)

# Menampilkan hasil
print("Jumlah Pasangan Buyer-Promo Berdasarkan Banyaknya Penggunaan Promo yang Sama:")
for _, row in usage_summary.iterrows():
    print(f"Buyer menggunakan satu kode promo sebanyak {row['promo_transaction_count']} kali: {row['pair_count']}     ({row['pair_count']/len(buyer_promotion_counts)*100:.2f}%)")


Jumlah Pasangan Buyer-Promo Berdasarkan Banyaknya Penggunaan Promo yang Sama:
Buyer menggunakan satu kode promo sebanyak 5 kali: 2     (0.23%)
Buyer menggunakan satu kode promo sebanyak 4 kali: 1     (0.11%)
Buyer menggunakan satu kode promo sebanyak 3 kali: 4     (0.46%)
Buyer menggunakan satu kode promo sebanyak 2 kali: 44     (5.03%)
Buyer menggunakan satu kode promo sebanyak 1 kali: 823     (94.16%)


In [53]:
# Membuat histogram berdasarkan jumlah pasangan buyer-promo
fig = px.bar(
    usage_summary,
    x='promo_transaction_count',
    y='pair_count',
    title="Distribusi Penggunaan Promo yang Sama per Pengguna",
    labels={
        'promo_transaction_count': 'Jumlah Penggunaan Promo',
        'pair_count': 'Jumlah Pasangan Buyer-Promo'
    },
    template='plotly_white'
)
# Tambahkan label jumlah di setiap bar
fig.update_traces(
    texttemplate='%{y}',  # Menampilkan nilai di setiap bar
    textposition='outside'  # Posisi teks di atas bar
)
# Menampilkan histogram
fig.show()

### Transaksi dengan promo yang sama 3 kali atau lebih berturut-turut

In [54]:
# Pastikan kolom datetime dalam format datetime dan data diurutkan berdasarkan waktu
promo_transactions['transaction_created_datetime'] = pd.to_datetime(promo_transactions['transaction_created_datetime'])
promo_transactions = promo_transactions.sort_values(by=['buyer_id', 'dpt_promotion_id', 'transaction_created_datetime'])

# Deteksi penggunaan kode promo yang sama berturut-turut
promo_transactions['same_promo_as_previous'] = (
    (promo_transactions['buyer_id'] == promo_transactions['buyer_id'].shift(1)) &  # Buyer sama dengan transaksi sebelumnya
    (promo_transactions['dpt_promotion_id'] == promo_transactions['dpt_promotion_id'].shift(1))  # Promo sama dengan transaksi sebelumnya
)

# Tambahkan kolom consecutive_promo_count yang dimulai dari 1
promo_transactions['consecutive_promo_count'] = (
    promo_transactions.groupby(['buyer_id', 'dpt_promotion_id'])['same_promo_as_previous']
    .transform(lambda x: x.cumsum() + 1)  # Tambahkan 1 agar hitungan mulai dari 1
)

# Identifikasi group dengan transaksi berturut-turut melebihi threshold
threshold = 3
promo_transactions['max_consecutive_count'] = (
    promo_transactions.groupby(['buyer_id', 'dpt_promotion_id'])['consecutive_promo_count']
    .transform('max')
)

# Filter semua transaksi dalam group yang memenuhi threshold
buyers_with_consecutive_promos = promo_transactions[promo_transactions['max_consecutive_count'] >= threshold].copy()

# Menambahkan kolom untuk menghitung jangka waktu transaksi dari transaksi pertama hingga transaksi terakhir dalam hari
buyers_with_consecutive_promos.loc[:, 'transaction_duration_days'] = (
    buyers_with_consecutive_promos.groupby(['buyer_id', 'dpt_promotion_id'])['transaction_created_datetime']
    .transform(lambda x: (x.max() - x.min()).days)  # Hitung selisih tanggal pertama dan terakhir dalam hari
)

# Mengurutkan hasil berdasarkan buyer_id, dpt_promotion_id, dan transaction_created_datetime
buyers_with_consecutive_promos = buyers_with_consecutive_promos.sort_values(by=['buyer_id', 'dpt_promotion_id', 'transaction_created_datetime'])

# Tampilkan hasil
buyers_with_consecutive_promos[['buyer_id', 'dpt_promotion_id', 'transaction_created_datetime',
                                'consecutive_promo_count', 'max_consecutive_count', 'transaction_duration_days']]




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Unnamed: 0,buyer_id,dpt_promotion_id,transaction_created_datetime,consecutive_promo_count,max_consecutive_count,transaction_duration_days
39094,0fe36baa803c6718b95994af1e4a31cf1cbce48c09a623...,promotion-219036467,2023-06-07 12:32:31.812745,1,4,84
25824,0fe36baa803c6718b95994af1e4a31cf1cbce48c09a623...,promotion-219036467,2023-08-08 15:52:49.284590,2,4,84
10163,0fe36baa803c6718b95994af1e4a31cf1cbce48c09a623...,promotion-219036467,2023-08-16 14:34:01.650474,3,4,84
30906,0fe36baa803c6718b95994af1e4a31cf1cbce48c09a623...,promotion-219036467,2023-08-31 09:24:46.153034,4,4,84
32198,3077819ec94241590c88a38ed75fa3ef4bbe5a3328d805...,promotion-219036467,2023-07-18 11:00:42.967224,1,5,23
7470,3077819ec94241590c88a38ed75fa3ef4bbe5a3328d805...,promotion-219036467,2023-07-20 14:34:55.605941,2,5,23
10462,3077819ec94241590c88a38ed75fa3ef4bbe5a3328d805...,promotion-219036467,2023-07-21 14:22:18.049891,3,5,23
25497,3077819ec94241590c88a38ed75fa3ef4bbe5a3328d805...,promotion-219036467,2023-08-02 08:36:36.624141,4,5,23
16999,3077819ec94241590c88a38ed75fa3ef4bbe5a3328d805...,promotion-219036467,2023-08-10 17:38:57.159187,5,5,23
44297,313247bf47c68d46eb70c91ca20dd070b5a012bdaacaff...,promotion-463737578,2023-09-27 12:53:27.354660,1,3,63


In [70]:
# Menghitung selisih waktu antara transaksi pertama dan terakhir per buyer dan promo
buyers_with_consecutive_promos['first_transaction_time'] = buyers_with_consecutive_promos.groupby(['buyer_id', 'dpt_promotion_id'])['transaction_created_datetime'].transform('min')
buyers_with_consecutive_promos['last_transaction_time'] = buyers_with_consecutive_promos.groupby(['buyer_id', 'dpt_promotion_id'])['transaction_created_datetime'].transform('max')
buyers_with_consecutive_promos['time_diff_days'] = (buyers_with_consecutive_promos['last_transaction_time'] - buyers_with_consecutive_promos['first_transaction_time']).dt.total_seconds() / (60 * 60 * 24)

# Menambahkan rentang waktu ke legend
buyers_with_consecutive_promos['legend_label'] = buyers_with_consecutive_promos['buyer_id'].astype(str) + ' (Period: ' + buyers_with_consecutive_promos['time_diff_days'].round(1).astype(str) + ' days)'

# Visualisasi jumlah transaksi berturut-turut menggunakan grafik batang
fig1 = px.histogram(buyers_with_consecutive_promos,
                    x='consecutive_promo_count',
                    color='legend_label',  # Ganti color dengan legend_label yang mengandung rentang waktu
                    title='Distribusi Jumlah Transaksi Berturut-Turut Menggunakan Promo yang Sama per Buyer',
                    labels={'consecutive_promo_count': 'Jumlah Transaksi Berturut-Turut'},
                    color_discrete_sequence=px.colors.qualitative.Vivid)

# Update layout untuk menghilangkan count dari hover dan menambahkan label waktu pada legend
fig1.update_layout(barmode='stack',
                  xaxis_title="Jumlah Transaksi Berturut-Turut",
                  yaxis_title="Jumlah Pembeli",
                  xaxis={'type': 'category'},
                  showlegend=True)

# Menyembunyikan count dalam hover
# fig1.update_traces(hovertemplate='%{x}<br>Buyer: %{legend.text[0]}<br>')

fig1.show()
