# Importing Libraries

In [548]:
%pip install category_encoders

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 23.2.1 -> 24.0
[notice] To update, run: python.exe -m pip install --upgrade pip


In [549]:
# Pake yang ini
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import LabelEncoder
from category_encoders import TargetEncoder
from sklearn.svm import SVC
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.neighbors import KNeighborsClassifier 
from sklearn.naive_bayes import GaussianNB
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import f1_score
from sklearn.impute import KNNImputer
from sklearn.preprocessing import OrdinalEncoder
from catboost import CatBoostClassifier
from sklearn.ensemble import StackingClassifier
from xgboost import XGBClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression


from sklearn.model_selection import train_test_split


from fast_ml.model_development import train_valid_test_split


# Data Extraction

In [550]:
TRAIN_FEATURE_PATH = r'../../Datasets/train_features.csv'
TRAIN_LABEL_PATH = r'../../Datasets/train_labels.csv'
TEST_PATH = r'../../Datasets/test_features.csv'
SAMPLE_SUBMISSION_PATH = r"../../Datasets/submission_format.csv"

In [551]:
train_feature_dat = pd.read_csv(TRAIN_FEATURE_PATH)
train_label_dat = pd.read_csv(TRAIN_LABEL_PATH)
test_dat = pd.read_csv(TEST_PATH)

In [552]:
train_feature_dat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3817 entries, 0 to 3816
Data columns (total 16 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   tahun_kelahiran          3817 non-null   int64  
 1   pendidikan               3628 non-null   object 
 2   status_pernikahan        3605 non-null   object 
 3   pendapatan               3627 non-null   float64
 4   jumlah_anak_balita       3627 non-null   float64
 5   jumlah_anak_remaja       3613 non-null   float64
 6   terakhir_belanja         3645 non-null   float64
 7   belanja_buah             3636 non-null   float64
 8   belanja_daging           3639 non-null   float64
 9   belanja_ikan             3624 non-null   float64
 10  belanja_kue              3603 non-null   float64
 11  pembelian_diskon         3639 non-null   float64
 12  pembelian_web            3652 non-null   float64
 13  pembelian_toko           3648 non-null   float64
 14  keluhan                 

# Data Prep


In [553]:
train_feature_dat = pd.merge(train_feature_dat, train_label_dat, left_index=True, right_index=True)

## Dropping Irrelevant features

In [554]:
#drop tanggal_menjadi_anggota
train_feature_dat = train_feature_dat.drop(columns={'tanggal_menjadi_anggota'})
test_dat = test_dat.drop(columns={'tanggal_menjadi_anggota'})

In [555]:
train_feature_dat = train_feature_dat.drop(columns={'keluhan', 'jumlah_anak_balita', 'jumlah_anak_remaja'})
test_dat = test_dat.drop(columns={'keluhan', 'jumlah_anak_balita', 'jumlah_anak_remaja'})

In [556]:
# #drop tanggal_menjadi_anggota & Belanjaan
# train_feature_dat = train_feature_dat.drop(columns={'tanggal_menjadi_anggota', 'belanja_buah', 'belanja_daging', 'belanja_ikan', 'belanja_kue'})
# test_dat = test_dat.drop(columns={'tanggal_menjadi_anggota', 'belanja_buah', 'belanja_daging', 'belanja_ikan', 'belanja_kue'})

## Encoding

### Label Encoder

In [557]:
label_encoder = LabelEncoder()
train_feature_dat['pendidikan_encoded'] = label_encoder.fit_transform(train_feature_dat['pendidikan'])
train_feature_dat =train_feature_dat.drop(columns='pendidikan')
test_dat['pendidikan_encoded'] = label_encoder.fit_transform(test_dat['pendidikan'])
test_dat =test_dat.drop(columns='pendidikan')

train_feature_dat['status_pernikahan_encoded'] = label_encoder.fit_transform(train_feature_dat['status_pernikahan'])
train_feature_dat =train_feature_dat.drop(columns='status_pernikahan')
test_dat['status_pernikahan_encoded'] = label_encoder.fit_transform(test_dat['status_pernikahan'])
test_dat =test_dat.drop(columns='status_pernikahan')


## Binning

In [558]:
# Define bin edges and labels
bin_edges = [1890, 1920, 1940, 1960, 1980, 2000, 2010]
bin_labels = ['0', '1', '2', '3', '4', '5']

# Perform binning
train_feature_dat['tahun_kelahiran_binned'] = pd.cut(train_feature_dat['tahun_kelahiran'], bins=bin_edges, labels=bin_labels)
train_feature_dat.drop(columns='tahun_kelahiran', inplace=True)
test_dat['tahun_kelahiran_binned'] = pd.cut(test_dat['tahun_kelahiran'], bins=bin_edges, labels=bin_labels)
test_dat.drop(columns='tahun_kelahiran', inplace=True)

## IQR

In [559]:
def handle_outliers_iqr(data):
    # Calculate quartiles
    Q1 = np.percentile(data, 25)
    Q3 = np.percentile(data, 75)
    IQR = Q3 - Q1
    
    # Calculate lower and upper bounds
    lower_bound = Q1 - 1.5 * IQR
    upper_bound = Q3 + 1.5 * IQR
    
    # Handle outliers
    # Replace outliers with the upper or lower bound
    data[data < lower_bound] = lower_bound
    data[data > upper_bound] = upper_bound
    
    return data

for column in train_feature_dat.select_dtypes(include=np.number):
    if column != 'jumlah_promosi':
        train_feature_dat[column] = handle_outliers_iqr(train_feature_dat[column])
        test_dat[column] = handle_outliers_iqr(test_dat[column])


## Null Handling

### Simple Imputer

In [560]:
from sklearn.impute import SimpleImputer
import pandas as pd

# Membuat objek SimpleImputer untuk data pelatihan dengan strategi 'median'
imputer_median = SimpleImputer(strategy='median')

# Mengisi nilai yang hilang dalam data pelatihan dengan strategi 'median'
train_feature_dat = pd.DataFrame(imputer_median.fit_transform(train_feature_dat), columns=train_feature_dat.columns)

# Membuat objek SimpleImputer untuk data pengujian dengan strategi 'most_frequent'
imputer_most_frequent = SimpleImputer(strategy='most_frequent')

# Mengambil kolom 'ID' dari df_test
test_dat_id = test_dat['ID']

# Menghapus kolom 'ID' dari df_test
test_dat_features = test_dat.drop('ID', axis=1)

# Melakukan imputasi nilai yang hilang dalam data pengujian kecuali pada kolom 'ID' dengan strategi 'most_frequent'
test_dat_features = pd.DataFrame(imputer_median.fit_transform(test_dat_features), columns=test_dat_features.columns)

# Menggabungkan kembali kolom 'ID' dengan data yang telah diimputasi
test_dat = pd.concat([test_dat_id, test_dat_features], axis=1)


In [561]:
train_feature_dat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3817 entries, 0 to 3816
Data columns (total 13 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   pendapatan                 3817 non-null   float64
 1   terakhir_belanja           3817 non-null   float64
 2   belanja_buah               3817 non-null   float64
 3   belanja_daging             3817 non-null   float64
 4   belanja_ikan               3817 non-null   float64
 5   belanja_kue                3817 non-null   float64
 6   pembelian_diskon           3817 non-null   float64
 7   pembelian_web              3817 non-null   float64
 8   pembelian_toko             3817 non-null   float64
 9   jumlah_promosi             3817 non-null   float64
 10  pendidikan_encoded         3817 non-null   float64
 11  status_pernikahan_encoded  3817 non-null   float64
 12  tahun_kelahiran_binned     3817 non-null   float64
dtypes: float64(13)
memory usage: 387.8 KB


### Mean for Numerical, Mode for Categorical

In [562]:
# # fill all null values with mean and mode
# train_feature_dat.fillna(train_feature_dat.mean(), inplace=True)
# test_dat.fillna(test_dat.mean(), inplace=True)

# train_feature_dat.fillna(train_feature_dat.mode().iloc[0], inplace=True)
# test_dat.fillna(test_dat.mode().iloc[0], inplace=True)
# train_feature_dat.info()

### Median for Numerical, Mode for Categorical

In [563]:
# # fill all null values with median and mode
# train_feature_dat.fillna(train_feature_dat.median(), inplace=True)
# test_dat.fillna(test_dat.median(), inplace=True)

# train_feature_dat.fillna(train_feature_dat.mode().iloc[0], inplace=True)
# test_dat.fillna(test_dat.mode().iloc[0], inplace=True)
# train_feature_dat.info()

### KNN Imputer

In [564]:
# categorical_columns = train_feature_dat.select_dtypes(include=['object']).columns

# # Encode categorical features into numerical format
# encoder = OrdinalEncoder()
# train_feature_dat[categorical_columns] = encoder.fit_transform(train_feature_dat[categorical_columns])

# # Apply KNN imputer to impute missing values
# imputer = KNNImputer(n_neighbors=5)
# train_feature_dat = pd.DataFrame(imputer.fit_transform(train_feature_dat), columns=train_feature_dat.columns)

# # Decode the imputed numerical values back to categorical values
# train_feature_dat[categorical_columns] = encoder.inverse_transform(train_feature_dat[categorical_columns].astype(int))


In [565]:
# # Mengambil kolom 'ID' dari df_test
# test_dat_id = test_dat['ID']

# # Menghapus kolom 'ID' dari test_dat
# test_dat = test_dat.drop('ID', axis=1)

# # Encode categorical features into numerical format
# encoder = OrdinalEncoder()
# test_dat[categorical_columns] = encoder.fit_transform(test_dat[categorical_columns])

# # Apply KNN imputer to impute missing values
# imputer = KNNImputer(n_neighbors=5)
# test_dat = pd.DataFrame(imputer.fit_transform(test_dat), columns=test_dat.columns)

# # Decode the imputed numerical values back to categorical values
# test_dat[categorical_columns] = encoder.inverse_transform(test_dat[categorical_columns].astype(int))

# # Menggabungkan kembali kolom 'ID' dengan data yang telah diimputasi
# test_dat = pd.concat([test_dat_id, test_dat], axis=1)

## Outlier Handling

### Windsorizer

In [566]:
# #windsorizer
# def windsorize_by_percentage(data, lower_percentile, upper_percentile):
#     lower_bound = np.percentile(data, lower_percentile)
#     upper_bound = np.percentile(data, upper_percentile)
#     windsorized_data = []
#     for value in data:
#         if value < lower_bound:
#             windsorized_data.append(lower_bound)
#         elif value > upper_bound:
#             windsorized_data.append(upper_bound)
#         else:
#             windsorized_data.append(value)

#     return windsorized_data

# # Specify lower and upper percentiles
# lower_percentile = 10
# upper_percentile = 90

# for column in train_feature_dat.select_dtypes(include=np.number):
#     train_feature_dat[column] = windsorize_by_percentage(train_feature_dat[column], lower_percentile, upper_percentile)
#     test_dat[column] = windsorize_by_percentage(test_dat[column], lower_percentile, upper_percentile)



### IQR

In [567]:
# def handle_outliers_iqr(data):
#     # Calculate quartiles
#     Q1 = np.percentile(data, 25)
#     Q3 = np.percentile(data, 75)
#     IQR = Q3 - Q1
    
#     # Calculate lower and upper bounds
#     lower_bound = Q1 - 1.5 * IQR
#     upper_bound = Q3 + 1.5 * IQR
    
#     # Handle outliers
#     # Replace outliers with the upper or lower bound
#     data[data < lower_bound] = lower_bound
#     data[data > upper_bound] = upper_bound
    
#     return data

# for column in train_feature_dat.select_dtypes(include=np.number):
#     if column != 'jumlah_promosi':
#         train_feature_dat[column] = handle_outliers_iqr(train_feature_dat[column])
#         test_dat[column] = handle_outliers_iqr(test_dat[column])


## Encoding

### One hot encoding

In [568]:
# # Perform one-hot encoding
# train_dat = pd.get_dummies(train_dat, columns=['attribute_0', 'attribute_1'])
# test_dat = pd.get_dummies(test_dat, columns=['attribute_0', 'attribute_1'])

### Label Encoding

In [569]:
# label_encoder = LabelEncoder()
# train_feature_dat['pendidikan_encoded'] = label_encoder.fit_transform(train_feature_dat['pendidikan'])
# train_feature_dat =train_feature_dat.drop(columns='pendidikan')
# test_dat['pendidikan_encoded'] = label_encoder.fit_transform(test_dat['pendidikan'])
# test_dat =test_dat.drop(columns='pendidikan')

# train_feature_dat['status_pernikahan_encoded'] = label_encoder.fit_transform(train_feature_dat['status_pernikahan'])
# train_feature_dat =train_feature_dat.drop(columns='status_pernikahan')
# test_dat['status_pernikahan_encoded'] = label_encoder.fit_transform(test_dat['status_pernikahan'])
# test_dat =test_dat.drop(columns='status_pernikahan')


In [570]:
# train_feature_dat.to_csv('../../Datasets/cleaned.csv', index=False)

# Feature Engineering

## Binning Tahun Kelahiran

In [571]:
# # Define bin edges and labels
# bin_edges = [1890, 1920, 1940, 1960, 1980, 2000, 2010]
# bin_labels = ['0', '1', '2', '3', '4', '5']

# # Perform binning
# train_feature_dat['tahun_kelahiran_binned'] = pd.cut(train_feature_dat['tahun_kelahiran'], bins=bin_edges, labels=bin_labels)
# train_feature_dat.drop(columns='tahun_kelahiran', inplace=True)
# train_feature_dat['tahun_kelahiran_binned'] = train_feature_dat['tahun_kelahiran_binned'].astype('int')

# test_dat['tahun_kelahiran_binned'] = pd.cut(test_dat['tahun_kelahiran'], bins=bin_edges, labels=bin_labels)
# test_dat.drop(columns='tahun_kelahiran', inplace=True)
# test_dat['tahun_kelahiran_binned'] = test_dat['tahun_kelahiran_binned'].astype('int')

## Binning Terakhir Belanja

In [572]:
# # Choose the number of bins
# num_bins = 5

# # Bin the data using equal-width binning
# train_feature_dat['terakhir_belanja_bins'] = pd.cut(train_feature_dat['terakhir_belanja'], bins=num_bins, labels=False)
# # train_feature_dat.drop(columns='terakhir_belanja', inplace=True)

# test_dat['terakhir_belanja_bins'] = pd.cut(test_dat['terakhir_belanja'], bins=num_bins, labels=False)
# # test_dat.drop(columns='terakhir_belanja', inplace=True)

In [573]:
train_feature_dat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3817 entries, 0 to 3816
Data columns (total 13 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   pendapatan                 3817 non-null   float64
 1   terakhir_belanja           3817 non-null   float64
 2   belanja_buah               3817 non-null   float64
 3   belanja_daging             3817 non-null   float64
 4   belanja_ikan               3817 non-null   float64
 5   belanja_kue                3817 non-null   float64
 6   pembelian_diskon           3817 non-null   float64
 7   pembelian_web              3817 non-null   float64
 8   pembelian_toko             3817 non-null   float64
 9   jumlah_promosi             3817 non-null   float64
 10  pendidikan_encoded         3817 non-null   float64
 11  status_pernikahan_encoded  3817 non-null   float64
 12  tahun_kelahiran_binned     3817 non-null   float64
dtypes: float64(13)
memory usage: 387.8 KB


In [574]:
# train_feature_dat.to_csv('../../Datasets/cleaned.csv', index=False)

# SPLIT TRAIN AND TEST

In [575]:
# Train test split
X = train_feature_dat.drop(columns='jumlah_promosi')
y = train_feature_dat['jumlah_promosi']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


# Target Encoding

In [576]:
# # Step 3: Calculate statistics and encode
# encoder = TargetEncoder()
# X_train = encoder.fit_transform(X_train, y_train)

# # Step 4: Apply encoding to validation and test sets
# X_test = encoder.transform(X_test)
# test_dat = encoder.transform(test_dat)

# Imbalance Handling

## Under + Oversampling (GAGAL)

In [577]:
# from imblearn.combine import SMOTEENN

# # Create an instance of SMOTEENN
# smote_enn = SMOTEENN(random_state=42)

# # Fit and transform the dataset
# X_train, y_train = smote_enn.fit_resample(X_train, y_train)


## Oversampling

In [578]:
from imblearn.over_sampling import SMOTE

# Apply SMOTE for oversampling
smote = SMOTE(random_state=42)
X_train, y_train = smote.fit_resample(X_train, y_train)

## Undersampling

In [579]:
# from imblearn.under_sampling import RandomUnderSampler

# # Create an instance of RandomUnderSampler
# undersampler = RandomUnderSampler(random_state=42)

# # Fit and transform the dataset
# X_train, y_train = undersampler.fit_resample(X_train, y_train)


# FEATURE SCALING

In [580]:
# scaler = MinMaxScaler()
# X_train = scaler.fit_transform(X_train)
# X_test = scaler.transform(X_test)

In [581]:
print(X_train.shape)
print(X_test.shape)

(5516, 12)
(764, 12)


#  MODEL

In [582]:
from sklearn.metrics import mean_squared_error
import numpy as np
import pandas as pd
import statsmodels.api as sm

def metrics(y_true, y_pred):
    print("F1 Score  :", f1_score(y_true, y_pred, average='macro'))

def train_eval_models(models: dict, X_train, X_test, y_train, y_test):
    for model in models:
        m = model
        m.fit(X_train, y_train)
        y_pred = m.predict(X_test)
        print(model.__class__.__name__, models[model])
        metrics(y_test, y_pred)

## RF, Gradient Boost, XGBosst, CatBoost

In [583]:
from sklearn.ensemble import RandomForestClassifier

rf_model = RandomForestClassifier(n_estimators=300, random_state=42)
rf_model.fit(X_train, y_train)
y_pred_rf = rf_model.predict(X_test)


from sklearn.ensemble import GradientBoostingClassifier

gb_model = GradientBoostingClassifier(n_estimators=100, learning_rate=0.1, random_state=42)
gb_model.fit(X_train, y_train)
y_pred_gb = gb_model.predict(X_test)

# Inisialisasi model XGBoost Classifier
xgb_model = XGBClassifier(n_estimators=100, learning_rate=0.1, random_state=42)
xgb_model.fit(X_train, y_train)
y_pred_xgb = xgb_model.predict(X_test)


catboost = CatBoostClassifier(iterations=1000, depth=6, learning_rate=0.1, loss_function='MultiClass')
catboost.fit(X_train, y_train, eval_set=(X_test, y_test), early_stopping_rounds=50, verbose=100)
y_pred_cat = catboost.predict(X_test)



0:	learn: 1.9077030	test: 1.9217883	best: 1.9217883 (0)	total: 11ms	remaining: 11s
100:	learn: 0.9611554	test: 1.3081729	best: 1.3081729 (100)	total: 920ms	remaining: 8.19s


In [None]:
from sklearn.metrics import f1_score

# Evaluasi kinerja Random Forest Classifier
f1_macro_rf = f1_score(y_test, y_pred_rf, average='macro')
print("F1-score Macro untuk Random Forest Classifier:", f1_macro_rf)

# Evaluasi kinerja Gradient Boosting Classifier
f1_macro_gb = f1_score(y_test, y_pred_gb, average='macro')
print("F1-score Macro untuk Gradient Boosting Classifier:", f1_macro_gb)

# Evaluasi kinerja XGradient Boosting Classifier
f1_macro_xgb = f1_score(y_test, y_pred_xgb, average='macro')
print("F1-score Macro untuk XGBClassifier:", f1_macro_xgb)

# Evaluasi kinerja XGradient Boosting Classifier
f1_macro_cat = f1_score(y_test, y_pred_cat, average='macro')
print("F1-score Macro untuk catboostClassifier:", f1_macro_cat)



F1-score Macro untuk Random Forest Classifier: 0.7429063887338068
F1-score Macro untuk Gradient Boosting Classifier: 0.5486239202957861
F1-score Macro untuk XGBClassifier: 0.6584122298392767
F1-score Macro untuk catboostClassifier: 0.690625684410643


## CV score for RF

In [None]:
from sklearn.model_selection import cross_val_score

# Cross-validation untuk Random Forest
cv_scores_rf = cross_val_score(rf_model, X_train, y_train, cv=5, scoring='f1_macro')
print("Cross-Validation Scores - Random Forest:", cv_scores_rf)


Cross-Validation Scores - Random Forest: [0.80374965 0.81417796 0.85696745 0.88918057 0.89906027]


## Stacking

In [None]:
# Meta-model dan base-models
meta_model = LogisticRegression()
base_models = [
    ('rf', RandomForestClassifier(n_estimators=300, random_state=42)),
    ('xgb', XGBClassifier(n_estimators=100, random_state=42)),
    ('gb', GradientBoostingClassifier(n_estimators=100, random_state=42)),
    ('cb', CatBoostClassifier(iterations=1000, depth=6, learning_rate=0.1, loss_function='MultiClass'))
]

# Membuat Stacking Classifier
stacking_clf = StackingClassifier(estimators=base_models, final_estimator=meta_model, cv=5)

# Latih Stacking Classifier
stacking_clf.fit(X_train, y_train)

# Prediksi dan evaluasi
y_pred = stacking_clf.predict(X_test)

# Evaluasi dengan F1-score Macro
f1_macro = f1_score(y_test, y_pred, average='macro')
print("F1-score Macro untuk Stacking Classifier:", f1_macro)

0:	learn: 1.9077030	total: 12.3ms	remaining: 12.3s
1:	learn: 1.8723584	total: 23.1ms	remaining: 11.5s
2:	learn: 1.8396387	total: 34.2ms	remaining: 11.4s
3:	learn: 1.8125340	total: 46ms	remaining: 11.4s
4:	learn: 1.7853754	total: 56.4ms	remaining: 11.2s
5:	learn: 1.7617593	total: 67.5ms	remaining: 11.2s
6:	learn: 1.7361924	total: 78.8ms	remaining: 11.2s
7:	learn: 1.7115863	total: 89.4ms	remaining: 11.1s
8:	learn: 1.6880182	total: 99.7ms	remaining: 11s
9:	learn: 1.6664170	total: 111ms	remaining: 11s
10:	learn: 1.6467476	total: 121ms	remaining: 10.9s
11:	learn: 1.6293243	total: 131ms	remaining: 10.8s
12:	learn: 1.6132752	total: 143ms	remaining: 10.9s
13:	learn: 1.5977815	total: 153ms	remaining: 10.8s
14:	learn: 1.5837766	total: 163ms	remaining: 10.7s
15:	learn: 1.5704953	total: 173ms	remaining: 10.7s
16:	learn: 1.5606752	total: 183ms	remaining: 10.6s
17:	learn: 1.5508896	total: 193ms	remaining: 10.5s
18:	learn: 1.5362059	total: 202ms	remaining: 10.4s
19:	learn: 1.5190932	total: 213ms	rema

In [None]:
# from sklearn.model_selection import cross_val_score

# # Cross-validation untuk Stacking Classifier
# cv_scores_stacking = cross_val_score(stacking_clf, X_train, y_train, cv=5, scoring='f1_macro')
# print("Cross-Validation Scores - Stacking Classifier:", cv_scores_stacking)


# Kaggle Submission


In [547]:
submission = pd.read_csv(SAMPLE_SUBMISSION_PATH)
test_dat.drop(columns='ID', inplace=True)
submission['jumlah_promosi'] = stacking_clf.predict(test_dat)
submission['jumlah_promosi'] = submission['jumlah_promosi'].astype(int)
submission.to_csv('../submissions/stackingclf.csv', index=False)

KeyError: "['ID'] not found in axis"

In [543]:
SUBMIT_PATH = '../submissions/stackingclf.csv'

csv = pd.read_csv(SUBMIT_PATH)

In [544]:
category_counts = csv['jumlah_promosi'].value_counts().sort_index()
category_counts

0.0    1200
1.0     394
2.0     287
3.0     457
4.0     543
5.0     589
6.0     348
Name: jumlah_promosi, dtype: int64