In [190]:
import pandas as pd
from typing import Dict, List, Optional

class SimpleCouponPreprocessor:
    """Integer-encode the categorical columns of the coupon dataset.

    The encoder holds one ``{category: code}`` dictionary per column
    (``self.mappings``) and the inverted ``{code: category}`` dictionaries
    (``self.reverse_mappings``).  ``transform`` applies the forward
    dictionaries, ``reverse_transform`` applies the inverted ones.
    """

    def __init__(self):
        # Frequency columns that should be imputed with 'never'
        self.frequency_cols = [
            'Bar', 'CoffeeHouse', 'CarryAway',
            'RestaurantLessThan20', 'Restaurant20To50'
        ]

        # The five visit-frequency columns share one scale; each column gets
        # its own dict object (built below) so they can be edited independently.
        frequency_scale = {'never': 0, 'less1': 1, '1~3': 2, '4~8': 3, 'gt8': 4}

        # Forward mappings: column name -> {original value: integer code}
        self.mappings = {
            # Time and Weather
            'time': {'7AM': 0, '10AM': 1, '2PM': 2, '6PM': 3, '10PM': 4},
            'weather': {'Sunny': 0, 'Rainy': 1, 'Snowy': 2},
            # NOTE: keys are ints, so the column must hold numeric temperatures
            'temperature': {30: 0, 55: 1, 80: 2},
            'expiration': {'2h': 0, '1d': 1},

            # Demographics
            'age': {
                'below21': 0, '21': 1, '26': 2, '31': 3,
                '36': 4, '41': 5, '46': 6, '50plus': 7
            },
            'gender': {'Female': 0, 'Male': 1},
            'maritalStatus': {
                'Single': 0, 'Married partner': 1, 'Unmarried partner': 2,
                'Widowed': 3, 'Divorced': 4
            },
            'education': {
                'Some High School': 0,
                'High School Graduate': 1,
                'Some college - no degree': 2,
                'Associates degree': 3,
                'Bachelors degree': 4,
                'Graduate degree (Masters or Doctorate)': 5
            },
            'occupation': {
                'Unemployed': 0,
                'Architecture & Engineering': 1,
                'Student': 2,
                'Education&Training&Library': 3,
                'Healthcare Support': 4,
                'Healthcare Practitioners & Technical': 5,
                'Sales & Related': 6,
                'Management': 7,
                'Arts Design Entertainment Sports & Media': 8,
                'Computer & Mathematical': 9,
                'Life Physical Social Science': 10,
                'Personal Care & Service': 11,
                'Community & Social Services': 12,
                'Office & Administrative Support': 13,
                'Construction & Extraction': 14,
                'Legal': 15,
                'Retired': 16,
                'Installation Maintenance & Repair': 17,
                'Transportation & Material Moving': 18,
                'Business & Financial': 19,
                'Protective Service': 20,
                'Food Preparation & Serving Related': 21,
                'Production Occupations': 22,
                'Building & Grounds Cleaning & Maintenance': 23,
                'Farming Fishing & Forestry': 24
            },
            'income': {
                'Less than $12500': 0,
                '$12500 - $24999': 1,
                '$25000 - $37499': 2,
                '$37500 - $49999': 3,
                '$50000 - $62499': 4,
                '$62500 - $74999': 5,
                '$75000 - $87499': 6,
                '$87500 - $99999': 7,
                '$100000 or More': 8
            },

            # Location and Context
            'destination': {'No Urgent Place': 0, 'Home': 1, 'Work': 2},
            'passanger': {'Alone': 0, 'Friend(s)': 1, 'Kid(s)': 2, 'Partner': 3},
            'coupon': {
                'Restaurant(<20)': 0,
                'Coffee House': 1,
                'Carry out & Take away': 2,
                'Bar': 3,
                'Restaurant(20-50)': 4
            },

            # Frequency Variables
            'Bar': dict(frequency_scale),
            'CoffeeHouse': dict(frequency_scale),
            'CarryAway': dict(frequency_scale),
            'RestaurantLessThan20': dict(frequency_scale),
            'Restaurant20To50': dict(frequency_scale)
        }

        # Reverse mappings are derived, never hand-written: {code: original value}
        self.reverse_mappings = {
            col: {v: k for k, v in mapping.items()}
            for col, mapping in self.mappings.items()
        }

    def transform(self, df: pd.DataFrame) -> pd.DataFrame:
        """Return an encoded copy of ``df``; the input frame is not modified.

        Steps, in order:
          1. drop the ``car`` column when present;
          2. fill missing values in the frequency columns with ``'never'``;
          3. replace values with integer codes for every mapped column that
             exists in the frame.

        Columns without a mapping are passed through unchanged.  Because the
        encoding uses ``Series.map``, a value that is not a key of the
        column's mapping becomes NaN rather than raising.
        """
        # Create a copy to avoid modifying the original
        df_processed = df.copy()

        # Remove car column if it exists
        if 'car' in df_processed.columns:
            df_processed = df_processed.drop('car', axis=1)

        # Impute 'never' for frequency columns (must happen before mapping so
        # the filled value is encoded like any other category)
        for col in self.frequency_cols:
            if col in df_processed.columns:
                df_processed[col] = df_processed[col].fillna('never')

        # Apply mappings
        for col, mapping in self.mappings.items():
            if col in df_processed.columns:
                df_processed[col] = df_processed[col].map(mapping)

        return df_processed

    def reverse_transform(self, df: pd.DataFrame, columns: Optional[List[str]] = None) -> pd.DataFrame:
        """Return a copy of ``df`` with integer codes turned back into categories.

        Args:
            df: frame holding encoded columns.
            columns: columns to decode; ``None`` means every column that has a
                mapping.  Names that are absent from ``df`` or have no mapping
                are skipped silently.

        The reverse dictionaries are keyed by integer codes, so codes stored
        as strings (e.g. ``'3'``) do not match and decode to NaN.
        """
        df_reversed = df.copy()

        # If no columns specified, reverse all that have mappings
        if columns is None:
            columns = list(self.reverse_mappings.keys())

        # Apply reverse mappings for specified columns
        for col in columns:
            if col in df_reversed.columns and col in self.reverse_mappings:
                df_reversed[col] = df_reversed[col].map(self.reverse_mappings[col])

        return df_reversed

    def get_mappings(self) -> Dict:
        """Return an independent copy of the forward mappings.

        Both the outer dictionary and each per-column dictionary are copied,
        so edits to the returned value cannot change this encoder.
        """
        return {col: dict(mapping) for col, mapping in self.mappings.items()}

    def get_reverse_mappings(self) -> Dict:
        """Return an independent copy of the reverse (code -> category) mappings.

        Both the outer dictionary and each per-column dictionary are copied,
        so edits to the returned value cannot change this encoder.
        """
        return {col: dict(mapping) for col, mapping in self.reverse_mappings.items()}

In [191]:
import pandas as pd

# Read the raw coupon CSV; the path is relative to the notebook's working directory
df = pd.read_csv(filepath_or_buffer='invehiclecouponrecommendation.csv')

In [210]:
# Mean of the target column Y over all rows
df.loc[:, 'Y'].mean()

0.5684326710816777

In [192]:
# Distinct raw occupation labels, in order of first appearance
pd.unique(df['occupation'])

array(['Unemployed', 'Architecture & Engineering', 'Student',
       'Education&Training&Library', 'Healthcare Support',
       'Healthcare Practitioners & Technical', 'Sales & Related',
       'Management', 'Arts Design Entertainment Sports & Media',
       'Computer & Mathematical', 'Life Physical Social Science',
       'Personal Care & Service', 'Community & Social Services',
       'Office & Administrative Support', 'Construction & Extraction',
       'Legal', 'Retired', 'Installation Maintenance & Repair',
       'Transportation & Material Moving', 'Business & Financial',
       'Protective Service', 'Food Preparation & Serving Related',
       'Production Occupations',
       'Building & Grounds Cleaning & Maintenance',
       'Farming Fishing & Forestry'], dtype=object)

In [193]:
# Encode the raw frame with the preprocessor's category -> integer mappings;
# `df` itself is left untouched because transform() works on a copy.
preprocessor = SimpleCouponPreprocessor()
transformed_df = preprocessor.transform(df=df)

In [194]:
# Build customer segments: the encoded demographic and visit-frequency values
# of each row are joined with '-' into a single segment key.
data = pd.DataFrame({
    'Demographic_Behavior': (
        transformed_df[[
            'gender', 'age', 'maritalStatus', 'has_children', 'education',
            'occupation', 'income', 'Bar', 'CoffeeHouse', 'CarryAway',
            'RestaurantLessThan20', 'Restaurant20To50',
        ]]
        .astype(str)
        .agg('-'.join, axis=1)
    ),
    'Y': transformed_df['Y'],
})

# Per segment: row count and mean of Y, plus a 1-based sequential segment ID
grouped_data = (
    data.groupby('Demographic_Behavior')
    .agg(
        num_observations=('Y', 'count'),
        original_acceptance_rate=('Y', 'mean'),
    )
    .reset_index()
    .assign(CustomerGroup_ID=lambda segments: range(1, len(segments) + 1))
)

# Put the ID column first for readability
customer_df = grouped_data.loc[:, [
    'CustomerGroup_ID', 'Demographic_Behavior',
    'num_observations', 'original_acceptance_rate',
]]

# Preview the first few segments
customer_df.head()


Unnamed: 0,CustomerGroup_ID,Demographic_Behavior,num_observations,original_acceptance_rate
0,1,0-0-0-0-1-0-0-0-0-3-3-0,22,0.409091
1,2,0-0-0-0-2-2-8-0-1-2-2-2,21,0.809524
2,3,0-0-0-0-2-2-8-0-2-3-3-2,22,0.636364
3,4,0-0-0-0-2-4-0-0-2-1-3-1,22,0.454545
4,5,0-0-0-0-4-2-1-4-2-1-3-4,22,0.681818


In [195]:
# Build situation groups: the encoded trip-context values of each row
# (destination, passenger, weather, time, distance and direction flags)
# are joined with '-' into a single situation key.
data = pd.DataFrame({
    'Situation': (
        transformed_df[[
            'destination', 'passanger', 'weather', 'temperature',
            'time', 'toCoupon_GEQ5min', 'toCoupon_GEQ15min',
            'toCoupon_GEQ25min', 'direction_same', 'direction_opp',
        ]]
        .astype(str)
        .agg('-'.join, axis=1)
    ),
    'Y': transformed_df['Y'],
})

# Per situation: row count and mean of Y, plus a 1-based sequential ID
grouped_data = (
    data.groupby('Situation')
    .agg(
        num_observations=('Y', 'count'),
        acceptance_rate=('Y', 'mean'),
    )
    .reset_index()
    .assign(SituationGroup_ID=lambda situations: range(1, len(situations) + 1))
)

# Only the ID and the key are kept; the per-situation statistics stay in grouped_data
situation_df = grouped_data.loc[:, ['SituationGroup_ID', 'Situation']]

# Preview the first few situations
situation_df.head()


Unnamed: 0,SituationGroup_ID,Situation
0,1,0-0-0-0-1-1-1-0-0-1
1,2,0-0-0-1-1-1-0-0-0-1
2,3,0-0-0-1-2-1-0-0-0-1
3,4,0-0-0-1-3-1-0-0-0-1
4,5,0-0-0-1-4-1-0-0-0-1


In [196]:
# Build coupon groups: encoded coupon type and expiration joined with '-'
data = pd.DataFrame({
    'Coupon Type': (
        transformed_df[['coupon', 'expiration']]
        .astype(str)
        .agg('-'.join, axis=1)
    ),
    'Y': transformed_df['Y'],
})

# Per coupon group: row count and mean of Y, plus a 1-based sequential ID
grouped_data = (
    data.groupby('Coupon Type')
    .agg(
        num_observations=('Y', 'count'),
        acceptance_rate=('Y', 'mean'),
    )
    .reset_index()
    .assign(CouponGroup_ID=lambda coupons: range(1, len(coupons) + 1))
)

# Only the ID and the key are kept; the per-coupon statistics stay in grouped_data
coupon_df = grouped_data.loc[:, ['CouponGroup_ID', 'Coupon Type']]

# Preview the first few coupon groups
coupon_df.head()


Unnamed: 0,CouponGroup_ID,Coupon Type
0,1,0-0
1,2,0-1
2,3,1-0
3,4,1-1
4,5,2-0


In [211]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score
from sklearn.metrics import (
    classification_report, confusion_matrix, 
    roc_curve, precision_recall_curve, auc
)

# Model on the encoded frame (this is an alias of transformed_df, not a copy)
data = transformed_df

# 'Y' is the target; every remaining column is used as a feature
y = data['Y']
X = data.drop('Y', axis=1)

# Remember the feature order for the importance table below
feature_names = list(X.columns)

# 80/20 split, stratified on the target so both parts keep the same class mix
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

# Hyper-parameter grid searched for the random forest
param_grid = dict(
    n_estimators=[50, 100, 200],
    max_depth=[5, 10, 20, None],
    min_samples_split=[2, 5, 10],
    min_samples_leaf=[1, 2, 4],
)

# Base estimator; fixed seed for repeatable results
rf = RandomForestClassifier(random_state=42)

# 5-fold grid search on the training split; candidates are ranked by F1
grid_search = GridSearchCV(
    estimator=rf, param_grid=param_grid, scoring='f1', cv=5, n_jobs=-1
)
grid_search.fit(X_train, y_train)

# Winning estimator (refit on the whole training split) and its parameters
best_params = grid_search.best_params_
best_rf = grid_search.best_estimator_

# Separate 5-fold CV of the winning configuration on the training split.
# Note the metric here is accuracy, whereas the grid search ranked by F1.
cv_scores = cross_val_score(best_rf, X_train, y_train, cv=5, scoring='accuracy')
cv_mean = np.mean(cv_scores)
cv_std = np.std(cv_scores)

# Held-out predictions: hard labels and per-class probabilities
y_pred = best_rf.predict(X_test)
y_pred_proba = best_rf.predict_proba(X_test)

# Confusion matrix on the test split
conf_matrix = confusion_matrix(y_test, y_pred)

# ROC curve and its area, scored with probability column 1
fpr, tpr, _ = roc_curve(y_test, y_pred_proba[:, 1])
roc_auc = auc(fpr, tpr)

# Precision-recall curve and its area, scored with probability column 1
precision, recall, _ = precision_recall_curve(y_test, y_pred_proba[:, 1])
pr_auc = auc(recall, precision)

# Feature importances, largest first
feature_importance = pd.DataFrame(
    {'feature': feature_names, 'importance': best_rf.feature_importances_}
).sort_values(by='importance', ascending=False)

# Text report of per-class precision / recall / F1 on the test split
class_report = classification_report(y_test, y_pred)

# Print all results (each heading is followed by its value on the next line)
print("\n=== Best Parameters ===", best_params, sep="\n")

print(
    "\n=== Cross-validation Results ===",
    f"Mean CV Score: {cv_mean:.3f} (+/- {cv_std * 2:.3f})",
    sep="\n",
)

print("\n=== Confusion Matrix ===", conf_matrix, sep="\n")

print("\n=== ROC AUC Score ===", f"ROC AUC: {roc_auc:.3f}", sep="\n")

print("\n=== PR AUC Score ===", f"PR AUC: {pr_auc:.3f}", sep="\n")

print("\n=== Top 10 Feature Importance ===", feature_importance.head(10), sep="\n")

print("\n=== Classification Report ===", class_report, sep="\n")


=== Best Parameters ===
{'max_depth': 20, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 200}

=== Cross-validation Results ===
Mean CV Score: 0.751 (+/- 0.010)

=== Confusion Matrix ===
[[ 688  407]
 [ 253 1189]]

=== ROC AUC Score ===
ROC AUC: 0.809

=== PR AUC Score ===
PR AUC: 0.834

=== Top 10 Feature Importance ===
                 feature  importance
5                 coupon    0.107294
12            occupation    0.085538
13                income    0.074175
8                    age    0.064533
15           CoffeeHouse    0.064357
4                   time    0.056346
14                   Bar    0.053024
11             education    0.051886
16             CarryAway    0.049802
17  RestaurantLessThan20    0.048409

=== Classification Report ===
              precision    recall  f1-score   support

           0       0.73      0.63      0.68      1095
           1       0.74      0.82      0.78      1442

    accuracy                           0.74      2537
   m

In [198]:
# (rows, columns) of the customer, situation and coupon group tables, in that order
tuple(frame.shape for frame in (customer_df, situation_df, coupon_df))

((567, 4), (121, 2), (10, 2))

In [199]:
# Cartesian product: every customer group paired with every situation and
# every coupon group (rows = customers x situations x coupons)
optimization_matrix = (
    customer_df
    .merge(situation_df, how='cross')
    .merge(coupon_df, how='cross')
)

In [200]:
# Column labels of the cross-joined matrix (customer, situation and coupon group fields)
optimization_matrix.columns

Index(['CustomerGroup_ID', 'Demographic_Behavior', 'num_observations',
       'original_acceptance_rate', 'SituationGroup_ID', 'Situation',
       'CouponGroup_ID', 'Coupon Type'],
      dtype='object')

In [201]:
# Column order inside each '-'-joined group key. These lists must match the
# order used when the keys were built, because the split below is positional.
demographic_columns = ['gender', 'age', 'maritalStatus', 'has_children', 'education','occupation','income', 'Bar', 'CoffeeHouse', 'CarryAway',
                                    'RestaurantLessThan20', 'Restaurant20To50']  # Adjust based on original features
situation_columns = ['destination', 'passanger', 'weather', 'temperature',
                             'time', 'toCoupon_GEQ5min', 'toCoupon_GEQ15min',
                             'toCoupon_GEQ25min', 'direction_same', 'direction_opp']
coupon_columns = ['coupon','expiration']

# Expand each key back into one column per feature.
# str.split returns strings ('0', '1', ...), so convert every piece back to a
# number: the model was trained on numeric codes, and numeric comparisons on
# these columns (e.g. coupon == 3) would never match string values.
optimization_matrix[demographic_columns] = (
    optimization_matrix['Demographic_Behavior'].str.split('-', expand=True).apply(pd.to_numeric)
)
optimization_matrix[situation_columns] = (
    optimization_matrix['Situation'].str.split('-', expand=True).apply(pd.to_numeric)
)
optimization_matrix[coupon_columns] = (
    optimization_matrix['Coupon Type'].str.split('-', expand=True).apply(pd.to_numeric)
)

# Define substrings and specific column names to remove
keys_to_match = ['Group_ID', 'num_observations', 'acceptance_rate']
additional_columns = ['Demographic_Behavior', 'Situation', 'Coupon Type']

# Identify bookkeeping columns (IDs, counts, rates, raw keys) dynamically
columns_to_remove = [
    col for col in optimization_matrix.columns
    if any(key in col for key in keys_to_match) or col in additional_columns
]

# Drop the bookkeeping columns, then put the features in the exact order the
# model was trained on. feature_names is used instead of X.columns because a
# later cell rebinds X to an integer, which would break a re-run of this cell.
prediction_features = optimization_matrix.drop(columns=columns_to_remove)
prediction_features = prediction_features[feature_names]


In [202]:
# Inspect the model-input frame: one row per customer group x situation x coupon combination
prediction_features

Unnamed: 0,destination,passanger,weather,temperature,time,coupon,expiration,gender,age,maritalStatus,...,Bar,CoffeeHouse,CarryAway,RestaurantLessThan20,Restaurant20To50,toCoupon_GEQ5min,toCoupon_GEQ15min,toCoupon_GEQ25min,direction_same,direction_opp
0,0,0,0,0,1,0,0,0,0,0,...,0,0,3,3,0,1,1,0,0,1
1,0,0,0,0,1,0,1,0,0,0,...,0,0,3,3,0,1,1,0,0,1
2,0,0,0,0,1,1,0,0,0,0,...,0,0,3,3,0,1,1,0,0,1
3,0,0,0,0,1,1,1,0,0,0,...,0,0,3,3,0,1,1,0,0,1
4,0,0,0,0,1,2,0,0,0,0,...,0,0,3,3,0,1,1,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
686065,2,0,2,0,0,2,1,1,7,4,...,0,0,0,0,0,1,1,1,0,1
686066,2,0,2,0,0,3,0,1,7,4,...,0,0,0,0,0,1,1,1,0,1
686067,2,0,2,0,0,3,1,1,7,4,...,0,0,0,0,0,1,1,1,0,1
686068,2,0,2,0,0,4,0,1,7,4,...,0,0,0,0,0,1,1,1,0,1


In [203]:
# Score every combination with the tuned forest and keep probability column 1
# (presumably the probability of Y == 1; the report above lists classes 0 and 1)
class_probabilities = best_rf.predict_proba(prediction_features)
optimization_matrix['acceptance_probability'] = class_probabilities[:, 1]

# Show the first rows of the updated matrix as plain text
print(optimization_matrix.head(5))

   CustomerGroup_ID     Demographic_Behavior  num_observations  \
0                 1  0-0-0-0-1-0-0-0-0-3-3-0                22   
1                 1  0-0-0-0-1-0-0-0-0-3-3-0                22   
2                 1  0-0-0-0-1-0-0-0-0-3-3-0                22   
3                 1  0-0-0-0-1-0-0-0-0-3-3-0                22   
4                 1  0-0-0-0-1-0-0-0-0-3-3-0                22   

   original_acceptance_rate  SituationGroup_ID            Situation  \
0                  0.409091                  1  0-0-0-0-1-1-1-0-0-1   
1                  0.409091                  1  0-0-0-0-1-1-1-0-0-1   
2                  0.409091                  1  0-0-0-0-1-1-1-0-0-1   
3                  0.409091                  1  0-0-0-0-1-1-1-0-0-1   
4                  0.409091                  1  0-0-0-0-1-1-1-0-0-1   

   CouponGroup_ID Coupon Type gender age  ... temperature time  \
0               1         0-0      0   0  ...           0    1   
1               2         0-1      0   0  ..

In [204]:
# Weighted score per row: predicted acceptance probability multiplied by the
# number of observations in the row's customer group
optimization_matrix['weighted_acceptance_score'] = optimization_matrix['acceptance_probability'].mul(
    optimization_matrix['num_observations']
)


In [208]:
# Persist the scored matrix next to the notebook (the row index is written as the first column)
optimization_matrix.to_csv(path_or_buf='optimization_matrix.csv')

In [206]:
from pulp import LpMaximize, LpProblem, LpVariable, lpSum

# Define the optimization problem: pick (customer group, situation, coupon)
# rows so that the summed weighted acceptance score is as large as possible.
problem = LpProblem("Maximize_Weighted_Acceptance", LpMaximize)

# One binary decision variable per row of the matrix (1 = offer that combination).
# The list gives fast positional access; the column keeps them next to their rows.
decision_vars = [
    LpVariable(f"Z_{i}", cat="Binary") for i in range(len(optimization_matrix))
]
optimization_matrix['decision_var'] = decision_vars

# Extract the coefficients once as plain Python numbers. Walking the frame with
# iterrows() for every expression builds a Series per row and is very slow on
# a matrix of this size.
weighted_scores = optimization_matrix['weighted_acceptance_score'].tolist()
group_sizes = optimization_matrix['num_observations'].tolist()

# Objective: Maximize the total weighted acceptance score
problem += lpSum(
    var * score for var, score in zip(decision_vars, weighted_scores)
)

# Constraint 1: Each CustomerGroup_ID can receive at most one voucher.
# groupby(...).indices maps each group ID to the row positions of that group,
# so the frame is partitioned once instead of being filtered once per group.
rows_by_customer_group = optimization_matrix.groupby('CustomerGroup_ID', sort=False).indices
for customer_group in optimization_matrix['CustomerGroup_ID'].unique():
    problem += lpSum(
        decision_vars[position] for position in rows_by_customer_group[customer_group]
    ) <= 1

# Constraint 2: Total number of customers covered cannot exceed the limit.
# The limit has its own name so that the feature matrix `X` built in the
# modelling cell is not overwritten by an integer.
max_customers_covered = 100  # Replace with your total voucher limit
problem += lpSum(
    var * size for var, size in zip(decision_vars, group_sizes)
) <= max_customers_covered

# Solve the optimization problem
problem.solve()

# Extract results: solved value of each decision variable, row by row
optimization_matrix['allocated'] = [var.value() for var in decision_vars]

# Filter the rows with allocated vouchers
allocated_vouchers = optimization_matrix[optimization_matrix['allocated'] == 1]

# Display the allocated vouchers
print(allocated_vouchers[['CustomerGroup_ID', 'SituationGroup_ID', 'CouponGroup_ID',
                          'acceptance_probability', 'num_observations', 'weighted_acceptance_score']])


        CustomerGroup_ID  SituationGroup_ID  CouponGroup_ID  \
144231               120                 25               2   
357135               296                 19               6   
358393               297                 24               4   
416731               345                 50               2   

        acceptance_probability  num_observations  weighted_acceptance_score  
144231                   1.000                22                      22.00  
357135                   1.000                22                      22.00  
358393                   1.000                22                      22.00  
416731                   0.995                34                      33.83  


In [207]:
from pulp import LpMaximize, LpProblem, LpVariable, lpSum

# Define the optimization problem: pick (customer group, situation, coupon)
# rows so that the summed weighted acceptance score is as large as possible,
# now with additional per-coupon-type limits.
problem = LpProblem("Maximize_Weighted_Acceptance", LpMaximize)

# One binary decision variable per row of the matrix (1 = offer that combination).
# The list gives fast positional access; the column keeps them next to their rows.
decision_vars = [
    LpVariable(f"Z_{i}", cat="Binary") for i in range(len(optimization_matrix))
]
optimization_matrix['decision_var'] = decision_vars

# Extract the coefficients once as plain Python numbers. Walking the frame with
# iterrows() for every expression builds a Series per row and is very slow on
# a matrix of this size.
weighted_scores = optimization_matrix['weighted_acceptance_score'].tolist()
group_sizes = optimization_matrix['num_observations'].tolist()

# Objective: Maximize the total weighted acceptance score
problem += lpSum(
    var * score for var, score in zip(decision_vars, weighted_scores)
)

# Constraint 1: Each CustomerGroup_ID can receive at most one voucher.
# groupby(...).indices maps each group ID to the row positions of that group,
# so the frame is partitioned once instead of being filtered once per group.
rows_by_customer_group = optimization_matrix.groupby('CustomerGroup_ID', sort=False).indices
for customer_group in optimization_matrix['CustomerGroup_ID'].unique():
    problem += lpSum(
        decision_vars[position] for position in rows_by_customer_group[customer_group]
    ) <= 1

# Constraint 2: Total number of customers covered cannot exceed the limit.
# The limit has its own name so that the feature matrix `X` built in the
# modelling cell is not overwritten by an integer.
max_customers_covered = 100  # Replace with your total voucher limit
problem += lpSum(
    var * size for var, size in zip(decision_vars, group_sizes)
) <= max_customers_covered

# Coupon-type limits.
# The codes come from the preprocessor's own mapping rather than literals:
# in that mapping 'Bar' is 3 and 'Coffee House' is 1. The 'coupon' column is
# cast to int because it was rebuilt by splitting a string key, so it may
# hold '3' rather than 3, and a string never equals an integer code.
coupon_codes = preprocessor.get_mappings()['coupon']
coupon_code_per_row = optimization_matrix['coupon'].astype(int)
bar_coupon_limit = 50
coffee_house_coupon_limit = 50

# Constraint 3: Limit for Bar coupons
bar_rows = optimization_matrix[coupon_code_per_row == coupon_codes['Bar']]
problem += lpSum(
    var * size
    for var, size in zip(bar_rows['decision_var'], bar_rows['num_observations'].tolist())
) <= bar_coupon_limit

# Constraint 4: Limit for Coffee Shop coupons
coffee_shop_rows = optimization_matrix[coupon_code_per_row == coupon_codes['Coffee House']]
problem += lpSum(
    var * size
    for var, size in zip(coffee_shop_rows['decision_var'], coffee_shop_rows['num_observations'].tolist())
) <= coffee_house_coupon_limit

# Solve the optimization problem
problem.solve()

# Extract results: solved value of each decision variable, row by row
optimization_matrix['allocated'] = [var.value() for var in decision_vars]

# Filter the rows with allocated vouchers
allocated_vouchers = optimization_matrix[optimization_matrix['allocated'] == 1]

# Display the allocated vouchers
print(allocated_vouchers[['CustomerGroup_ID', 'SituationGroup_ID', 'CouponGroup_ID',
                          'acceptance_probability', 'num_observations', 'weighted_acceptance_score']])


        CustomerGroup_ID  SituationGroup_ID  CouponGroup_ID  \
144231               120                 25               2   
357135               296                 19               6   
358393               297                 24               4   
416731               345                 50               2   

        acceptance_probability  num_observations  weighted_acceptance_score  
144231                   1.000                22                      22.00  
357135                   1.000                22                      22.00  
358393                   1.000                22                      22.00  
416731                   0.995                34                      33.83  
