<a href="https://colab.research.google.com/github/fkhandley/msds6925/blob/main/MSDS6825_practicum_LSTM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime, timedelta
import tensorflow as tf
from tensorflow.keras import layers, models
from sklearn.preprocessing import MinMaxScaler
import time

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
dtype_dict = {
    'order_id': 'string',
    'user_id': 'string',
    'payment_method': 'category',
    'user_zipcode_current': 'category',
    'gender': 'category',
    'product_category': 'category',
    'strain_type': 'category'
}

In [4]:
orders_load = pd.read_csv('/content/drive/MyDrive/orders.csv', dtype=dtype_dict)

In [5]:
orders_load['delivery_datetime'] = pd.to_datetime(orders_load['delivery_datetime'])

In [6]:
user_orders = orders_load.groupby('user_id').size().reset_index(name='order_count')
print(f"Original number of unique users: {len(user_orders)}")
user_orders5 = user_orders[user_orders['order_count'] >= 5]
print(f"Users with 5+ orders: {len(user_orders5)}")
users5 = user_orders5['user_id'].unique()
user_size = round(len(users5)*0.01,0)
print(f"Taking sample of {user_size} users")
user_sample = np.random.choice(users5, size=int(user_size), replace=False)
orders_load = orders_load[orders_load['user_id'].isin(user_sample)]
final_orders_per_user = orders_load.groupby('user_id').size()
print("\nFinal orders per user:")
print(final_orders_per_user.describe())

Original number of unique users: 884901
Users with 5+ orders: 265985
Taking sample of 2660.0 users

Final orders per user:
count    2660.000000
mean       20.936466
std        27.205639
min         5.000000
25%         7.000000
50%        11.000000
75%        23.000000
max       301.000000
dtype: float64


In [7]:
freq_df = orders_load[['order_id','user_id','delivery_datetime']].copy()
freq_df.drop_duplicates('order_id', inplace=True)
freq_df.sort_values('delivery_datetime', inplace=True)

# Create the shifted dates within each user group
freq_df['last_delivery_date'] = freq_df.groupby('user_id')['delivery_datetime'].shift(1)

# Calculate the days between orders
freq_df['days_since_order'] = (freq_df['delivery_datetime'] - freq_df['last_delivery_date']).dt.days

In [8]:
avg_freq_user = freq_df.groupby('user_id')['days_since_order'].mean().reset_index()
avg_freq_user.rename(columns={'days_since_order':'avg_freq'}, inplace=True)
orders_load = orders_load.merge(freq_df[['order_id','days_since_order']], on='order_id', how='left').merge(avg_freq_user[['user_id','avg_freq']], on='user_id', how='left')

In [9]:
del freq_df, avg_freq_user

In [10]:
orders_load.head()

Unnamed: 0,order_id,user_id,delivery_datetime,wait_time,promo_credit,order_price_total,promo_code,payment_method,days_since_order,avg_freq
0,8966610,1414581,2021-05-22 03:13:08.870,59.51,0.0,80.0,,POB,22.0,45.078947
1,11025368,648498,2022-08-22 03:07:22.800,28.21,0.0,90.48,,ACH,60.0,36.65
2,7040901,605563,2020-05-19 20:51:17.153,28.13,0.0,122.49,,ACH,,69.666667
3,7356299,988922,2020-07-22 22:46:25.150,25.45,0.0,183.0,,Cash,5.0,8.175824
4,8853404,988922,2021-04-27 04:10:31.570,65.1,0.0,247.0,,Cash,5.0,8.175824


# Key input: days of orders lookback

In [11]:
end_date = orders_load['delivery_datetime'].max()
#start_date = end_date - timedelta(days=4500000)

#orders_reduced = orders_load[orders_load['delivery_datetime']>=start_date]
orders_reduced = orders_load.copy()

In [12]:
del orders_load

In [13]:
users_load = pd.read_csv('/content/drive/MyDrive/users.csv', dtype=dtype_dict)

In [14]:
orders = orders_reduced.merge(users_load, on='user_id', how='left')

In [15]:
del users_load, orders_reduced

In [16]:
items_load = pd.read_csv('/content/drive/MyDrive/items.csv', dtype=dtype_dict)
items_load.drop('user_id', inplace=True, axis=1)

In [17]:
category_map = {'Accessories':'other',
                'Drops':'other',
                'Concentrates':'other',
                'Topicals':'other',
                'Prerolls':'prerolls',
                'Vaporizers':'vaporizers',
                'Flowers':'flower',
                'Edibles':'edibles'}

items_load['product_category'] = items_load['product_category'].map(category_map)

In [18]:
items_groupby = items_load.groupby(['order_id','product_category']).agg({'quantity':'sum'}).reset_index()

reshaped_items = (items_groupby.pivot(index='order_id',columns='product_category',values=['quantity']).reset_index())

In [19]:
reshaped_items.columns = reshaped_items.columns.droplevel(0)
reshaped_items = reshaped_items.reset_index()
reshaped_items = reshaped_items.rename(columns={'': 'order_id'})


reshaped_items.head()

product_category,index,order_id,edibles,flower,other,prerolls,vaporizers
0,0,10000000,3.0,,,,
1,1,10000001,,,,1.0,1.0
2,2,10000002,2.0,,,2.0,1.0
3,3,10000003,2.0,2.0,,,
4,4,10000004,,2.0,1.0,,


In [20]:
reshaped = reshaped_items[['order_id','edibles','flower','prerolls','vaporizers']]
reshaped.head()

product_category,order_id,edibles,flower,prerolls,vaporizers
0,10000000,3.0,,,
1,10000001,,,1.0,1.0
2,10000002,2.0,,2.0,1.0
3,10000003,2.0,2.0,,
4,10000004,,2.0,,


In [21]:
orders = orders.merge(reshaped, on='order_id', how='left')

In [22]:
orders = orders.drop('promo_code', axis=1)
orders['account_created_at'] = pd.to_datetime(orders['account_created_at'])
orders['birthdate'] = pd.to_datetime(orders['birthdate'])

In [23]:
orders.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 55691 entries, 0 to 55690
Data columns (total 17 columns):
 #   Column                Non-Null Count  Dtype         
---  ------                --------------  -----         
 0   order_id              55691 non-null  string        
 1   user_id               55691 non-null  string        
 2   delivery_datetime     55691 non-null  datetime64[ns]
 3   wait_time             55684 non-null  float64       
 4   promo_credit          55691 non-null  float64       
 5   order_price_total     55691 non-null  float64       
 6   payment_method        55691 non-null  category      
 7   days_since_order      53031 non-null  float64       
 8   avg_freq              55691 non-null  float64       
 9   user_zipcode_current  55691 non-null  category      
 10  account_created_at    55691 non-null  datetime64[ns]
 11  birthdate             55389 non-null  datetime64[ns]
 12  gender                41456 non-null  category      
 13  edibles         

In [24]:
del items_load, items_groupby, reshaped_items, reshaped

In [25]:
orders['age_at_purchase'] = round(orders.delivery_datetime.dt.year - orders.birthdate.dt.year)

orders['account_age_at_purchase'] =  (orders.delivery_datetime.dt.year - orders.account_created_at.dt.year)*12+orders.delivery_datetime.dt.month - orders.account_created_at.dt.month

In [26]:
orders['weekday'] = orders.delivery_datetime.dt.dayofweek
orders['hour'] = orders.delivery_datetime.dt.hour
orders['month'] = orders.delivery_datetime.dt.month

In [27]:
payment_map = {'POB':'card',
               'ACH':'ach',
               'Cash':'cash',
               'Pin Debit':'card',
               'Online Debit':'card',
               'Account Credit':'promotion'}

orders.payment_method = orders.payment_method.map(payment_map)

In [28]:
orders['is_payment_ach'] = (orders['payment_method'] == 'ach').astype(int)
orders['is_payment_card'] = (orders['payment_method'] == 'card').astype(int)
orders['is_payment_cash'] = (orders['payment_method'] == 'cash').astype(int)
orders['is_payment_promotion'] = (orders['payment_method'] == 'promotion').astype(int)

In [29]:
orders['is_male'] = (orders['gender'] == 'Male').astype(int)
orders['is_female'] = (orders['gender'] == 'Female').astype(int)

In [30]:
orders.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 55691 entries, 0 to 55690
Data columns (total 28 columns):
 #   Column                   Non-Null Count  Dtype         
---  ------                   --------------  -----         
 0   order_id                 55691 non-null  string        
 1   user_id                  55691 non-null  string        
 2   delivery_datetime        55691 non-null  datetime64[ns]
 3   wait_time                55684 non-null  float64       
 4   promo_credit             55691 non-null  float64       
 5   order_price_total        55691 non-null  float64       
 6   payment_method           55691 non-null  object        
 7   days_since_order         53031 non-null  float64       
 8   avg_freq                 55691 non-null  float64       
 9   user_zipcode_current     55691 non-null  category      
 10  account_created_at       55691 non-null  datetime64[ns]
 11  birthdate                55389 non-null  datetime64[ns]
 12  gender                   41456 n

In [31]:
fill_values = {
    'wait_time': orders['wait_time'].mean(),
    'days_since_order': orders['days_since_order'].mean(),
    'avg_freq': orders['avg_freq'].mean(),
    'edibles': 0,
    'flower': 0,
    'prerolls': 0,
    'vaporizers': 0,
    'age_at_purchase': orders['age_at_purchase'].mean()
    }

orders = orders.fillna(fill_values)

In [32]:
orders_final = orders.drop(['order_id','account_created_at','birthdate','gender','payment_method','wait_time'], axis=1).copy()

In [33]:
orders_final.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 55691 entries, 0 to 55690
Data columns (total 22 columns):
 #   Column                   Non-Null Count  Dtype         
---  ------                   --------------  -----         
 0   user_id                  55691 non-null  string        
 1   delivery_datetime        55691 non-null  datetime64[ns]
 2   promo_credit             55691 non-null  float64       
 3   order_price_total        55691 non-null  float64       
 4   days_since_order         55691 non-null  float64       
 5   avg_freq                 55691 non-null  float64       
 6   user_zipcode_current     55691 non-null  category      
 7   edibles                  55691 non-null  float64       
 8   flower                   55691 non-null  float64       
 9   prerolls                 55691 non-null  float64       
 10  vaporizers               55691 non-null  float64       
 11  age_at_purchase          55691 non-null  float64       
 12  account_age_at_purchase  55691 n

In [34]:
user_count = orders_final.user_id.nunique()
user_count

2660

# Deep Learning START

In [35]:
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))
physical_devices = tf.config.list_physical_devices('GPU')
if physical_devices:
    try:
        # Configure GPU memory growth
        for gpu in physical_devices:
            tf.config.experimental.set_memory_growth(gpu, True)
        print("GPU memory growth enabled")

        # Set mixed precision policy
        tf.keras.mixed_precision.set_global_policy('mixed_float16')
        print("Mixed precision policy set to float16")
    except RuntimeError as e:
        print(e)

Num GPUs Available:  1
GPU memory growth enabled
Mixed precision policy set to float16


In [36]:
# Before calling create_batch_sequences:
print("Data check:")
user_counts = orders_final.groupby('user_id').size()
print("\nOrder counts distribution:")
print(user_counts.describe())
print(f"\nUsers with 5+ orders: {sum(user_counts >= 5)}")
print(f"Min orders for a user: {user_counts.min()}")
print(f"Max orders for a user: {user_counts.max()}")

Data check:

Order counts distribution:
count    2660.000000
mean       20.522556
std        26.527496
min         5.000000
25%         7.000000
50%        11.000000
75%        22.000000
max       263.000000
dtype: float64

Users with 5+ orders: 2660
Min orders for a user: 5
Max orders for a user: 263


In [37]:
# Before running create_batch_sequences:
df=orders_final
print("Date range of data:")
print(f"Start: {df['delivery_datetime'].min()}")
print(f"End: {df['delivery_datetime'].max()}")
print(f"Days spanned: {(df['delivery_datetime'].max() - df['delivery_datetime'].min()).days}")

Date range of data:
Start: 2020-01-01 00:32:05.740000
End: 2025-01-25 17:29:22.043000
Days spanned: 1851


In [42]:
class OrderSequenceGenerator(tf.keras.utils.Sequence):
    def __init__(self, df, sequence_length=180, batch_size=32, validation=False, **kwargs):
        super().__init__(**kwargs)  # Add super init to handle kwargs
        self.df = df.copy()
        self.sequence_length = sequence_length
        self.batch_size = batch_size
        self.validation = validation

        # Pre-sort data by user and datetime
        self.df['date'] = self.df['delivery_datetime'].dt.date
        self.df = self.df.sort_values(['user_id', 'delivery_datetime'])

        # Create user-order pairs for sequences
        self.sequences = []
        for user_id in self.df['user_id'].unique():
            user_orders = self.df[self.df['user_id'] == user_id]
            if len(user_orders) >= 5:  # Minimum order requirement
                # For validation, only use the last order
                if validation:
                    if len(user_orders) > 5:  # Ensure we have enough history
                        self.sequences.append((user_id, user_orders.index[-1]))
                else:
                    # For training, use all orders except the last one
                    for idx in user_orders.index[4:-1]:  # Start from 5th order to ensure history
                        self.sequences.append((user_id, idx))

        self.indexes = np.arange(len(self.sequences))
        print(f"Created {'validation' if validation else 'training'} generator with {len(self.sequences)} sequences")

    def __len__(self):
        return max(1, int(np.ceil(len(self.sequences) / self.batch_size)))

    def __getitem__(self, idx):
        # Get batch indexes
        batch_indexes = self.indexes[idx * self.batch_size:(idx + 1) * self.batch_size]
        batch_sequences = [self.sequences[i] for i in batch_indexes]

        # Initialize batch arrays
        features = ['promo_credit', 'order_price_total', 'days_since_order',
                   'avg_freq', 'edibles', 'flower', 'prerolls', 'vaporizers',
                   'age_at_purchase', 'account_age_at_purchase', 'weekday',
                   'hour', 'month', 'is_payment_ach', 'is_payment_card',
                   'is_payment_cash', 'is_payment_promotion', 'is_male', 'is_female']

        batch_size = len(batch_sequences)
        batch_X = np.zeros((batch_size, self.sequence_length, len(features)))
        batch_y = np.zeros(batch_size)

        for i, (user_id, order_idx) in enumerate(batch_sequences):
            user_orders = self.df[self.df['user_id'] == user_id].copy()
            current_order = user_orders.loc[order_idx]

            # Create sequence leading up to current order
            history_start = current_order['delivery_datetime'] - pd.Timedelta(days=self.sequence_length)

            # Fill sequence array
            history = user_orders[
                (user_orders['delivery_datetime'] >= history_start) &
                (user_orders['delivery_datetime'] < current_order['delivery_datetime'])
            ]

            for _, order in history.iterrows():
                days_from_start = (order['delivery_datetime'] - history_start).days
                if 0 <= days_from_start < self.sequence_length:
                    batch_X[i, days_from_start] = order[features].values

            # Check for order in next 7 days
            next_week = current_order['delivery_datetime'] + pd.Timedelta(days=7)
            future_order = user_orders[
                (user_orders['delivery_datetime'] > current_order['delivery_datetime']) &
                (user_orders['delivery_datetime'] <= next_week)
            ]
            batch_y[i] = 1 if len(future_order) > 0 else 0

        return batch_X, batch_y

    def on_epoch_end(self):
        if not self.validation:  # Only shuffle training data
            np.random.shuffle(self.indexes)

In [41]:
def weighted_binary_crossentropy(beta=20.0):
    """
    Custom loss function that penalizes missing orders (false negatives) more heavily.
    Args:
        beta: How many times more important missing an order is versus
             incorrectly predicting an order
    """
    def loss(y_true, y_pred):
        # Standard binary crossentropy calculation
        y_true = tf.cast(y_true, tf.float32)
        y_pred = tf.clip_by_value(y_pred, 1e-7, 1 - 1e-7)

        # Separate the losses for orders (1) and non-orders (0)
        bce_0 = (1 - y_true) * tf.math.log(1 - y_pred)
        bce_1 = y_true * tf.math.log(y_pred)

        # Weight the order cases more heavily
        return -tf.reduce_mean(bce_0 + beta * bce_1)

    return loss

In [43]:
def train_model_with_order_focus(orders_df, batch_size=32, epochs=10, beta=10.0):
    print("Starting training preparation...")

    # Split into train and validation
    train_cutoff = orders_df['delivery_datetime'].max() - timedelta(days=30)
    train_df = orders_df[orders_df['delivery_datetime'] <= train_cutoff]
    val_df = orders_df[orders_df['delivery_datetime'] > train_cutoff]

    # Create generators
    train_generator = OrderSequenceGenerator(train_df, batch_size=batch_size, validation=False)
    val_generator = OrderSequenceGenerator(val_df, batch_size=batch_size, validation=True)

    # Create model with custom loss
    sequence_length = 180
    n_features = 19

    model = models.Sequential([
        layers.Input(shape=(sequence_length, n_features)),
        layers.LSTM(64, return_sequences=True),
        layers.Dropout(0.2),
        layers.LSTM(32),
        layers.Dropout(0.2),
        layers.Dense(16, activation='relu'),
        layers.Dense(1, activation='sigmoid')
    ])

    optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)

    # Use custom loss function
    model.compile(
        optimizer=optimizer,
        loss=weighted_binary_crossentropy(beta=beta),
        metrics=[
            tf.keras.metrics.FalseNegatives(name='missed_orders'),
            tf.keras.metrics.TruePositives(name='caught_orders'),
            tf.keras.metrics.Precision(name='precision'),
            tf.keras.metrics.Recall(name='recall')
        ]
    )

    print(f"Training model with {beta}x penalty for missing orders...")

    # Train model
    history = model.fit(
        train_generator,
        validation_data=val_generator,
        epochs=epochs
    )

    return model, history.history

In [45]:
model, history = train_model_with_order_focus(orders_final, batch_size=32, epochs=4, beta=20.0)

Starting training preparation...
Created training generator with 41859 sequences
Created validation generator with 4 sequences
Training model with 20.0x penalty for missing orders...
Epoch 1/4
[1m1309/1309[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m521s[0m 394ms/step - caught_orders: 5204.1338 - loss: 1.8619 - missed_orders: 59.2519 - precision: 0.2609 - recall: 0.9907 - val_caught_orders: 0.0000e+00 - val_loss: 3.3626 - val_missed_orders: 0.0000e+00 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00
Epoch 2/4
[1m1309/1309[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m514s[0m 393ms/step - caught_orders: 5116.3970 - loss: 1.7340 - missed_orders: 138.3557 - precision: 0.2860 - recall: 0.9709 - val_caught_orders: 0.0000e+00 - val_loss: 3.8566 - val_missed_orders: 0.0000e+00 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00
Epoch 3/4
[1m1309/1309[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m514s[0m 393ms/step - caught_orders: 5143.2559 - loss: 1.6841 - missed_orders: 150.

In [None]:
def plot_training_history(history):
    fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(15, 5))

    # Plot loss
    ax1.plot(history['loss'], label='Training Loss')
    ax1.plot(history['val_loss'], label='Validation Loss')
    ax1.set_title('Model Loss')
    ax1.set_xlabel('Epoch')
    ax1.set_ylabel('Loss')
    ax1.legend()

    # Plot accuracy
    ax2.plot(history['accuracy'], label='Training Accuracy')
    ax2.plot(history['val_accuracy'], label='Validation Accuracy')
    ax2.set_title('Model Accuracy')
    ax2.set_xlabel('Epoch')
    ax2.set_ylabel('Accuracy')
    ax2.legend()

    # Plot AUC
    ax3.plot(history['auc'], label='Training AUC')
    ax3.plot(history['val_auc'], label='Validation AUC')
    ax3.set_title('Model AUC')
    ax3.set_xlabel('Epoch')
    ax3.set_ylabel('AUC')
    ax3.legend()

    plt.tight_layout()
    plt.show()

In [None]:
plot_training_history(history)

Time Series Analysis with a pre build Keras model

In [40]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
import pandas as pd
import numpy as np

def analyze_feature_importance(orders_df):
    # Create a flattened dataset (non-sequential) for quick analysis
    features = ['promo_credit', 'order_price_total', 'days_since_order',
                'avg_freq', 'edibles', 'flower', 'prerolls', 'vaporizers',
                'age_at_purchase', 'account_age_at_purchase', 'weekday',
                'hour', 'month', 'is_payment_ach', 'is_payment_card',
                'is_payment_cash', 'is_payment_promotion', 'is_male', 'is_female']

    # Prepare the data
    X_list = []
    y_list = []

    for user_id in orders_df['user_id'].unique():
        user_orders = orders_df[orders_df['user_id'] == user_id].sort_values('delivery_datetime')
        if len(user_orders) >= 5:  # Keep minimum order requirement
            for i in range(4, len(user_orders)-1):
                # Get current order features
                current_order = user_orders.iloc[i]
                X_list.append(current_order[features].values)

                # Check for order in next 7 days
                next_week = current_order['delivery_datetime'] + pd.Timedelta(days=7)
                future_order = user_orders[
                    (user_orders['delivery_datetime'] > current_order['delivery_datetime']) &
                    (user_orders['delivery_datetime'] <= next_week)
                ]
                y_list.append(1 if len(future_order) > 0 else 0)

    X = np.array(X_list)
    y = np.array(y_list)

    # Scale features
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)

    # Train a random forest
    rf = RandomForestClassifier(n_estimators=100, max_depth=10, n_jobs=-1)
    rf.fit(X_scaled, y)

    # Get feature importance
    importance = pd.DataFrame({
        'feature': features,
        'importance': rf.feature_importances_
    }).sort_values('importance', ascending=False)

    # Add mean values and standard deviations
    importance['mean'] = X.mean(axis=0)

    # Convert to numeric and handle NaNs before calculating std
    X_numeric = pd.DataFrame(X, columns=features).apply(pd.to_numeric, errors='coerce').fillna(0)
    importance['std'] = X_numeric.std(axis=0).values # Calculate std on the numeric DataFrame

    # Calculate correlation with target
    correlations = []
    for i in range(X.shape[1]):
        # Convert to numeric and handle NaNs before correlation
        # x_col = pd.to_numeric(X[:, i], errors='coerce').fillna(0)  # Original line causing the error
        x_col = pd.to_numeric(X[:, i], errors='coerce')

        # Convert x_col to a Pandas Series to use fillna
        x_col = pd.Series(x_col)  # This line is added

        x_col = x_col.fillna(0) # Call fillna on the Series
        corr = np.corrcoef(x_col, y)[0, 1]
        correlations.append(corr)

    importance['correlation'] = correlations

    print("\nFeature Importance Analysis:")
    print("-" * 80)
    print("Features ranked by importance (showing correlation and statistics):")
    print(importance.round(4))

    # Identify potentially redundant features
    low_importance_mask = importance['importance'] < importance['importance'].mean() / 2
    low_correlation_mask = abs(importance['correlation']) < 0.1

    print("\nPotentially removable features:")
    print("(Low importance and low correlation with target)")
    potentially_removable = importance[low_importance_mask & low_correlation_mask]['feature'].tolist()
    for feature in potentially_removable:
        print(f"- {feature}")

    return importance, potentially_removable

# Run the analysis
importance_df, removable_features = analyze_feature_importance(orders_final)


Feature Importance Analysis:
--------------------------------------------------------------------------------
Features ranked by importance (showing correlation and statistics):
                    feature  importance        mean      std  correlation
2          days_since_order      0.3830   12.826363  18.1701      -0.0524
3                  avg_freq      0.3481  119.358527  65.6273      -0.0697
1         order_price_total      0.0426   32.159845  63.2764      -0.1818
9   account_age_at_purchase      0.0405    34.79802  33.5779      -0.2893
8           age_at_purchase      0.0319    1.161072   2.2699      -0.1026
4                   edibles      0.0265     0.72513   1.1312       0.0179
0              promo_credit      0.0254     0.55901   1.1638       0.0280
7                vaporizers      0.0235    0.880092   1.3055      -0.0724
12                    month      0.0165   35.572097   9.7384      -0.0435
11                     hour      0.0154   35.879055  22.2622      -0.0296
10     

  c /= stddev[:, None]
  c /= stddev[None, :]
