In [3]:
# Import necessary libraries
import numpy as np
import pandas as pd
from scipy.sparse.linalg import svds
import psycopg2

# Database connection details.
# Each setting is read from the matching libpq-style environment variable when it
# is set, so real credentials never have to be typed into the notebook. The
# fallbacks are the local defaults this cell used before.
import os

conn = psycopg2.connect(
    dbname=os.environ.get("PGDATABASE", "postgres"),
    user=os.environ.get("PGUSER", "postgres"),
    password=os.environ.get("PGPASSWORD", ""),
    host=os.environ.get("PGHOST", "localhost"),
    port=os.environ.get("PGPORT", "5432"),
)

# Load transaction data into a Pandas DataFrame: rows with
# status_transaction = 'D' dated within the last 90 days.
transaksi_df = pd.read_sql_query("SELECT * FROM transactions WHERE status_transaction = 'D' and date >= CURRENT_DATE - INTERVAL '90 days'", conn)

# Load promo data into a Pandas DataFrame
promo_df = pd.read_sql_query('SELECT * FROM promo', conn)


  transaksi_df = pd.read_sql_query("SELECT * FROM transactions WHERE status_transaction = 'D' and date >= CURRENT_DATE - INTERVAL '90 days'", conn)
  promo_df = pd.read_sql_query('SELECT * FROM promo', conn)


In [6]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Embedding, LSTM, Dropout, Dense
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import KFold

# Preprocessing Data
def preprocess_data(transaksi_df):
    """Label-encode the merchant and ``cif`` columns of a transaction frame.

    Args:
        transaksi_df: DataFrame containing ``merchant_name`` and ``cif`` columns.

    Returns:
        Tuple ``(encoded_df, le_merchant, le_cif)``. ``encoded_df`` is a new
        DataFrame with the input's columns plus ``merchant_encoded`` and
        ``cif_encoded``; the other two items are the ``LabelEncoder`` objects
        fitted on ``merchant_name`` and ``cif`` respectively.
    """
    le_merchant = LabelEncoder()
    le_cif = LabelEncoder()

    # assign() returns a new frame, so the caller's DataFrame is not modified.
    # That keeps the cell re-runnable and avoids SettingWithCopy problems when
    # the input is a slice of another frame.
    encoded_df = transaksi_df.assign(
        merchant_encoded=le_merchant.fit_transform(transaksi_df['merchant_name']),
        cif_encoded=le_cif.fit_transform(transaksi_df['cif']),
    )

    return encoded_df, le_merchant, le_cif

# Prepare sequences
def prepare_sequences(transaksi_df, sequence_length=10):
    """Build fixed-length merchant windows and their next-merchant targets.

    Rows are ordered by ``cif`` then ``date`` and grouped by ``cif_encoded``.
    Within each group, every run of ``sequence_length`` consecutive
    ``merchant_encoded`` values becomes one input row, and the value directly
    after that run becomes its target. Groups with ``sequence_length`` rows or
    fewer contribute nothing.

    Args:
        transaksi_df: DataFrame with ``cif``, ``date``, ``cif_encoded`` and
            ``merchant_encoded`` columns. It is not modified.
        sequence_length: Number of values in each input window.

    Returns:
        Tuple ``(sequences, targets)`` of NumPy arrays shaped
        ``(n_windows, sequence_length)`` and ``(n_windows,)``. When no group is
        long enough, empty integer arrays with those shapes are returned.
    """
    ordered = transaksi_df.sort_values(by=['cif', 'date'])

    sequences = []
    targets = []

    # The group key itself is not needed, only the ordered rows of each group.
    for _, group in ordered.groupby('cif_encoded'):
        merchant_list = group['merchant_encoded'].tolist()

        for start in range(len(merchant_list) - sequence_length):
            stop = start + sequence_length
            sequences.append(merchant_list[start:stop])
            targets.append(merchant_list[stop])

    if not sequences:
        # np.array([]) would be a 1-D float array, which breaks callers that
        # read the window length from shape[1]; keep the 2-D integer layout.
        return (np.empty((0, sequence_length), dtype=int),
                np.empty((0,), dtype=int))

    return np.array(sequences), np.array(targets)

# Define the RNN Model
def build_model(num_merchants, sequence_length, embedding_dim, lstm_units, dropout_rate, learning_rate):
    """Create and compile the Embedding -> LSTM -> Dropout -> Dense classifier.

    Args:
        num_merchants: Size of the embedding vocabulary and of the softmax output.
        sequence_length: Passed to the embedding layer as ``input_length``.
        embedding_dim: Width of each embedding vector.
        lstm_units: Number of units in the LSTM layer.
        dropout_rate: Rate given to the dropout layer after the LSTM.
        learning_rate: Learning rate for the Adam optimizer.

    Returns:
        A compiled ``tf.keras.Sequential`` model using sparse categorical
        cross-entropy loss and reporting accuracy.
    """
    # Layers are created in stacking order and then handed to Sequential.
    embedding = Embedding(input_dim=num_merchants, output_dim=embedding_dim, input_length=sequence_length)
    recurrent = LSTM(lstm_units, return_sequences=False)  # only the last step's output is kept
    dropout = Dropout(dropout_rate)
    classifier = Dense(num_merchants, activation='softmax')

    network = tf.keras.Sequential([embedding, recurrent, dropout, classifier])

    optimizer = Adam(learning_rate=learning_rate)
    network.compile(
        optimizer=optimizer,
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )

    return network

# Function to perform manual grid search
def manual_grid_search(X, y, num_merchants, param_grid, n_splits=5):
    """Score each hyper-parameter set with K-fold cross-validation.

    For every dict in ``param_grid`` a fresh model is built and trained on each
    of ``n_splits`` shuffled folds (``random_state=42``). A fold's score is the
    highest ``val_accuracy`` reached in any epoch; a parameter set's score is
    the mean of its fold scores, which is also printed.

    Args:
        X: Array of input sequences; ``X.shape[1]`` is used as the sequence length.
        y: Array of targets, indexed in step with ``X``.
        num_merchants: Forwarded to ``build_model``.
        param_grid: Iterable of dicts with the keys ``embedding_dim``,
            ``lstm_units``, ``dropout_rate``, ``learning_rate``, ``batch_size``
            and ``epochs``.
        n_splits: Number of cross-validation folds.

    Returns:
        Tuple ``(best_params, best_score)``; ``(None, 0)`` if ``param_grid`` is
        empty.
    """
    # NOTE(review): folds are drawn by shuffling individual windows; if windows
    # from the same customer overlap, train and validation share data. Confirm
    # whether a grouped split is wanted.
    kf = KFold(n_splits=n_splits, shuffle=True, random_state=42)

    best_score = 0
    best_params = None

    for params in param_grid:
        scores = []
        for train_index, val_index in kf.split(X):
            X_train, X_val = X[train_index], X[val_index]
            y_train, y_val = y[train_index], y[val_index]

            # Release the Keras global state held by the previous fold's model;
            # without this, memory grows with every model built in the loop.
            tf.keras.backend.clear_session()

            model = build_model(num_merchants=num_merchants,
                                sequence_length=X.shape[1],
                                embedding_dim=params['embedding_dim'],
                                lstm_units=params['lstm_units'],
                                dropout_rate=params['dropout_rate'],
                                learning_rate=params['learning_rate'])

            history = model.fit(X_train, y_train,
                                epochs=params['epochs'],
                                batch_size=params['batch_size'],
                                validation_data=(X_val, y_val),
                                verbose=0)

            # NOTE(review): taking the best epoch on the validation fold is an
            # optimistic estimate; kept as-is to preserve reported scores.
            scores.append(max(history.history['val_accuracy']))

        mean_score = np.mean(scores)
        print(f"Params: {params}, Mean validation accuracy: {mean_score:.4f}")

        # The first evaluated set is always accepted, so a grid whose scores are
        # all 0.0 still returns a parameter set instead of None.
        if best_params is None or mean_score > best_score:
            best_score = mean_score
            best_params = params

    return best_params, best_score

# Main execution
if __name__ == "__main__":
    # Candidate hyper-parameter sets; they do not depend on the data, so they
    # are listed up front.
    param_grid = [
        dict(embedding_dim=128, lstm_units=128, dropout_rate=0.2, learning_rate=1e-3, batch_size=32, epochs=20),
        dict(embedding_dim=192, lstm_units=256, dropout_rate=0.3, learning_rate=5e-4, batch_size=64, epochs=50),
        dict(embedding_dim=256, lstm_units=512, dropout_rate=0.4, learning_rate=1e-4, batch_size=128, epochs=100),
    ]

    # transaksi_df is expected to exist already (loaded by an earlier cell).
    transaksi_df, le_merchant, le_cif = preprocess_data(transaksi_df)

    # Turn the encoded transactions into (window, next merchant) training pairs.
    sequences, targets = prepare_sequences(transaksi_df)

    # One class per distinct label known to the merchant encoder.
    num_merchants = len(le_merchant.classes_)

    # Cross-validate every candidate and keep the best one.
    best_params, best_score = manual_grid_search(sequences, targets, num_merchants, param_grid)

    print("\nBest Hyperparameters:", best_params)
    print("Best Score:", best_score)



Params: {'embedding_dim': 128, 'lstm_units': 128, 'dropout_rate': 0.2, 'learning_rate': 0.001, 'batch_size': 32, 'epochs': 20}, Mean validation accuracy: 0.0930
Params: {'embedding_dim': 192, 'lstm_units': 256, 'dropout_rate': 0.3, 'learning_rate': 0.0005, 'batch_size': 64, 'epochs': 50}, Mean validation accuracy: 0.0932
Params: {'embedding_dim': 256, 'lstm_units': 512, 'dropout_rate': 0.4, 'learning_rate': 0.0001, 'batch_size': 128, 'epochs': 100}, Mean validation accuracy: 0.0930

Best Hyperparameters: {'embedding_dim': 192, 'lstm_units': 256, 'dropout_rate': 0.3, 'learning_rate': 0.0005, 'batch_size': 64, 'epochs': 50}
Best Score: 0.09323467165231705


In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Embedding, LSTM, Dropout, Dense
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import KFold

# Preprocessing Data
def preprocess_data(transaksi_df):
    """Label-encode the merchant and ``cif`` columns of a transaction frame.

    Args:
        transaksi_df: DataFrame containing ``merchant_name`` and ``cif`` columns.

    Returns:
        Tuple ``(encoded_df, le_merchant, le_cif)``. ``encoded_df`` is a new
        DataFrame with the input's columns plus ``merchant_encoded`` and
        ``cif_encoded``; the other two items are the ``LabelEncoder`` objects
        fitted on ``merchant_name`` and ``cif`` respectively.
    """
    le_merchant = LabelEncoder()
    le_cif = LabelEncoder()

    # assign() returns a new frame, so the caller's DataFrame is not modified.
    # That keeps the cell re-runnable and avoids SettingWithCopy problems when
    # the input is a slice of another frame.
    encoded_df = transaksi_df.assign(
        merchant_encoded=le_merchant.fit_transform(transaksi_df['merchant_name']),
        cif_encoded=le_cif.fit_transform(transaksi_df['cif']),
    )

    return encoded_df, le_merchant, le_cif

# Prepare sequences
def prepare_sequences(transaksi_df, sequence_length=10):
    """Build fixed-length merchant windows and their next-merchant targets.

    Rows are ordered by ``cif`` then ``date`` and grouped by ``cif_encoded``.
    Within each group, every run of ``sequence_length`` consecutive
    ``merchant_encoded`` values becomes one input row, and the value directly
    after that run becomes its target. Groups with ``sequence_length`` rows or
    fewer contribute nothing.

    Args:
        transaksi_df: DataFrame with ``cif``, ``date``, ``cif_encoded`` and
            ``merchant_encoded`` columns. It is not modified.
        sequence_length: Number of values in each input window.

    Returns:
        Tuple ``(sequences, targets)`` of NumPy arrays shaped
        ``(n_windows, sequence_length)`` and ``(n_windows,)``. When no group is
        long enough, empty integer arrays with those shapes are returned.
    """
    ordered = transaksi_df.sort_values(by=['cif', 'date'])

    sequences = []
    targets = []

    # The group key itself is not needed, only the ordered rows of each group.
    for _, group in ordered.groupby('cif_encoded'):
        merchant_list = group['merchant_encoded'].tolist()

        for start in range(len(merchant_list) - sequence_length):
            stop = start + sequence_length
            sequences.append(merchant_list[start:stop])
            targets.append(merchant_list[stop])

    if not sequences:
        # np.array([]) would be a 1-D float array, which breaks callers that
        # read the window length from shape[1]; keep the 2-D integer layout.
        return (np.empty((0, sequence_length), dtype=int),
                np.empty((0,), dtype=int))

    return np.array(sequences), np.array(targets)

# Define the RNN Model
def build_model(num_merchants, sequence_length, embedding_dim, lstm_units, dropout_rate, learning_rate, lstm_activation='tanh'):
    """Create and compile the Embedding -> LSTM -> Dropout -> Dense classifier.

    Args:
        num_merchants: Size of the embedding vocabulary and of the softmax output.
        sequence_length: Passed to the embedding layer as ``input_length``.
        embedding_dim: Width of each embedding vector.
        lstm_units: Number of units in the LSTM layer.
        dropout_rate: Rate given to the dropout layer after the LSTM.
        learning_rate: Learning rate for the Adam optimizer.
        lstm_activation: Activation passed to the LSTM layer. Defaults to
            ``'tanh'`` (the Keras LSTM default) so the six-argument call form
            used by the earlier definition of this function keeps working.

    Returns:
        A compiled ``tf.keras.Sequential`` model using sparse categorical
        cross-entropy loss and reporting accuracy.
    """
    # NOTE(review): activations other than 'tanh' presumably disable the fused
    # cuDNN LSTM kernel and train much slower on GPU - confirm against Keras docs.
    model = tf.keras.Sequential([
        Embedding(input_dim=num_merchants, output_dim=embedding_dim, input_length=sequence_length),
        LSTM(lstm_units, return_sequences=False, activation=lstm_activation),
        Dropout(dropout_rate),
        Dense(num_merchants, activation='softmax')
    ])

    model.compile(optimizer=Adam(learning_rate=learning_rate),
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])

    return model

# Function to perform manual grid search
def manual_grid_search(X, y, num_merchants, param_grid, n_splits=5):
    """Score each hyper-parameter set with K-fold cross-validation.

    For every dict in ``param_grid`` a fresh model is built and trained on each
    of ``n_splits`` shuffled folds (``random_state=42``). A fold's score is the
    highest ``val_accuracy`` reached in any epoch; a parameter set's score is
    the mean of its fold scores, which is also printed.

    Args:
        X: Array of input sequences; ``X.shape[1]`` is used as the sequence length.
        y: Array of targets, indexed in step with ``X``.
        num_merchants: Forwarded to ``build_model``.
        param_grid: Iterable of dicts with the keys ``embedding_dim``,
            ``lstm_units``, ``dropout_rate``, ``learning_rate``, ``batch_size``
            and ``epochs``. ``lstm_activation`` is optional and falls back to
            ``'tanh'``.
        n_splits: Number of cross-validation folds.

    Returns:
        Tuple ``(best_params, best_score)``; ``(None, 0)`` if ``param_grid`` is
        empty.
    """
    # NOTE(review): folds are drawn by shuffling individual windows; if windows
    # from the same customer overlap, train and validation share data. Confirm
    # whether a grouped split is wanted.
    kf = KFold(n_splits=n_splits, shuffle=True, random_state=42)

    best_score = 0
    best_params = None

    for params in param_grid:
        scores = []
        for train_index, val_index in kf.split(X):
            X_train, X_val = X[train_index], X[val_index]
            y_train, y_val = y[train_index], y[val_index]

            # Release the Keras global state held by the previous fold's model;
            # without this, memory grows with every model built in the loop.
            tf.keras.backend.clear_session()

            model = build_model(num_merchants=num_merchants,
                                sequence_length=X.shape[1],
                                embedding_dim=params['embedding_dim'],
                                lstm_units=params['lstm_units'],
                                dropout_rate=params['dropout_rate'],
                                learning_rate=params['learning_rate'],
                                lstm_activation=params.get('lstm_activation', 'tanh'))

            history = model.fit(X_train, y_train,
                                epochs=params['epochs'],
                                batch_size=params['batch_size'],
                                validation_data=(X_val, y_val),
                                verbose=0)

            # NOTE(review): taking the best epoch on the validation fold is an
            # optimistic estimate; kept as-is to preserve reported scores.
            scores.append(max(history.history['val_accuracy']))

        mean_score = np.mean(scores)
        print(f"Params: {params}, Mean validation accuracy: {mean_score:.4f}")

        # The first evaluated set is always accepted, so a grid whose scores are
        # all 0.0 still returns a parameter set instead of None.
        if best_params is None or mean_score > best_score:
            best_score = mean_score
            best_params = params

    return best_params, best_score

# Main execution
if __name__ == "__main__":
    # Expanded set of candidates, each also naming the LSTM activation function.
    # They do not depend on the data, so they are listed up front.
    param_grid = [
        dict(embedding_dim=64, lstm_units=64, dropout_rate=0.1, learning_rate=1e-3, batch_size=32, epochs=20, lstm_activation='tanh'),
        dict(embedding_dim=128, lstm_units=128, dropout_rate=0.2, learning_rate=5e-4, batch_size=64, epochs=30, lstm_activation='relu'),
        dict(embedding_dim=192, lstm_units=256, dropout_rate=0.3, learning_rate=1e-4, batch_size=128, epochs=40, lstm_activation='tanh'),
        dict(embedding_dim=256, lstm_units=512, dropout_rate=0.4, learning_rate=5e-5, batch_size=256, epochs=50, lstm_activation='relu'),
        dict(embedding_dim=384, lstm_units=384, dropout_rate=0.25, learning_rate=7.5e-4, batch_size=96, epochs=35, lstm_activation='sigmoid'),
        dict(embedding_dim=512, lstm_units=768, dropout_rate=0.35, learning_rate=2.5e-4, batch_size=192, epochs=45, lstm_activation='hard_sigmoid'),
    ]

    # transaksi_df is expected to exist already (loaded by an earlier cell).
    transaksi_df, le_merchant, le_cif = preprocess_data(transaksi_df)

    # Turn the encoded transactions into (window, next merchant) training pairs.
    sequences, targets = prepare_sequences(transaksi_df)

    # One class per distinct label known to the merchant encoder.
    num_merchants = len(le_merchant.classes_)

    # Cross-validate every candidate and keep the best one.
    best_params, best_score = manual_grid_search(sequences, targets, num_merchants, param_grid)

    print("\nBest Hyperparameters:", best_params)
    print("Best Score:", best_score)

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Embedding, LSTM, Dropout, Dense
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import KFold

# Preprocessing Data
def preprocess_data(transaksi_df):
    """Label-encode the merchant and ``cif`` columns of a transaction frame.

    Args:
        transaksi_df: DataFrame containing ``merchant_name`` and ``cif`` columns.

    Returns:
        Tuple ``(encoded_df, le_merchant, le_cif)``. ``encoded_df`` is a new
        DataFrame with the input's columns plus ``merchant_encoded`` and
        ``cif_encoded``; the other two items are the ``LabelEncoder`` objects
        fitted on ``merchant_name`` and ``cif`` respectively.
    """
    le_merchant = LabelEncoder()
    le_cif = LabelEncoder()

    # assign() returns a new frame, so the caller's DataFrame is not modified.
    # That keeps the cell re-runnable and avoids SettingWithCopy problems when
    # the input is a slice of another frame.
    encoded_df = transaksi_df.assign(
        merchant_encoded=le_merchant.fit_transform(transaksi_df['merchant_name']),
        cif_encoded=le_cif.fit_transform(transaksi_df['cif']),
    )

    return encoded_df, le_merchant, le_cif

# Prepare sequences
def prepare_sequences(transaksi_df, sequence_length=10):
    """Build fixed-length merchant windows and their next-merchant targets.

    Rows are ordered by ``cif`` then ``date`` and grouped by ``cif_encoded``.
    Within each group, every run of ``sequence_length`` consecutive
    ``merchant_encoded`` values becomes one input row, and the value directly
    after that run becomes its target. Groups with ``sequence_length`` rows or
    fewer contribute nothing.

    Args:
        transaksi_df: DataFrame with ``cif``, ``date``, ``cif_encoded`` and
            ``merchant_encoded`` columns. It is not modified.
        sequence_length: Number of values in each input window.

    Returns:
        Tuple ``(sequences, targets)`` of NumPy arrays shaped
        ``(n_windows, sequence_length)`` and ``(n_windows,)``. When no group is
        long enough, empty integer arrays with those shapes are returned.
    """
    ordered = transaksi_df.sort_values(by=['cif', 'date'])

    sequences = []
    targets = []

    # The group key itself is not needed, only the ordered rows of each group.
    for _, group in ordered.groupby('cif_encoded'):
        merchant_list = group['merchant_encoded'].tolist()

        for start in range(len(merchant_list) - sequence_length):
            stop = start + sequence_length
            sequences.append(merchant_list[start:stop])
            targets.append(merchant_list[stop])

    if not sequences:
        # np.array([]) would be a 1-D float array, which breaks callers that
        # read the window length from shape[1]; keep the 2-D integer layout.
        return (np.empty((0, sequence_length), dtype=int),
                np.empty((0,), dtype=int))

    return np.array(sequences), np.array(targets)

# Define the RNN Model
def build_model(num_merchants, sequence_length, embedding_dim, lstm_units, dropout_rate, learning_rate, lstm_activation='tanh'):
    """Create and compile the Embedding -> LSTM -> Dropout -> Dense classifier.

    Args:
        num_merchants: Size of the embedding vocabulary and of the softmax output.
        sequence_length: Passed to the embedding layer as ``input_length``.
        embedding_dim: Width of each embedding vector.
        lstm_units: Number of units in the LSTM layer.
        dropout_rate: Rate given to the dropout layer after the LSTM.
        learning_rate: Learning rate for the Adam optimizer.
        lstm_activation: Activation passed to the LSTM layer. Defaults to
            ``'tanh'`` (the Keras LSTM default) so the six-argument call form
            used by the earlier definition of this function keeps working.

    Returns:
        A compiled ``tf.keras.Sequential`` model using sparse categorical
        cross-entropy loss and reporting accuracy.
    """
    # NOTE(review): activations other than 'tanh' presumably disable the fused
    # cuDNN LSTM kernel and train much slower on GPU - confirm against Keras docs.
    model = tf.keras.Sequential([
        Embedding(input_dim=num_merchants, output_dim=embedding_dim, input_length=sequence_length),
        LSTM(lstm_units, return_sequences=False, activation=lstm_activation),
        Dropout(dropout_rate),
        Dense(num_merchants, activation='softmax')
    ])

    model.compile(optimizer=Adam(learning_rate=learning_rate),
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])

    return model

# Function to perform manual grid search
def manual_grid_search(X, y, num_merchants, param_grid, n_splits=5):
    """Score each hyper-parameter set with K-fold cross-validation.

    For every dict in ``param_grid`` a fresh model is built and trained on each
    of ``n_splits`` shuffled folds (``random_state=42``). A fold's score is the
    highest ``val_accuracy`` reached in any epoch; a parameter set's score is
    the mean of its fold scores, which is also printed.

    Args:
        X: Array of input sequences; ``X.shape[1]`` is used as the sequence length.
        y: Array of targets, indexed in step with ``X``.
        num_merchants: Forwarded to ``build_model``.
        param_grid: Iterable of dicts with the keys ``embedding_dim``,
            ``lstm_units``, ``dropout_rate``, ``learning_rate``, ``batch_size``
            and ``epochs``. ``lstm_activation`` is optional and falls back to
            ``'tanh'``.
        n_splits: Number of cross-validation folds.

    Returns:
        Tuple ``(best_params, best_score)``; ``(None, 0)`` if ``param_grid`` is
        empty.
    """
    # NOTE(review): folds are drawn by shuffling individual windows; if windows
    # from the same customer overlap, train and validation share data. Confirm
    # whether a grouped split is wanted.
    kf = KFold(n_splits=n_splits, shuffle=True, random_state=42)

    best_score = 0
    best_params = None

    for params in param_grid:
        scores = []
        for train_index, val_index in kf.split(X):
            X_train, X_val = X[train_index], X[val_index]
            y_train, y_val = y[train_index], y[val_index]

            # Release the Keras global state held by the previous fold's model;
            # without this, memory grows with every model built in the loop.
            tf.keras.backend.clear_session()

            model = build_model(num_merchants=num_merchants,
                                sequence_length=X.shape[1],
                                embedding_dim=params['embedding_dim'],
                                lstm_units=params['lstm_units'],
                                dropout_rate=params['dropout_rate'],
                                learning_rate=params['learning_rate'],
                                lstm_activation=params.get('lstm_activation', 'tanh'))

            history = model.fit(X_train, y_train,
                                epochs=params['epochs'],
                                batch_size=params['batch_size'],
                                validation_data=(X_val, y_val),
                                verbose=0)

            # NOTE(review): taking the best epoch on the validation fold is an
            # optimistic estimate; kept as-is to preserve reported scores.
            scores.append(max(history.history['val_accuracy']))

        mean_score = np.mean(scores)
        print(f"Params: {params}, Mean validation accuracy: {mean_score:.4f}")

        # The first evaluated set is always accepted, so a grid whose scores are
        # all 0.0 still returns a parameter set instead of None.
        if best_params is None or mean_score > best_score:
            best_score = mean_score
            best_params = params

    return best_params, best_score

# Main execution
if __name__ == "__main__":
    # Expanded set of candidates. They do not depend on the data, so they are
    # listed up front.
    param_grid = [
        # Original configurations
        dict(embedding_dim=128, lstm_units=128, dropout_rate=0.2, learning_rate=1e-3, batch_size=32, epochs=20, lstm_activation='tanh'),
        dict(embedding_dim=192, lstm_units=256, dropout_rate=0.3, learning_rate=5e-4, batch_size=64, epochs=50, lstm_activation='tanh'),
        dict(embedding_dim=256, lstm_units=512, dropout_rate=0.4, learning_rate=1e-4, batch_size=128, epochs=100, lstm_activation='tanh'),

        # Variations on embedding dimensions
        dict(embedding_dim=64, lstm_units=128, dropout_rate=0.2, learning_rate=1e-3, batch_size=32, epochs=30, lstm_activation='relu'),
        dict(embedding_dim=320, lstm_units=256, dropout_rate=0.3, learning_rate=5e-4, batch_size=64, epochs=60, lstm_activation='tanh'),

        # Variations on LSTM units
        dict(embedding_dim=192, lstm_units=64, dropout_rate=0.3, learning_rate=5e-4, batch_size=64, epochs=40, lstm_activation='relu'),
        dict(embedding_dim=256, lstm_units=384, dropout_rate=0.4, learning_rate=1e-4, batch_size=128, epochs=80, lstm_activation='tanh'),

        # Variations on dropout rate
        dict(embedding_dim=128, lstm_units=128, dropout_rate=0.1, learning_rate=1e-3, batch_size=32, epochs=25, lstm_activation='sigmoid'),
        dict(embedding_dim=256, lstm_units=512, dropout_rate=0.5, learning_rate=1e-4, batch_size=128, epochs=90, lstm_activation='tanh'),

        # Variations on learning rate
        dict(embedding_dim=192, lstm_units=256, dropout_rate=0.3, learning_rate=1e-5, batch_size=64, epochs=70, lstm_activation='relu'),
        dict(embedding_dim=256, lstm_units=512, dropout_rate=0.4, learning_rate=5e-3, batch_size=128, epochs=40, lstm_activation='tanh'),

        # Variations on batch size
        dict(embedding_dim=128, lstm_units=128, dropout_rate=0.2, learning_rate=1e-3, batch_size=16, epochs=30, lstm_activation='hard_sigmoid'),
        dict(embedding_dim=256, lstm_units=512, dropout_rate=0.4, learning_rate=1e-4, batch_size=256, epochs=60, lstm_activation='tanh'),

        # Variations on epochs
        dict(embedding_dim=192, lstm_units=256, dropout_rate=0.3, learning_rate=5e-4, batch_size=64, epochs=30, lstm_activation='relu'),
        dict(embedding_dim=256, lstm_units=512, dropout_rate=0.4, learning_rate=1e-4, batch_size=128, epochs=150, lstm_activation='tanh'),

        # Some additional combinations
        dict(embedding_dim=160, lstm_units=320, dropout_rate=0.35, learning_rate=2.5e-4, batch_size=96, epochs=75, lstm_activation='tanh'),
        dict(embedding_dim=224, lstm_units=448, dropout_rate=0.45, learning_rate=7.5e-5, batch_size=160, epochs=120, lstm_activation='relu'),
    ]

    # transaksi_df is expected to exist already (loaded by an earlier cell).
    transaksi_df, le_merchant, le_cif = preprocess_data(transaksi_df)

    # Turn the encoded transactions into (window, next merchant) training pairs.
    sequences, targets = prepare_sequences(transaksi_df)

    # One class per distinct label known to the merchant encoder.
    num_merchants = len(le_merchant.classes_)

    # Cross-validate every candidate and keep the best one.
    best_params, best_score = manual_grid_search(sequences, targets, num_merchants, param_grid)

    print("\nBest Hyperparameters:", best_params)
    print("Best Score:", best_score)