In [5]:
import functions as fn

In [6]:
import numpy as np
import pandas as pd
from collections import defaultdict
from sklearn.model_selection import train_test_split

In [7]:
def find_h_hat(dataframe_x, feature_columns ):
    X = dataframe_x[feature_columns].values
    theta = np.random.randn( len( feature_columns ) ) * 0.01
    min_h = 15.0 / ( 24 * 60 ) # 15 Mins
    max_h = 274 # 9 months
    theta_x = X.dot(theta)
    h_hat = 2 ** theta_x
    h_hat = np.clip( h_hat, min_h, max_h )
    return h_hat

In [8]:
def find_p_hat(h_hat, dataframe_x):
    Delta = dataframe_x['t'].values
    p_hat = 2 ** -Delta / h_hat
    p_hat = np.clip( p_hat, 0.0001, 0.9999 )
    return p_hat

In [9]:
def ada_derivative( ):
    grad_theta = -2 * X.T.dot( ( p - p_hat ) * np.log( 2 ) * 

SyntaxError: incomplete input (80324573.py, line 2)

In [18]:
class hlr_adagrad:
    
    def __init__(self, feature_columns, alpha=0.01, lambda_=0.1, eta=0.001, epsilon=0.001):
        
        """
        Initializes the Half Life Regression Model with adaptive learning rate (AdaGrad).

        :param feature_columns: List of feature names.
        :param alpha: Weight for the half-life term in the loss function.
        :param lambda_: Regularization parameter.
        :param eta: Initial learning rate for AdaGrad.
        :param epsilon: Small value to avoid division by zero in AdaGrad.
        """        

        self.theta = np.random.randn( len( feature_columns ) ) * 0.01
        self.feature_columns = feature_columns
        self.alpha = alpha
        self.lambda_ = lambda_
        self.eta = eta
        self.epsilon = epsilon

    def _find_h_hat(self, X):
        min_h = 15.0 / ( 24 * 60 ) # 15 Mins
        max_h = 274 # 9 months
        theta_x = X.dot(self.theta)
        h_hat = 2 ** theta_x
        h_hat = np.clip( h_hat, min_h, max_h )
        return h_hat

    def _find_p_hat(self, h_hat, Delta):
        p_hat = 2 ** -Delta / h_hat
        p_hat = np.clip( p_hat, 0.0001, 0.9999 )
        return p_hat

    def _cost_function(self, X, p, Delta):
        h_hat = self._find_h_hat(X)
        p_hat = self._find_p_hat(h_hat, Delta)
        h = -Delta / np.log2(p + self.epsilon)
        total_cost = (p - p_hat) ** 2 + self.alpha * (h - h_hat) ** 2 + self.lambda_ * np.sum(self.theta ** 2)
        # total_cost += self.lambda_ * np.sum(self.theta ** 2)
        return total_cost

    def train(self, dataframe_x, dataframe_y, max_iter=1000000, tolerance=1e-7, print_iter=1000):
        X = dataframe_x[self.feature_columns].values
        p = dataframe_y.values.flatten()
        Delta = dataframe_x['t'].values
        grad_accumulation = np.zeros_like(self.theta)
        cost_history = []

        for iteration in range(max_iter):
            h_hat = self._find_h_hat(X)
            p_hat = self._find_p_hat(h_hat, Delta)
            # grad_theta = -2 * X.T.dot((p - p_hat) * np.log(2) * p_hat * (2 ** (-Delta / h_hat)) + 
                                      # self.alpha * (h_hat + Delta / np.log2(p)) * np.log(2) * h_hat) + 2 * self.lambda_ * self.theta
                
            grad_theta = -2 * X.T.dot( ( p_hat - p ) * np.log( 2 ) * p_hat * (  -Delta / h_hat  ) + 2 * self.alpha * ( h_hat + Delta / np.log2( p ) ) * np.log( 2 ) * h_hat ) + 2 * self.lambda_ * self.theta
            
            grad_accumulation += grad_theta ** 2

            adjusted_eta = self.eta / (np.sqrt(grad_accumulation) + self.epsilon) 

            self.theta -= adjusted_eta * grad_theta / len(X)
            
            print( f'At iteration { iteration } self.theta is { self.theta }' )    
            
            cost = self._cost_function(X, p, Delta)
            cost_history.append(cost)

            if iteration % print_iter == 0:
                print(f"Iteration {iteration}, Loss: {len(cost)}")

            # Check for convergence
            if iteration > 0 and np.abs(cost_history[-1] - cost_history[-2]) < tolerance:
                print(f"Convergence reached at iteration {iteration}, Loss {cost}")
                break

        return cost_history

    def predict(self, row):
        x = np.array([row[feature] for feature in self.feature_columns])
        h_hat = self._find_h_hat(x[np.newaxis, :])[0]
        p_hat = self._find_p_hat(h_hat, row['t'])
        
        return p_hat, h_hat

In [19]:
data        = pd.read_csv( 'subset_1000.csv' )
dummies     = pd.get_dummies( data[ 'lexeme' ], prefix = 'cat', dtype=float )
dummies_col = dummies.columns.to_list()
df          = pd.concat( [ data, dummies ], axis = 1 )

pred_vars = [ 'right', 'wrong', 'bias', 't' ]
dummies_  = dummies_col + pred_vars

X_train, X_test, Y_train, Y_test = train_test_split( df[ dummies_ ], 
                                                     df[ 'p' ], 
                                                     test_size    = 0.30,
                                                     random_state = 7 )

dummies_.remove('t')

In [20]:
adagrad = hlr_adagrad(feature_columns=dummies_, lambda_ = 0.01, eta = 0.001)
cost_history = adagrad.train(X_train, Y_train )

At iteration 0 self.theta is [-7.25320688e-03  1.18747335e-02  1.84288614e-03 -5.77769845e-03
 -6.34036897e-03 -2.03233713e-02 -1.42399321e-02  1.36001195e-02
 -3.10675801e-03  3.36613163e-03  1.45311907e-02 -7.18294554e-04
  2.67784656e-03 -2.44628538e-03 -6.76731378e-03  8.50077566e-03
  1.85838986e-03  2.70683517e-04 -8.24745974e-03 -6.59346551e-03
  1.20821282e-02 -2.19783203e-03  2.48795680e-02  1.15266552e-02
 -9.62258391e-03 -2.45210575e-04  5.11787877e-03 -1.37213950e-02
 -5.24320703e-03  4.61108156e-03  6.45666213e-03  8.24798129e-03
 -1.22684855e-02 -2.44637486e-03  2.53138636e-03 -3.71614859e-03
  2.85820517e-03  5.96936096e-03 -6.42018518e-04  2.73463178e-03
 -1.27722861e-02  3.78989691e-03  5.92322233e-03 -6.77405752e-03
  1.31753566e-03 -1.20044505e-02  7.52510407e-03  1.37646418e-02
  6.41364287e-03 -1.67310240e-02 -1.11980272e-03  6.37771194e-03
  2.26108721e-02  1.02246333e-03 -1.39930257e-02  6.95943000e-03
  9.41949878e-04 -7.33238465e-03  1.17754617e-03  4.22871666e


KeyboardInterrupt



In [None]:
p_hat = find_p_hat( h_hat, X_train )
p_hat