In [324]:
import numpy as np
import pandas as pd
from collections import defaultdict
from sklearn.model_selection import train_test_split

In [333]:
class hlr_adagrad:
    def __init__(self, feature_columns, alpha=0.01, lambda_=0.1, eta=0.1, epsilon=0.001):
        
        """
        Initializes the Half Life Regression Model with adaptive learning rate (AdaGrad).

        :param feature_columns: List of feature names.
        :param alpha: Weight for the half-life term in the loss function.
        :param lambda_: Regularization parameter.
        :param eta: Initial learning rate for AdaGrad.
        :param epsilon: Small value to avoid division by zero in AdaGrad.
        """        

        self.theta = np.random.randn( len( feature_columns ) ) * 0.01
        self.feature_columns = feature_columns
        self.alpha = alpha
        self.lambda_ = lambda_
        self.eta = eta
        self.epsilon = epsilon

    def _find_h_hat(self, X):
        min_h = 15.0 / ( 24 * 60 ) # 15 Mins
        max_h = 274 # 9 months
        theta_x = X.dot(self.theta)
        h_hat = 2 ** theta_x
        h_hat = np.clip( h_hat, min_h, max_h )
        return h_hat

    def _find_p_hat(self, h_hat, Delta):
        p_hat = 2 ** -Delta / h_hat
        p_hat = np.clip( p_hat, 0.0001, 0.9999 )
        return p_hat

    def _cost_function(self, X, p, Delta):
        h_hat = self._find_h_hat(X)
        p_hat = self._find_p_hat(h_hat, Delta)
        h = -Delta / np.log2(p + self.epsilon)
        total_cost = np.mean((p - p_hat) ** 2 + self.alpha * (h - h_hat) ** 2)
        total_cost += self.lambda_ * np.sum(self.theta ** 2)
        return total_cost

    def train(self, dataframe_x, dataframe_y, max_iter=1000000, tolerance=1e-7, print_iter=1000):
        X = dataframe_x[self.feature_columns].values
        p = dataframe_y.values.flatten()
        Delta = dataframe_x['t'].values
        grad_accumulation = np.zeros_like(self.theta)
        cost_history = []

        for iteration in range(max_iter):
            h_hat = self._find_h_hat(X)
            p_hat = self._find_p_hat(h_hat, Delta)
            grad_theta = -2 * X.T.dot((p - p_hat) * np.log(2) * p_hat * (2 ** (-Delta / h_hat)) + 
                                      self.alpha * (h_hat + Delta / np.log2(p)) * np.log(2) * h_hat) + 2 * self.lambda_ * self.theta
            grad_accumulation += grad_theta ** 2

            adjusted_eta = self.eta / (np.sqrt(grad_accumulation) + self.epsilon)
            self.theta -= adjusted_eta * grad_theta / len(X)
            cost = self._cost_function(X, p, Delta)
            cost_history.append(cost)

            if iteration % print_iter == 0:
                print(f"Iteration {iteration}, Loss: {cost}")

            # Check for convergence
            if iteration > 0 and np.abs(cost_history[-1] - cost_history[-2]) < tolerance:
                print(f"Convergence reached at iteration {iteration}, Loss {cost}")
                break

        return cost_history

    def predict(self, row):
        x = np.array([row[feature] for feature in self.feature_columns])
        h_hat = self._find_h_hat(x[np.newaxis, :])[0]
        p_hat = self._find_p_hat(h_hat, row['t'])
        
        return p_hat, h_hat

In [334]:
def calculate_mae(y_true, y_pred):
    return np.mean(np.abs(np.array(y_true) - np.array(y_pred)))

## Datos

In [335]:
data        = pd.read_csv( 'subset_1000.csv' )
dummies     = pd.get_dummies( data[ 'lexeme' ], prefix = 'cat', dtype=float )
dummies_col = dummies.columns.to_list()
df          = pd.concat( [ data, dummies ], axis = 1 )

pred_vars = [ 'right', 'wrong', 'bias', 't' ]
dummies_  = dummies_col + pred_vars

X_train, X_test, Y_train, Y_test = train_test_split( df[ dummies_ ], 
                                                     df[ 'p' ], 
                                                     test_size    = 0.30,
                                                     random_state = 7 )

dummies_.remove('t')

In [328]:
X_train

Unnamed: 0,cat_en:'/'<apos>,cat_en:'s/'s<gen>,cat_en:<*sf>/actor<n><*numb>,cat_en:<*sf>/authority<n><*numb>,cat_en:<*sf>/bicycle<n><*numb>,cat_en:<*sf>/car<n><*numb>,cat_en:<*sf>/coat<n><*numb>,cat_en:<*sf>/date<n><*numb>,cat_en:<*sf>/difference<n><*numb>,cat_en:<*sf>/house<n><*numb>,...,cat_en:yesterday/yesterday<adv>,cat_en:you/prpers<@ij:thank_you>,cat_en:you/prpers<prn><obj><p2><mf><sp>,cat_en:you/prpers<prn><subj><p2><mf><sp>,cat_en:your/your<det><pos><sp>,cat_en:yours/yours<prn><pos><mf><sp>,right,wrong,bias,t
822,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,4.242641,1.732051,1.0,1.814433
188,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.414214,1.414214,1.0,3.065637
251,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.732051,1.414214,1.0,12.009306
71,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,4.000000,1.414214,1.0,0.016609
664,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,3.000000,1.000000,1.0,0.003981
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
579,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,4.242641,1.000000,1.0,0.006111
502,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,2.828427,1.414214,1.0,42.254132
537,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,3.464102,1.414214,1.0,0.003727
196,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,2.236068,1.000000,1.0,2.939039


In [336]:
%%time

model = hlr_adagrad(feature_columns=dummies_, eta = 1)
cost_history = model.train(X_train, Y_train)

Iteration 0, Loss: 3199454.5893432465
Iteration 1000, Loss: 3199423.9022667613
Iteration 2000, Loss: 3199414.7615177846
Iteration 3000, Loss: 3199408.8884899365
Iteration 4000, Loss: 3199404.5707250065
Iteration 5000, Loss: 3199401.178550703
Iteration 6000, Loss: 3199398.403597668
Iteration 7000, Loss: 3199396.0702441493
Iteration 8000, Loss: 3199394.0682614013
Iteration 9000, Loss: 3199392.3239797414
Iteration 10000, Loss: 3199390.785648278
Iteration 11000, Loss: 3199389.4154871223
Iteration 12000, Loss: 3199388.185088869
Iteration 13000, Loss: 3199387.0725116553
Iteration 14000, Loss: 3199386.060360404
Iteration 15000, Loss: 3199385.1347517953
Iteration 16000, Loss: 3199384.2844770085
Iteration 17000, Loss: 3199383.5002990807
Iteration 18000, Loss: 3199382.7745021475
Iteration 19000, Loss: 3199382.1005863477
Iteration 20000, Loss: 3199381.4730182905
Iteration 21000, Loss: 3199380.887086462
Iteration 22000, Loss: 3199380.338727083
Iteration 23000, Loss: 3199379.824445852
Iteration 240

In [259]:
# Make predictions over the same dataset

predictions = []
for _, row in X_test.iterrows():
    predicted_p, predicted_h = model.predict(row)
    predictions.append((predicted_p, predicted_h))

X_test['predicted_p'] = [pred[0] for pred in predictions]
X_test['predicted_h'] = [pred[1] for pred in predictions]

Y_pred = X_test[ 'predicted_p' ]

In [260]:
calculate_mae( Y_test, Y_pred )

0.10601866568704317

In [296]:
%%time

model = HalfLifeRegressionModelRMSprop(feature_columns=dummies_)
cost_history = model.train(X_train, Y_train)

Iteration 0, Loss: 3199453.8892288534
Iteration 1000, Loss: 3199453.800251489
Iteration 2000, Loss: 3199453.7947600908
Iteration 3000, Loss: 3199453.7950373953
Iteration 4000, Loss: 3199453.7954465696
Iteration 5000, Loss: 3199453.7959860917
Iteration 6000, Loss: 3199453.7966551343
Iteration 7000, Loss: 3199453.797452915
Iteration 8000, Loss: 3199453.7983787046
Iteration 9000, Loss: 3199453.799431828
Iteration 10000, Loss: 3199453.800611668
Iteration 11000, Loss: 3199453.8019176624
Iteration 12000, Loss: 3199453.8033493026
Iteration 13000, Loss: 3199453.8049061312
Iteration 14000, Loss: 3199453.806587741
Iteration 15000, Loss: 3199453.80839377
Iteration 16000, Loss: 3199453.810323906
Iteration 17000, Loss: 3199453.8123778813
Iteration 18000, Loss: 3199453.8145554727
Iteration 19000, Loss: 3199453.8168564974
Iteration 20000, Loss: 3199453.819280811
Iteration 21000, Loss: 3199453.821828303
Iteration 22000, Loss: 3199453.824498887
Iteration 23000, Loss: 3199453.8272925015
Iteration 24000,

KeyboardInterrupt: 