In [18]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

In [34]:
def find_h_hat( theta, x ):
    
    '''
    Calculate the estimated half-life h_hat.

    Parameters:
    - theta (np.array): Model parameters.
    - x (np.array): Feature vector for a data instance.

    Returns:
    - float: Estimated half-life h_hat.
    '''
    
    theta_x = np.clip( theta.dot( x ), -10, 10 )
    h_hat   = 2 ** theta_x
    
    return h_hat

In [35]:
def find_p_hat( h_hat, Delta ):
    
    '''
    Calculate the predicted probability of recall p_hat.

    Parameters:
    - h_hat (float): Estimated half-life.
    - Delta (float): Lag time since the item was last practiced.

    Returns:
    - float: Predicted probability of recall p_hat.
    '''
    
    p_hat = 2 ** np.clip( -Delta / h_hat, -10, 10 )  # Prevent underflow/overflow
    
    return p_hat

In [36]:
def cost_function( theta, X, p, Delta, D, alpha, lambda_, epsilon ):
    
    '''
    Calculate the cost for the given theta values.

    Parameters:
    - theta (np.array): Model parameters.
    - X (np.array): Feature vectors for each data instance.
    - p (np.array): Observed recall rates.
    - Delta (np.array): Lag times.
    - alpha (float): Weight for the half-life term in the loss function.
    - lambda_ (float): Regularization parameter.

    Returns:
    - float: Total cost.
    
    '''
    total_cost = 0

    for t in range( D ):
        h_hat = find_h_hat( theta, X[ t ] )
        p_hat = find_p_hat( h_hat, Delta[ t ] )

        # if p[ t ] > 0:
        #     h = -Delta[ t ] / np.log2( p[ t ] )
        # else:
        #     h = 0
        
        h = h = -Delta[ t ] / np.log2( p[ t ] + epsilon )

        total_cost += ( p[ t ] - p_hat ) ** 2 + alpha * ( h - h_hat ) ** 2

    total_cost += lambda_ * np.sum( theta ** 2 )
    
    return total_cost / D  # Average cost per instance

In [63]:
import numpy as np

def adagrad_optimization(X, p, Delta, alpha, lambda_, eta, D, n_iter, tolerance=1e-5, max_iter=200000, epsilon=1e-8):
    """
    Optimize theta using the AdaGrad optimization algorithm, stopping when convergence is reached.

    Parameters:
    - X (np.array): Feature vectors for each data instance.
    - p (np.array): Observed recall rates.
    - Delta (np.array): Lag times since each item was last practiced.
    - alpha (float): Weight for the half-life term in the loss function.
    - lambda_ (float): Regularization parameter.
    - eta (float): Learning rate.
    - theta_initial (np.array): Initial theta values.
    - tolerance (float): Tolerance level for convergence.
    - max_iter (int): Maximum number of iterations.
    - epsilon (float): Small constant to prevent division by zero.

    Returns:
    - np.array: Optimized theta values.
    - list: Cost per iteration.
    """
    theta = np.random.randn( X.shape[ 1 ] )
    grad_accumulation = np.zeros_like(theta)
    cost_history = []

    for iteration in range(max_iter):
        grad_theta = np.zeros_like(theta)
        cost = cost_function(theta, X, p, Delta, D, alpha, lambda_, epsilon )

        for t in range(len(X)):
            h_hat = find_h_hat(theta, X[t])
            p_hat = find_p_hat(h_hat, Delta[t])

            # Compute the gradients for each theta
            for k in range(len(theta)):
                term1 = 2 * (p[t] - p_hat) * np.log(2) * p_hat * (2 ** (-Delta[t] / h_hat)) * X[t][k]
                term2 = 2 * alpha * (h_hat + Delta[t] / np.log2(p[t])) * np.log(2) * h_hat * X[t][k]
                term3 = 2 * lambda_ * theta[k]
                grad_theta[k] += term1 + term2 + term3

        # Update the accumulated gradient
        grad_accumulation += grad_theta ** 2

        # Update theta using AdaGrad adjustment
        adjusted_eta = eta / (np.sqrt(grad_accumulation) + epsilon)
        theta -= adjusted_eta * grad_theta / len(X)

        cost_history.append(cost)
        
        if iteration % n_iter == 0:
            print( f'At iteration { iteration } cost is { cost }' )

        # Check for convergence
        if iteration > 0 and np.abs(cost_history[-1] - cost_history[-2]) < tolerance:
            break

    return theta, cost_history

In [105]:
import pandas as pd
import math
from collections import defaultdict

class HalfLifeRegressionModel(object):
    def __init__(self, lrate=.001, hlwt=.01, l2wt=.1, sigma=1., feature_columns=[]):
        self.weights = defaultdict(float)
        self.fcounts = defaultdict(int)
        self.lrate = lrate
        self.hlwt = hlwt
        self.l2wt = l2wt
        self.sigma = sigma
        self.feature_columns = feature_columns

    def halflife(self, features):
        try:
            dp = sum([self.weights[k]*features[k] for k in self.feature_columns])
            return max(min(2 ** dp, MAX_HALF_LIFE), MIN_HALF_LIFE)
        except:
            return MAX_HALF_LIFE

    def predict(self, row):
        h = self.halflife(row)
        p = 2 ** (-row['t']/h)
        return max(min(p, 0.9999), 0.0001), h

    def train_update(self, row):
        p, h = self.predict(row)
        dlp_dw = 2.*(p-row['p'])*(LN2**2)*p*(row['t']/h)
        dlh_dw = 2.*(h-row['h'])*LN2*h
        for feature in self.feature_columns:
            rate = self.lrate / math.sqrt(1 + self.fcounts[feature])
            x_k = row[feature]
            self.weights[feature] -= rate * dlp_dw * x_k
            self.weights[feature] -= rate * self.hlwt * dlh_dw * x_k
            self.weights[feature] -= rate * self.l2wt * self.weights[feature] / self.sigma**2
            self.fcounts[feature] += 1

    def train(self, dataframe):
        for _, row in dataframe.iterrows():
            self.train_update(row)

In [106]:
# Datos de entrenamiento
data = {
    'p': [0.8, 0.6],
    't': [1, 2],
    'feature1': [0.5, 0.3],
    'feature2': [0.2, 0.4],
    'h': [50, 60]
}

train_df = pd.DataFrame(data)
train_df

Unnamed: 0,p,t,feature1,feature2,h
0,0.8,1,0.5,0.2,50
1,0.6,2,0.3,0.4,60


In [104]:
feature_columns = ['feature1', 'feature2']
model = HalfLifeRegressionModel(feature_columns=feature_columns)
model.train(train_df)

[0.15573727682682853,
 0.156422227129307,
 0.15691298093920147,
 0.15731725736031754,
 0.15766986359083204,
 0.15798716000770952,
 0.15827836774438186,
 0.1585492857867951,
 0.15880384041974052,
 0.1590448355278803,
 0.15927435592395062,
 0.15949400161206267,
 0.15970503224861365,
 0.15990846058434396,
 0.16010511531260102,
 0.16029568473879108,
 0.16048074797136075,
 0.16066079773296177,
 0.16083625738886043,
 0.16100749388904,
 0.16117482776198844,
 0.16133854094162853,
 0.16149888297528864,
 0.16165607600406318,
 0.1618103187998084,
 0.1619617900683712,
 0.16211065117574652,
 0.16225704841578584,
 0.16240111491029918,
 0.16254297221185698,
 0.16268273166424002,
 0.1628204955638732,
 0.16295635815670795,
 0.16309040649818046,
 0.16322272119855216,
 0.1633533770717674,
 0.16348244370266862,
 0.1636099859447826,
 0.16373606435878985,
 0.16386073560009562,
 0.16398405276254008,
 0.16410606568416786,
 0.16422682122005267,
 0.16434636348641396,
 0.16446473407963352,
 0.16458197227325694,


In [93]:
# Datos de prueba
test_data = {
    't': [1.5, 2.0, 0.5, 1.0, 2.5],
    'feature1': [0.4, 0.6, 0.2, 0.3, 0.5],
    'feature2': [0.3, 0.1, 0.5, 0.4, 0.2]
}

test_df = pd.DataFrame(test_data)
test_df

Unnamed: 0,t,feature1,feature2
0,1.5,0.4,0.3
1,2.0,0.6,0.1
2,0.5,0.2,0.5
3,1.0,0.3,0.4
4,2.5,0.5,0.2


In [107]:
results = test_df.copy()
results['predicted_p'] = None
results['predicted_h'] = None

# Hacer la predicción para cada fila
for index, row in test_df.iterrows():
    predicted_p, predicted_h = model.predict(row)
    results.at[index, 'predicted_p'] = predicted_p
    results.at[index, 'predicted_h'] = predicted_h

# Mostrar los resultados
results

Unnamed: 0,t,feature1,feature2,predicted_p,predicted_h
0,1.5,0.4,0.3,0.378945,1.071474
1,2.0,0.6,0.1,0.11918,0.651723
2,0.5,0.2,0.5,0.821403,1.76157
3,1.0,0.3,0.4,0.603791,1.373855
4,2.5,0.5,0.2,0.125721,0.835646


In [108]:
test_df

Unnamed: 0,t,feature1,feature2
0,1.5,0.4,0.3
1,2.0,0.6,0.1
2,0.5,0.2,0.5
3,1.0,0.3,0.4
4,2.5,0.5,0.2


In [64]:
# Ejemplp pequeño

# Creating a sample DataFrame to use with the optimize_theta function
data = {
    "feature1": [0.5, 0.6, 0.7, 0.8, 0.9],
    "feature2": [0.1, 0.2, 0.3, 0.4, 0.5],
    "feature3": [0.2, 0.3, 0.4, 0.5, 0.6],
    "recall_rate": [0.9, 0.8, 0.7, 0.6, 0.5],
    "lag_time": [1, 2, 3, 4, 5]
}
df = pd.DataFrame(data)

# Converting DataFrame columns to numpy arrays
X = df[["feature1", "feature2", "feature3"]].values
p = df["recall_rate"].values
Delta = df["lag_time"].values

# Set the parameters for optimization
D = len(df)  # Number of data instances
alpha = 0.5
lambda_ = 0.1
eta = 0.01

In [96]:
import numpy as np
from collections import defaultdict

class HalfLifeRegressionModel:
    def __init__(self, feature_columns, alpha=0.01, lambda_=0.1, eta=0.01, epsilon=1e-8):
        self.theta = np.random.randn(len(feature_columns))
        self.feature_columns = feature_columns
        self.alpha = alpha
        self.lambda_ = lambda_
        self.eta = eta
        self.epsilon = epsilon

    def _find_h_hat(self, x):
        theta_x = np.clip(np.dot(self.theta, x), -10, 10)
        h_hat = 2 ** theta_x
        return h_hat

    def _find_p_hat(self, h_hat, Delta):
        p_hat = 2 ** np.clip(-Delta / h_hat, -10, 10)  # Prevent underflow/overflow
        return p_hat

    def _cost_function(self, X, p, Delta):
        D = len(X)
        total_cost = 0

        for t in range(D):
            h_hat = self._find_h_hat(X[t])
            p_hat = self._find_p_hat(h_hat, Delta[t])
            h = -Delta[t] / np.log2(p[t] + self.epsilon)
            total_cost += (p[t] - p_hat) ** 2 + self.alpha * (h - h_hat) ** 2

        total_cost += self.lambda_ * np.sum(self.theta ** 2)
        return total_cost / D  # Average cost per instance

    def train(self, dataframe, n_iter=10000, tolerance=1e-5):
        X = dataframe[self.feature_columns].values
        p = dataframe['p'].values
        Delta = dataframe['t'].values
        grad_accumulation = np.zeros_like(self.theta)
        cost_history = []

        for iteration in range(n_iter):
            grad_theta = np.zeros_like(self.theta)
            cost = self._cost_function(X, p, Delta)

            for t in range(len(X)):
                h_hat = self._find_h_hat(X[t])
                p_hat = self._find_p_hat(h_hat, Delta[t])

                # Compute the gradients for each theta
                for k in range(len(self.theta)):
                    term1 = 2 * (p[t] - p_hat) * np.log(2) * p_hat * (2 ** (-Delta[t] / h_hat)) * X[t][k]
                    term2 = 2 * self.alpha * (h_hat + Delta[t] / np.log2(p[t])) * np.log(2) * h_hat * X[t][k]
                    term3 = 2 * self.lambda_ * self.theta[k]
                    grad_theta[k] += term1 + term2 - term3

            # Update the accumulated gradient
            grad_accumulation += grad_theta ** 2

            # Update theta using AdaGrad adjustment
            adjusted_eta = self.eta / (np.sqrt(grad_accumulation) + self.epsilon)
            self.theta -= adjusted_eta * grad_theta / len(X)

            cost_history.append(cost)

            # Check for convergence
            if iteration > 0 and np.abs(cost_history[-1] - cost_history[-2]) < tolerance:
                break

        return cost_history

    def predict(self, row):
        x = np.array([row[feature] for feature in self.feature_columns])
        h_hat = self._find_h_hat(x)
        p_hat = self._find_p_hat(h_hat, row['t'])
        return p_hat, h_hat


In [97]:
import pandas as pd
import numpy as np

# Simulación de datos de ejemplo
np.random.seed(0)  # Para reproducibilidad
data_size = 100  # Número de filas en el conjunto de datos

data = {
    'feature1': np.random.rand(data_size),
    'feature2': np.random.rand(data_size),
    't': np.random.rand(data_size) * 10,  # Supongamos que 't' está en un rango de 0 a 10
    'p': np.random.rand(data_size)  # Probabilidad de recuerdo simulada
}

# Convertir en DataFrame
dataframe = pd.DataFrame(data)
dataframe

Unnamed: 0,feature1,feature2,t,p
0,0.548814,0.677817,3.117959,0.906555
1,0.715189,0.270008,6.963435,0.774047
2,0.602763,0.735194,3.777518,0.333145
3,0.544883,0.962189,1.796037,0.081101
4,0.423655,0.248753,0.246787,0.407241
...,...,...,...,...
95,0.183191,0.490459,2.243170,0.958983
96,0.586513,0.227415,0.978445,0.355369
97,0.020108,0.254356,8.621915,0.356707
98,0.828940,0.058029,9.729195,0.016329


In [98]:
feature_columns = ['feature1', 'feature2']
model = HalfLifeRegressionModel(feature_columns)

# Entrenar el modelo
cost_history = model.train(dataframe)

In [99]:
cost_history

[4.47378907355776,
 4.47376968813008,
 4.473755977744204,
 4.4737447817841485,
 4.473735084798954]

In [103]:
# Hacer predicciones en el mismo conjunto de datos
predictions = []
for _, row in dataframe.iterrows():
    predicted_p, predicted_h = model.predict(row)
    predictions.append((predicted_p, predicted_h))

# Agregar las predicciones al dataframe para comparar
dataframe['predicted_p'] = [pred[0] for pred in predictions]
dataframe['predicted_h'] = [pred[1] for pred in predictions]

# Mostrar los primeros 10 resultados
dataframe.head(10)

Unnamed: 0,feature1,feature2,t,p,predicted_p,predicted_h
0,0.548814,0.677817,3.117959,0.906555,0.16719,1.208307
1,0.715189,0.270008,6.963435,0.774047,0.029452,1.369274
2,0.602763,0.735194,3.777518,0.333145,0.119448,1.232251
3,0.544883,0.962189,1.796037,0.081101,0.344671,1.168755
4,0.423655,0.248753,0.246787,0.407241,0.866406,1.192875
5,0.645894,0.576157,0.672496,0.232234,0.694845,1.280369
6,0.437587,0.592042,6.793928,0.132488,0.017029,1.156243
7,0.891773,0.572252,4.536968,0.053427,0.112907,1.441774
8,0.963663,0.223082,5.365792,0.725594,0.090923,1.551162
9,0.383442,0.952749,8.966713,0.011427,0.00321,1.08254
