In [34]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import tensorflow as tf
import math

In [38]:
# Constants
MIN_HALF_LIFE = 15.0 / (24 * 60)  # 15 minutes
MAX_HALF_LIFE = 274.0             # 9 months
LN2 = math.log(2)

def pclip(p):
    return tf.clip_by_value(p, 0.0001, 0.9999)

def hclip(h):
    return tf.clip_by_value(h, MIN_HALF_LIFE, MAX_HALF_LIFE)

In [73]:
def estimate_h_hat(theta, x):
    estimated_h = 2 ** tf.tensordot(x, theta, axes=1)
    return tf.squeeze(estimated_h)  # Remove extra dimensions if any


def estimate_p_hat(delta, estimated_h):
    predicted_p = 2 ** (-delta / estimated_h)
    return predicted_p


def hh_loss_function(p, predicted_p, estimated_h, delta, theta, regularization, lambda_param=0.1, alpha_param=0.01):
    loss_p = tf.square(p - predicted_p)
    loss_h = tf.square((-delta / tf.math.log(p) / tf.math.log(2.0)) - estimated_h)
    
    # Sum (or take mean) over all observations to get a scalar loss
    loss_p_sum = tf.reduce_sum(loss_p)
    loss_h_sum = tf.reduce_sum(loss_h)
    
    if regularization == 'l2':
        regularization_term = lambda_param * tf.reduce_sum(tf.square(theta))
    elif regularization == 'l1':
        regularization_term = lambda_param * tf.reduce_sum(tf.abs(theta))
    
    loss = loss_p + alpha_param * loss_h + regularization_term
    return loss


In [19]:
data      = pd.read_csv( 'subset_1000.csv' )
data

Unnamed: 0.1,Unnamed: 0,p,t,right,wrong,bias,lexeme,h,a,lang,right_this,wrong_this,ts,uid,lexeme_orig,datetime,decimal_hours
0,67736,0.5000,42.049549,2.236068,1.732051,1.0,en:europe/europe<np><loc><sg>,42.049549,0.423607,es->en,1,2,1362238751,u:dv9l,europe/europe<np><loc><sg>,2013-03-02 15:39:11,15.653056
1,209424,0.9999,0.005347,3.000000,1.000000,1.0,en:in/in<pr>,37.062267,0.416667,es->en,1,1,1362510201,u:gZip,in/in<pr>,2013-03-05 19:03:21,19.055833
2,446627,0.9999,0.904109,3.162278,1.414214,1.0,en:sees/see<vblex><pri><p3><sg>,274.000000,0.368734,es->en,1,1,1362094794,u:iegs,sees/see<vblex><pri><p3><sg>,2013-02-28 23:39:54,23.665000
3,141119,0.9999,1.104167,6.403124,1.000000,1.0,en:we/prpers<prn><subj><p1><mf><pl>,274.000000,0.190980,es->en,1,1,1362267676,u:feOc,we/prpers<prn><subj><p1><mf><pl>,2013-03-02 23:41:16,23.687778
4,50332,0.9999,0.090972,6.557439,3.464102,1.0,en:eats/eat<vblex><pri><p3><sg>,274.000000,0.150131,es->en,2,2,1362183822,u:dTHC,eats/eat<vblex><pri><p3><sg>,2013-03-02 00:23:42,0.395000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,454337,0.5000,0.026470,1.732051,1.000000,1.0,en:though/though<cnjsub>,0.026470,0.622008,es->en,1,2,1362322756,u:ih9Y,though/though<cnjsub>,2013-03-03 14:59:16,14.987778
996,3241,0.9999,0.000197,2.236068,1.732051,1.0,en:figure/figure<n><sg>,1.363763,0.423607,es->en,2,2,1362397491,u:Mh8,figure/figure<n><sg>,2013-03-04 11:44:51,11.747500
997,448452,0.9999,0.002164,5.291503,1.732051,1.0,en:drinks/drink<vblex><pri><p3><sg>,15.001394,0.220955,es->en,1,1,1362883254,u:ifMk,drinks/drink<vblex><pri><p3><sg>,2013-03-10 02:40:54,2.681667
998,70989,0.0001,0.020093,2.000000,1.414214,1.0,en:flute/flute<n><sg>,0.010417,0.500000,es->en,0,2,1362517638,u:dzG6,flute/flute<n><sg>,2013-03-05 21:07:18,21.121667


In [20]:
data.columns

Index(['Unnamed: 0', 'p', 't', 'right', 'wrong', 'bias', 'lexeme', 'h', 'a',
       'lang', 'right_this', 'wrong_this', 'ts', 'uid', 'lexeme_orig',
       'datetime', 'decimal_hours'],
      dtype='object')

In [28]:
import pandas as pd
import tensorflow as tf

# Assuming 'data' is your DataFrame
x_data = data[['right', 'wrong', 'bias']]  # Replace with your column names for x
delta_data = data['t']  # Replace with your column name for delta
p_data = data['p']  # Replace with your column name for p

In [81]:
tf.Variable(tf.random.normal([x.shape[1], 1]))*0.001

<tf.Tensor: shape=(3, 1), dtype=float32, numpy=
array([[-0.00055976],
       [-0.0004415 ],
       [ 0.00032743]], dtype=float32)>

In [84]:
# Example data (replace these with your actual data)
x = tf.constant(x_data, dtype=tf.float32)
delta = tf.constant(delta_data, dtype=tf.float32)
p = tf.constant(p_data, dtype=tf.float32)

# Initialize theta (model parameters)
theta = tf.Variable(tf.random.normal([x.shape[1], 1])*0.001, name='theta')

# Optimizer
optimizer = tf.optimizers.Adagrad(learning_rate=0.001)
# optimizer = tf.optimizers.SGD(learning_rate=0.01)

# Optimization loop
for i in range(1000):
    with tf.GradientTape() as tape:
        estimated_h = estimate_h_hat(theta, x)
        estimated_h = hclip(estimated_h)
        
        predicted_p = estimate_p_hat(delta, estimated_h)
        predicted_p = pclip(predicted_p)
        
        loss = hh_loss_function(p, predicted_p, estimated_h, delta, theta, regularization='l2')
    
    gradients = tape.gradient(loss, [theta])
    optimizer.apply_gradients(zip(gradients, [theta]))
    
# Calculate and print the gradient norm
    grad_norm = tf.norm(gradients[0])
    if i % 100 == 0:
        print(f"Gradient Norm: {grad_norm.numpy()}")

Gradient Norm: 4354946.0
Gradient Norm: nan
Gradient Norm: nan
Gradient Norm: nan
Gradient Norm: nan
Gradient Norm: nan
Gradient Norm: nan
Gradient Norm: nan
Gradient Norm: nan
Gradient Norm: nan


In [67]:
optimized_theta = theta.numpy()
print("Optimized Theta:", optimized_theta)

Optimized Theta: [[0.53253347]
 [0.73516107]
 [0.668873  ]]


In [69]:
loss.numpy().shape

(1000,)

In [70]:
data.shape

(1000, 17)

In [72]:
print("Loss shape:", tf.shape(loss))


Loss shape: tf.Tensor([1000], shape=(1,), dtype=int32)


In [76]:
grad_norm.numpy()

26220648.0