In [1]:
import torch
import torch.utils.data as data
import torch.optim as optim
import torch.nn as nn
from torchsummary import summary
import time
import numpy as np
import os
import matplotlib.pyplot as plt
from torch.utils.data import Dataset, DataLoader
import torch.nn.functional as F
from pathlib import Path
import pandas as pd

from torchmetrics import MeanSquaredError
from torchmetrics import MeanAbsoluteError

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Load the raw table from the CSV file in the notebook's working directory and preview
# the first five rows.
# NOTE(review): the preview shows a leading "Unnamed: 0" column, which looks like a row
# index that was written into the file. LTV_data slices columns by position, so this
# column falls inside its [:52] feature slice -- confirm that is intended.
income_frame = pd.read_csv(filepath_or_buffer="data.csv")
income_frame.head(n=5)

Unnamed: 0,total_sessions_day0,total_sessions_day1,total_sessions_day3,total_sessions_day7,chapters_finished_day0,chapters_finished_day1,chapters_finished_day3,chapters_finished_day7,chapters_opened_day0,chapters_opened_day1,...,install_date,country_code,ad_ltv_day0,ad_ltv_day1,ad_ltv_day3,platform,target_sub_ltv_day30,target_iap_ltv_day30,target_ad_ltv_day30,target_full_ltv_day30
0,1.0,1.0,1.0,1.0,0,0,0,0,0,0,...,2021-12-03,COUNTRY_135,0.0,0.0,0.0,ios,0.0,0.0,0.0,0.0
1,1.0,1.0,1.0,1.0,0,0,0,0,0,0,...,2021-12-03,COUNTRY_141,0.0,0.0,0.0,android,0.0,0.0,0.0,0.0
2,1.0,2.0,2.0,2.0,0,0,0,0,0,0,...,2021-12-03,COUNTRY_141,0.0,0.0,0.0,android,0.0,0.0,0.0,0.0
3,1.0,1.0,1.0,1.0,0,0,0,0,0,0,...,2021-12-03,COUNTRY_141,0.0,0.0,0.0,ios,0.0,0.0,0.0,0.0
4,1.0,1.0,1.0,1.0,0,0,0,0,0,0,...,2021-12-03,COUNTRY_203,0.0,0.0,0.0,android,0.0,0.0,0.0,0.0


In [3]:
class LTV_data(Dataset):
    """Map-style dataset serving one DataFrame row at a time as ``(feature, target)``.

    Columns are addressed purely by position: the first ``n_features`` columns form
    the feature vector and the half-open column range ``target_span`` forms the
    target vector. Both are cast to ``float32``, so every selected column must hold
    values convertible to float.

    Args:
        df: source DataFrame; kept by reference in ``self.df`` (no copy is made).
        n_features: number of leading columns used as features. Defaults to 52,
            the position originally hard-coded here.
        target_span: ``(start, stop)`` column positions of the targets, stop
            exclusive. Defaults to ``(59, 62)``, i.e. three target columns.
    """

    def __init__(self, df, n_features=52, target_span=(59, 62)):
        self.df = df
        self.n_features = n_features
        self.target_start, self.target_stop = target_span

    def __getitem__(self, index):
        """Return ``(feature, target)`` float32 NumPy arrays for the row at ``index``."""
        # Fetch the row once and slice it twice instead of hard-coding positions.
        row_values = self.df.iloc[index].values
        feature = row_values[:self.n_features].astype(np.float32)
        target = row_values[self.target_start:self.target_stop].astype(np.float32)
        return feature, target

    def __len__(self) -> int:
        """Number of rows in the wrapped DataFrame."""
        return len(self.df)

    @staticmethod
    def create_dataset(df, transform=None):
        """Build an ``LTV_data`` over ``df`` with the default column layout.

        ``transform`` is accepted for call-site compatibility but is not used.
        """
        return LTV_data(df)

    @staticmethod
    def loader(dataset, batch_size, num_workers=0, shuffle=True):
        """Wrap ``dataset`` in a ``DataLoader``.

        Args:
            dataset: any dataset accepted by ``DataLoader``.
            batch_size: number of samples per batch.
            num_workers: worker processes used for loading (0 = load in-process).
            shuffle: whether to reshuffle every epoch. Defaults to True, which was
                previously the only behaviour; pass False for a stable order
                (e.g. validation or test data).
        """
        return DataLoader(
            dataset,
            batch_size=batch_size,
            shuffle=shuffle,
            num_workers=num_workers,
        )

In [4]:
# Wrap the raw frame in the row-wise dataset.
all_data = LTV_data.create_dataset(income_frame)

# Hold out 10000 rows each for validation and test; every remaining row is training
# data (so the frame must contain more than 20000 rows).
# The split is driven by a fixed-seed generator: without it the membership of the three
# subsets changes on every kernel restart, which makes metrics incomparable between runs
# and lets a reloaded checkpoint be scored on rows it was trained on.
train_set, val_set, test_set = torch.utils.data.random_split(
    all_data,
    [len(all_data) - 20000, 10000, 10000],
    generator=torch.Generator().manual_seed(42),
)

# One loader per subset, all with a batch size of 64.
train_loader = LTV_data.loader(train_set, 64)
val_loader = LTV_data.loader(val_set, 64)
test_loader = LTV_data.loader(test_set, 64)

In [107]:
# Pull a single batch from the training loader for inspection. A fresh iterator is
# created each time the cell runs, and LTV_data.loader builds its DataLoader with
# shuffle=True, so the batch generally differs from run to run.
features, targets = next(iter(train_loader))

In [92]:
# Hyperparameters shared by the model-definition and training cells.
input_size = 52        # in_features of the first nn.Linear in LTV_model
learning_rate = 1e-3   # step size passed to optim.SGD
EPOCHS = 2             # number of iterations of the epoch loop

In [29]:
class LTV_model(nn.Module):
    """Feed-forward regressor: Linear(in, 64) -> Sigmoid -> Linear(64, 32) -> Sigmoid
    -> dropout(p=0.1) -> Linear(32, 3).

    Args:
        in_features: width of the input vectors. When omitted, the module-level
            ``input_size`` is read at construction time, which was previously the
            only behaviour.

    Submodule names (``f1``, ``f2``) are unchanged, so existing ``state_dict``
    checkpoints still load.
    """

    def __init__(self, in_features=None) -> None:
        super().__init__()
        if in_features is None:
            # Fall back to the notebook-level hyperparameter.
            in_features = input_size
        self.f1 = nn.Sequential(
            nn.Linear(in_features, 64),
            nn.Sigmoid(),
            nn.Linear(64, 32),
            nn.Sigmoid(),
        )
        self.f2 = nn.Linear(32, 3)

    def forward(self, X):
        """Map a ``(batch, in_features)`` float tensor to ``(batch, 3)`` predictions."""
        X = self.f1(X)
        # F.dropout defaults to training=True, which kept dropout active even after
        # model.eval(). Tying it to self.training makes inference deterministic.
        X = F.dropout(X, p=0.1, training=self.training)
        X = self.f2(X)
        return X

In [90]:
# Start from a freshly initialised network.
model = LTV_model()
# Disabled: restore weights from './model.pt' instead of starting from scratch.
#checkpoint = torch.load('./model.pt')
#model.load_state_dict(checkpoint['model_state_dict'])

# Mean-squared-error objective, optimised with plain SGD at the configured learning rate.
criterion = nn.MSELoss()
optimizer = optim.SGD(params=model.parameters(), lr=learning_rate)

In [93]:
def train(model, loader, opt, criterion):
    """Run one optimisation pass over ``loader`` and report averaged batch metrics.

    Args:
        model: module to optimise; switched to training mode before the pass.
        loader: iterable of ``(features, targets)`` batches that supports ``len()``.
        opt: optimizer whose gradients are zeroed and stepped once per batch.
        criterion: callable ``criterion(output, targets)`` giving the loss to backpropagate.

    Returns:
        ``(mse, mae)``: the per-batch MeanSquaredError / MeanAbsoluteError values,
        summed and divided by the number of batches.

    Progress is printed as a counter every 100 batches, with a line break every
    10000 batches.
    """
    report_every = 100
    n_batches = len(loader)
    print('training to', n_batches // report_every, end=':')

    model.train()

    mse_total, mae_total = 0, 0
    for step, (features, targets) in enumerate(loader, start=1):
        # Progress ticker.
        if step % report_every == 0:
            print(step // report_every, end=';')
            if step % (report_every * 100) == 0:
                print('')

        opt.zero_grad()

        output = model(features)
        criterion(output, targets).backward()

        # Metrics are evaluated on this batch's predictions before the weights move.
        mse_total += MeanSquaredError()(output, targets).item()
        mae_total += MeanAbsoluteError()(output, targets).item()

        opt.step()

    return mse_total / n_batches, mae_total / n_batches

In [40]:
def evaluate(model, loader, opt=None, criterion=None):
    """Score ``model`` on ``loader`` without updating any weights.

    Args:
        model: module to score; switched to evaluation mode via ``model.eval()``.
        loader: iterable of ``(features, targets)`` batches that supports ``len()``.
        opt: unused; kept (now optional) so existing four-argument calls still work.
        criterion: unused; kept (now optional) for the same reason.

    Returns:
        ``(mse, mae)``: the per-batch MeanSquaredError / MeanAbsoluteError values,
        summed and divided by the number of batches.

    Progress is printed as a counter every 100 batches, with a line break every
    10000 batches.
    """
    denominator = 100
    print('validation to', len(loader) // denominator, end=':')

    mse = 0
    mae = 0

    # nn.Module has no evaluate() method; the original call raised AttributeError.
    model.eval()

    # No gradients are needed for scoring, so autograd tracking is switched off.
    with torch.no_grad():
        for i, (features, targets) in enumerate(loader, start=1):
            if i % denominator == 0:
                print(i // denominator, end=';')
            if i % (denominator * 100) == 0:
                print('')

            output = model(features)

            mse += MeanSquaredError()(output, targets).item()
            mae += MeanAbsoluteError()(output, targets).item()

    return mse / len(loader), mae / len(loader)

In [36]:
# Per-epoch metric histories, one (MSE, MAE) pair of lists per data split.
# Re-running this cell discards anything recorded so far.
train_mse_list, train_mae_list = [], []
val_mse_list, val_mae_list = [], []
test_mse_list, test_mae_list = [], []

In [108]:
# Main training loop: one pass over train_loader per epoch, recording the returned
# MSE/MAE and the wall-clock duration of the pass. Checkpoint saving and the
# validation/test passes are currently disabled (see the notes below), so only the
# train_* history lists are appended to.
for epoch in range(EPOCHS):
    print("Epoch-%d: " % (epoch))

    # time.monotonic() is immune to system clock adjustments, so the difference
    # below is a reliable elapsed time.
    train_start_time = time.monotonic()
    train_mse, train_mae = train(model, train_loader, optimizer, criterion)
    train_end_time = time.monotonic()
    
    # The triple-quoted block below is a bare string expression, not executed code:
    # nothing is written to './model.pt' by this loop.
    '''
    torch.save({
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'loss': train_mse,
            }, './model.pt')
        '''
    # NOTE(review): the disabled calls below use val_data_loader / test_data_loader,
    # whereas the loaders created earlier in the notebook are named val_loader /
    # test_loader. Fix the names and confirm the argument list matches evaluate()
    # before re-enabling them.
    #val_start_time = time.monotonic()
    #val_mse, val_mae = evaluate(model, val_data_loader, criterion)
    #val_end_time = time.monotonic()
    
    #test_start_time = time.monotonic()
    #test_mse, test_mae = evaluate(model, test_data_loader, criterion)
    #test_end_time = time.monotonic()
    
    train_mse_list.append(train_mse)
    train_mae_list.append(train_mae)
    #val_mse_list.append(val_mse)
    #val_mae_list.append(val_mae)
    #test_mse_list.append(test_mse)
    #test_mae_list.append(test_mae)
    
    print("Training: MSE = %.4f, MAE = %.4f, Time = %.2f seconds" % (train_mse, train_mae, train_end_time - train_start_time))
    #print("Validation: MSE = %.4f, MAE = %.4f, Time = %.2f seconds" % (val_mse, val_mae, val_end_time - val_start_time))
    # NOTE(review): the disabled "Test" line below reports val_end_time - val_start_time,
    # not the test timings.
    #print("Test: MSE = %.4f, MAE = %.4f, Time = %.2f seconds" % (test_mse, test_mae, val_end_time - val_start_time))
    print("")

In [None]:
# Restore the model weights stored under 'model_state_dict' in './model.pt' and dump
# the resulting parameters.
# NOTE(review): the only torch.save call visible in this notebook is disabled, so the
# file has to come from an earlier run or another source.
# NOTE(review): torch.load unpickles the file's contents -- only load checkpoints
# from a trusted origin.
checkpoint = torch.load(f='./model.pt')
model.load_state_dict(state_dict=checkpoint['model_state_dict'])
print(model.state_dict())

In [18]:
# Count the rows whose target_full_ltv_day30 differs from 0.0, then print that count
# and its share of all rows.
money = income_frame[['target_full_ltv_day30']]
# A vectorised comparison replaces the former row-by-row iterrows() loop: same result
# (NaN still counts as "not 0.0"), without creating a Series object per row.
count = int((money['target_full_ltv_day30'] != 0.0).sum())
print(count)
print(count / len(money))

343293
0.23387758272063833
