In [None]:
import os
import sys
import pymysql
import numpy as np
import pandas as pd
import re
import datetime
import category_encoders
import joblib

from Config import params_config, query_config, db_config
from Utils.bulk_insert import BulkInsert

import warnings
warnings.filterwarnings('ignore')

## fit_race_info_into_model.py

In [None]:
# Pull the project configuration objects: the named queries, the
# preprocessing/model parameters, and the database connection settings.
queries = query_config.queries
parameters = params_config.parameters
db_params = db_config.db_params
# db_params is unpacked as keyword arguments for pymysql.connect; the resulting
# connection is reused by the data-loading helpers in this notebook.
con = pymysql.connect(**db_params)

In [None]:
def fetchall_and_make_list_by(query, con):
    """Run ``query`` on ``con`` and return every fetched row as a list.

    Args:
        query: SQL text handed unchanged to ``cursor.execute``.
        con: Open DB-API style connection (any object exposing ``cursor()``).

    Returns:
        A list with the rows returned by ``cursor.fetchall()``, or ``None``
        when any step raised. The exception is printed, not propagated.
    """
    cursor = None
    try:
        cursor = con.cursor()
        cursor.execute(query)
        return list(cursor.fetchall())
    except Exception as e:
        # Best-effort contract: report the problem and hand back None.
        print(e)
        return None
    finally:
        # Release the cursor on the failure path as well as on success.
        if cursor is not None:
            cursor.close()

def get_training_race_data_frame(queries, parameters, con):
    """Load the training rows from the database into a DataFrame.

    Executes the query stored under ``TRAINING_DATA_FROM_MASTER_PRIOR_RESULT``
    and labels the columns with
    ``parameters['DATAFRAME_COL_NAMES']['training_race_data_cols']``.
    """
    rows = fetchall_and_make_list_by(queries['TRAINING_DATA_FROM_MASTER_PRIOR_RESULT'], con)
    column_names = parameters['DATAFRAME_COL_NAMES']['training_race_data_cols']
    return pd.DataFrame(rows, columns=column_names)

In [None]:
training_race_df =  get_training_race_data_frame(queries, parameters, con)

In [None]:
training_race_df.shape

In [None]:
training_race_df.head()

## Class: Preprocessing

### Features from master

In [None]:
def _get_year_month_day_from_race_timing(x):
    date_str = re.match('([0-9]+)/([0-9]+)/([0-9]+)' , x).group()
    year = datetime.datetime.strptime(date_str, '%Y/%m/%d').year
    month = datetime.datetime.strptime(date_str, '%Y/%m/%d').month
    day = datetime.datetime.strptime(date_str, '%Y/%m/%d').day
    return pd.Series([year, month, day])

def _get_dow_from_race_timing(x):
    return re.search("土|日" , x).group() 

def _encode_dow(df):
    dow_mapping = {'土': 1, '日': 2}
    return df['dow'].map(dow_mapping)    

def _get_time_in_the_racecourse_from_race_timing(x):
    return int(re.split('([0-9]+)回([ぁ-んァ-ン 一-龥]+)([0-9]+)日目' , x)[1])

def _get_racecourse_from_race_timing(x):
    return re.split('([0-9]+)回([ぁ-んァ-ン 一-龥]+)([0-9]+)日目' , x)[2]

def _get_what_day_in_the_racecourse_from_race_timing(x):
    return int(re.split('([0-9]+)回([ぁ-んァ-ン 一-龥]+)([0-9]+)日目' , x)[3])

def _encode_race_course(df):
    race_course_mapping = {'函館': 1, '札幌': 2, '福島': 3, '東京': 4, '中山': 5, '新潟': 6, '中京': 7, '阪神': 8, '京都': 9, '小倉': 10}
    return df['race_course'].map(race_course_mapping)

In [None]:
def preprocess_race_timing(df):
    """Derive date, weekday and racecourse features from ``race_timing``.

    Adds ``year``/``month``/``day``, ``dow``/``dow_encoded``,
    ``race_course``/``race_course_encoded``, ``time_in_racecourse`` and
    ``what_day_in_racecourse`` to ``df`` in place and returns the same frame.
    """
    timing = df['race_timing']

    df[['year', 'month', 'day']] = timing.apply(_get_year_month_day_from_race_timing)

    df['dow'] = timing.apply(_get_dow_from_race_timing)
    # The encoders read the column written just above, so order matters.
    df['dow_encoded'] = _encode_dow(df)

    df['race_course'] = timing.apply(_get_racecourse_from_race_timing)
    df['race_course_encoded'] = _encode_race_course(df)

    df['time_in_racecourse'] = timing.apply(_get_time_in_the_racecourse_from_race_timing)
    df['what_day_in_racecourse'] = timing.apply(_get_what_day_in_the_racecourse_from_race_timing)
    return df

In [None]:
training_race_df = preprocess_race_timing(training_race_df)

In [None]:
def encode_race_weather(df):
    """Return the ``race_weather`` column mapped to integer codes 1-7.

    The result is a Series; labels outside the mapping become NaN.
    """
    weather_labels = ('晴', '曇', '小雨', '雨', '小雪', '雪', 'unknown')
    code_by_weather = {label: code for code, label in enumerate(weather_labels, start=1)}
    return df['race_weather'].map(code_by_weather)

In [None]:
training_race_df['race_weather_encoded'] = encode_race_weather(training_race_df)

In [None]:
def encode_race_condition(df):
    """Return the ``race_condition`` column mapped to integer codes 1-5.

    The result is a Series; labels outside the mapping become NaN.
    """
    condition_labels = ('良', '稍', '重', '不', 'unknown')
    code_by_condition = {label: code for code, label in enumerate(condition_labels, start=1)}
    return df['race_condition'].map(code_by_condition)

In [None]:
training_race_df['race_condition_encoded'] = encode_race_condition(training_race_df)

In [None]:
def encode_fit_and_transform_href_to_the_horse(df):
    """Fit a category encoder on ``href_to_the_horse``, persist it, and add the encoded column.

    The encoder class is selected by
    ``parameters['HYPER_PARAMETERS']['CATEGORY_ENCODERS_FOR_HORSE']``
    (``'TargetEncoder'`` or ``'OrdinalEncoder'``). The fitted encoder is
    written with joblib to ``parameters['FILE_NAME_OF_HORSE_CATEGORY_ENCODERS']``.

    Args:
        df: DataFrame holding ``href_to_the_horse`` and the configured target column.

    Returns:
        A new DataFrame equal to ``df`` plus ``href_to_the_horse_encoded``.

    Raises:
        ValueError: if the configured encoder name is not supported.
    """
    hyper_params = parameters['HYPER_PARAMETERS']
    encoder_classes = {
        'TargetEncoder': category_encoders.TargetEncoder,
        'OrdinalEncoder': category_encoders.OrdinalEncoder,
    }
    encoder_name = hyper_params['CATEGORY_ENCODERS_FOR_HORSE']
    if encoder_name not in encoder_classes:
        raise ValueError(
            'Unsupported CATEGORY_ENCODERS_FOR_HORSE: {!r}'.format(encoder_name))

    # handle_unknown is a constructor option of the encoders; when passed to
    # fit() it is swallowed by **kwargs and has no effect.
    ce = encoder_classes[encoder_name](
        cols=['href_to_the_horse'],
        handle_unknown=hyper_params['CATEGORY_ENCODERS_HANDLE_UNKNOWN'])
    ce.fit(df, df[parameters['DATAFRAME_COL_NAMES']['target_col']])
    joblib.dump(ce, parameters['FILE_NAME_OF_HORSE_CATEGORY_ENCODERS'])

    # NOTE(review): the encoder is fitted on every row passed in; with
    # TargetEncoder that may include rows later used for validation -- confirm.
    df_ce = ce.transform(df)
    # assign() overwrites an existing encoded column instead of duplicating it
    # when this step is re-run on an already processed frame.
    return df.assign(href_to_the_horse_encoded=df_ce['href_to_the_horse'])

In [None]:
# Disabled sanity check: reload the horse encoder persisted by
# encode_fit_and_transform_href_to_the_horse (same file-name key as the dump).
# ce_loaded = joblib.load(parameters['FILE_NAME_OF_HORSE_CATEGORY_ENCODERS'])
# ce_loaded

In [None]:
training_race_df = encode_fit_and_transform_href_to_the_horse(training_race_df)

### Features from prior or result

In [None]:
def _get_horse_age_and_sex_in_result(x):
    """Split a combined sex/age token such as ``牡3`` into ``pd.Series([sex, age])``."""
    parts = re.split('([ぁ-んァ-ン 一-龥]+)([0-9]+)' , x)
    return pd.Series([parts[1], int(parts[2])])

def  _encode_horse_sex(df_about_horse_sex):
    """Map a Series of sex labels to codes (牡 -> 1, 牝 -> 2, セ -> 3); others become NaN."""
    horse_sex_mapping = {'牡': 1, '牝': 2, 'セ': 3}
    return df_about_horse_sex.map(horse_sex_mapping)

def preprocess_horse_sex_age(df, target_cols_type):
    """Add ``horse_age`` and ``horse_sex_encoded`` (and ``horse_sex`` for results) to ``df``.

    Args:
        df: DataFrame to extend in place.
        target_cols_type: ``'result'`` reads ``horse_sex_age_in_result``;
            ``'prior'`` reads ``horse_age_in_prior`` and ``horse_sex_in_prior``.
            Any other value leaves ``df`` untouched.

    Returns:
        The same ``df`` object.
    """
    if target_cols_type == 'result':
        df[['horse_sex', 'horse_age']] = df['horse_sex_age_in_result'].apply(_get_horse_age_and_sex_in_result)
        df['horse_sex_encoded'] = _encode_horse_sex(df['horse_sex'])
    elif target_cols_type == 'prior':
        # Read from the frame that was passed in, not from a notebook-level
        # global, so the function works on any DataFrame.
        df['horse_age'] = pd.to_numeric(df["horse_age_in_prior"], errors='coerce')
        df['horse_sex_encoded'] = _encode_horse_sex(df['horse_sex_in_prior'])
    return df

In [None]:
training_race_df = preprocess_horse_sex_age(df=training_race_df, target_cols_type='result')

In [None]:
# training_race_df = training_race_df[training_race_df['horse_weight_in_result']!='計不(---)']

In [None]:
def _parse_horse_weight_increment(x):
    """Convert a weight-change token to int.

    Full-width ``＋``/``－`` are normalised to ASCII signs and ``---`` counts as 0.
    """
    return int(x.replace('＋', '+').replace('－', '-').replace('---', '0'))

def _get_horse_weight_info_in_result(x):
    """Parse ``'<weight>(<change>)'`` into ``pd.Series([weight, change])`` of ints.

    Raises ``ValueError`` when the part before ``(`` is not an integer.
    """
    parts = re.split(r'(\()(.*)(\))' , x)
    horse_weight = int(parts[0])
    horse_weight_increment = _parse_horse_weight_increment(parts[2])
    return pd.Series([horse_weight, horse_weight_increment])

def _get_horse_weight_in_prior(x):
    """Return the first run of digits in ``x`` as an int, or NaN when unavailable."""
    try:
        return int(re.search("[0-9]+" , x).group())
    except (TypeError, AttributeError):
        # TypeError: x is not a string (missing value).
        # AttributeError: the string contains no digits at all.
        return np.nan

def _get_horse_weight_increment_in_prior(x):
    """Return the change inside ``'(<change>kg)'`` as an int, or NaN when unavailable."""
    try:
        horse_weight_increment_str = re.split(r'(\()(.*)(kg\))' , x)[2]
        return _parse_horse_weight_increment(horse_weight_increment_str)
    except (TypeError, IndexError):
        # TypeError: missing value; IndexError: no '(...kg)' group in the string.
        return np.nan

def preprocess_horse_weight_and_increment(df, target_cols_type):
    """Add ``horse_weight`` and ``horse_weight_increment`` to ``df`` in place.

    Args:
        df: DataFrame to extend.
        target_cols_type: ``'result'`` parses ``horse_weight_in_result``;
            ``'prior'`` parses ``horse_weight_in_prior`` and
            ``horse_weight_increment_in_prior``. Other values leave ``df`` untouched.

    Returns:
        The same ``df`` object.
    """
    if target_cols_type == 'result':
        df[['horse_weight', 'horse_weight_increment']] = df['horse_weight_in_result'].apply(_get_horse_weight_info_in_result)
    elif target_cols_type == 'prior':
        df['horse_weight'] = df['horse_weight_in_prior'].apply(_get_horse_weight_in_prior)
        df['horse_weight_increment'] = df['horse_weight_increment_in_prior'].apply(_get_horse_weight_increment_in_prior)
    return df

In [None]:
training_race_df = preprocess_horse_weight_and_increment(df=training_race_df, target_cols_type='result')

In [None]:
def _get_and_encode_weight_loss_flg(x):
    """Return 3, 2 or 1 for the first ▲, △ or ☆ found in ``x``; 0 when none is present."""
    code_by_flag = {'▲': 3, '△': 2, '☆': 1}
    try:
        weight_loss_flg = re.search('▲|△|☆' , x).group()
    except AttributeError:
        # re.search returned None: the string carries no flag.
        return 0
    return code_by_flag[weight_loss_flg]

def _get_horse_impost_in_prior(x):
    """Return the number inside the parentheses of ``x`` as float, or NaN when unavailable."""
    try:
        return float(re.split(r'(▲|△|☆|.)(.*)(\()(.*)(\))(.*)' , x)[4])
    except (TypeError, IndexError):
        # TypeError: missing value; IndexError: no '(...)' part in the string.
        return np.nan

def _get_weight_loss_encode_in_prior(x):
    """Encode the flag carried by the first character of ``x``; NaN when unavailable."""
    try:
        weight_loss_flg_str = re.split(r'(▲|△|☆|.)(.*)(\()(.*)(\))(.*)' , x)[1]
        return _get_and_encode_weight_loss_flg(weight_loss_flg_str)
    except (TypeError, IndexError):
        return np.nan

def preprocess_jockey_name(df, target_cols_type):
    """Add ``horse_impost`` and ``weight_loss_encode`` to ``df`` in place.

    Args:
        df: DataFrame to extend.
        target_cols_type: ``'result'`` copies ``horse_impost_in_result`` and
            encodes the flag in ``jockey_name_in_result``; ``'prior'`` parses
            both values out of ``jockey_name_and_horse_impost_in_prior``.
            Other values leave ``df`` untouched.

    Returns:
        The same ``df`` object.
    """
    if target_cols_type == 'result':
        df['horse_impost'] = df['horse_impost_in_result']
        df['weight_loss_encode'] = df['jockey_name_in_result'].apply(_get_and_encode_weight_loss_flg)
    elif target_cols_type == 'prior':
        df['horse_impost'] = df['jockey_name_and_horse_impost_in_prior'].apply(_get_horse_impost_in_prior)
        df['weight_loss_encode'] = df['jockey_name_and_horse_impost_in_prior'].apply(_get_weight_loss_encode_in_prior)
    return df

In [None]:
training_race_df = preprocess_jockey_name(df=training_race_df, target_cols_type='result')

In [None]:
def encode_fit_and_transform_href_to_the_jockey(df):
    """Fit a category encoder on ``href_to_the_jockey``, persist it, and add the encoded column.

    The encoder class is selected by
    ``parameters['HYPER_PARAMETERS']['CATEGORY_ENCODERS_FOR_JOCKEY']``
    (``'TargetEncoder'`` or ``'OrdinalEncoder'``). The fitted encoder is
    written with joblib to ``parameters['FILE_NAME_OF_JOCKEY_CATEGORY_ENCODERS']``.

    Args:
        df: DataFrame holding ``href_to_the_jockey`` and the configured target column.

    Returns:
        A new DataFrame equal to ``df`` plus ``href_to_the_jockey_encoded``.

    Raises:
        ValueError: if the configured encoder name is not supported.
    """
    hyper_params = parameters['HYPER_PARAMETERS']
    encoder_classes = {
        'TargetEncoder': category_encoders.TargetEncoder,
        'OrdinalEncoder': category_encoders.OrdinalEncoder,
    }
    encoder_name = hyper_params['CATEGORY_ENCODERS_FOR_JOCKEY']
    if encoder_name not in encoder_classes:
        raise ValueError(
            'Unsupported CATEGORY_ENCODERS_FOR_JOCKEY: {!r}'.format(encoder_name))

    # handle_unknown is a constructor option of the encoders; when passed to
    # fit() it is swallowed by **kwargs and has no effect.
    ce = encoder_classes[encoder_name](
        cols=['href_to_the_jockey'],
        handle_unknown=hyper_params['CATEGORY_ENCODERS_HANDLE_UNKNOWN'])
    ce.fit(df, df[parameters['DATAFRAME_COL_NAMES']['target_col']])
    joblib.dump(ce, parameters['FILE_NAME_OF_JOCKEY_CATEGORY_ENCODERS'])

    # NOTE(review): the encoder is fitted on every row passed in; with
    # TargetEncoder that may include rows later used for validation -- confirm.
    df_ce = ce.transform(df)
    # assign() overwrites an existing encoded column instead of duplicating it
    # when this step is re-run on an already processed frame.
    return df.assign(href_to_the_jockey_encoded=df_ce['href_to_the_jockey'])

In [None]:
# ce_loaded = joblib.load(parameters['FILE_NAME_OF_JOCKEY_CATEGORY_ENCODERS'])
# ce_loaded

In [None]:
training_race_df = encode_fit_and_transform_href_to_the_jockey(training_race_df)

In [None]:
def _get_trainer_belonging_in_result(x):
    """Return the text enclosed in the square brackets of ``x``."""
    return re.split(r'\[(.*)\]' , x)[1]

def _get_trainer_belonging_in_prior(x):
    """Return the text before the last ``・`` in ``x``, or NaN when unavailable."""
    try:
        return re.split('(.*)(・)(.*)' , x)[1]
    except (TypeError, IndexError):
        # TypeError: missing value; IndexError: the string has no '・' separator.
        return np.nan

def _encode_trainer_belonging(df):
    """Map ``trainer_belonging`` to codes (美 -> 1, 栗 -> 2, 招 -> 3); others become NaN."""
    trainer_belonging_mapping = {'美': 1, '栗': 2, '招': 3}
    return df['trainer_belonging'].map(trainer_belonging_mapping)

def preprocess_trainer_name(df, target_cols_type):
    """Add ``trainer_belonging`` and ``trainer_belonging_encoded`` to ``df`` in place.

    Args:
        df: DataFrame to extend.
        target_cols_type: ``'result'`` parses ``trainer_name_in_result``;
            ``'prior'`` parses ``trainer_name_in_prior``. Other values leave
            ``df`` untouched.

    Returns:
        The same ``df`` object.
    """
    extractors = {
        'result': ('trainer_name_in_result', _get_trainer_belonging_in_result),
        'prior': ('trainer_name_in_prior', _get_trainer_belonging_in_prior),
    }
    if target_cols_type in extractors:
        source_col, extract = extractors[target_cols_type]
        df['trainer_belonging'] = df[source_col].apply(extract)
        df['trainer_belonging_encoded'] = _encode_trainer_belonging(df)
    return df

In [None]:
training_race_df = preprocess_trainer_name(df=training_race_df, target_cols_type='result')

In [None]:
# def encode_fit_and_transform_href_to_the_trainer(df):
#     if parameters['HYPER_PARAMETERS']['CATEGORY_ENCODERS_FOR_TRAINER']=='TargetEncoder':
#         ce = category_encoders.TargetEncoder(cols=['href_to_the_trainer'])
#     elif parameters['HYPER_PARAMETERS']['CATEGORY_ENCODERS_FOR_TRAINER']=='OrdinalEncoder':
#         ce = category_encoders.OrdinalEncoder(cols=['href_to_the_trainer'])
        
#     ce.fit(df, 
#            df[parameters['DATAFRAME_COL_NAMES']['target_col']],
#            handle_unknown=parameters['HYPER_PARAMETERS']['CATEGORY_ENCODERS_HANDLE_UNKNOWN'])
#     joblib.dump(ce, parameters['FILE_NAME_OF_TRAINER_CATEGORY_ENCODERS'])
    
#     df_ce = ce.transform(df)
#     df_ce = df_ce.rename(columns={'href_to_the_trainer': 'href_to_the_trainer_encoded'})
#     return pd.concat([df, df_ce['href_to_the_trainer_encoded']], axis=1)

In [None]:
# training_race_df = encode_fit_and_transform_href_to_the_trainer(training_race_df)

In [None]:
def _categorize_arrival_order(x):
    """Translate a finishing position into its configured rank label.

    Positions 1, 2 and 3 select the ``first``/``second``/``third`` entries of
    ``parameters['MODEL_TARGET_RANK_LABEL']``; anything else selects ``others``.
    """
    if x == 1:
        rank_key = 'first'
    elif x == 2:
        rank_key = 'second'
    elif x == 3:
        rank_key = 'third'
    else:
        rank_key = 'others'
    return parameters['MODEL_TARGET_RANK_LABEL'][rank_key]

def preprocess_arrival_order(df):
    """Add ``arrival_order_category`` (label per ``arrival_order``) to ``df`` in place."""
    categories = df['arrival_order'].apply(_categorize_arrival_order)
    df['arrival_order_category'] = categories
    return df

In [None]:
training_race_df = preprocess_arrival_order(df=training_race_df)

In [None]:
training_race_df.head()

## Check whether the Preprocessing class works

In [None]:
from Model.Preprocessing import Preprocessing

In [None]:
pp = Preprocessing(parameters)

In [None]:
def preprocess_result_data_based_training_race_df(df, pp):
    """Run every result-based preprocessing step of ``pp`` on ``df`` in a fixed order.

    Each step is called as ``pp.<step>(df=df, ...)`` and its return value
    becomes the input of the next one; the final value is returned.
    """
    result_kwargs = {'target_cols_type': 'result'}
    pipeline = (
        ('preprocess_race_timing', {}),
        ('encode_race_weather', {}),
        ('encode_race_condition', {}),
        ('encode_fit_and_transform_href_to_the_horse', {}),
        ('preprocess_horse_sex_age', result_kwargs),
        ('preprocess_horse_weight_and_increment', result_kwargs),
        ('preprocess_jockey_name', result_kwargs),
        ('encode_fit_and_transform_href_to_the_jockey', {}),
        ('preprocess_trainer_name', result_kwargs),
        ('preprocess_arrival_order', {}),
    )
    for step_name, extra_kwargs in pipeline:
        df = getattr(pp, step_name)(df=df, **extra_kwargs)
    return df

In [None]:
training_race_df_preprocessed = preprocess_result_data_based_training_race_df(training_race_df, pp)

In [None]:
training_race_df_preprocessed.head()

## Modeling Process

In [None]:
def make_dataset_to_model_fit(df, params=None):
    """Split ``df`` chronologically and return model-ready numpy arrays.

    Rows dated on or after the configured (year, month) cutoff form the
    validation set; all earlier rows form the training set.

    Args:
        df: Preprocessed DataFrame with ``year`` and ``month`` columns plus the
            configured feature, query and target columns.
        params: Optional parameter dict with the same layout as the
            notebook-level ``parameters``; defaults to that global.

    Returns:
        ``(x_train, group_train, y_train, x_valid, group_valid, y_valid)``.
    """
    params = parameters if params is None else params
    cutoff = params['CRITERIA_TO_SPLIT_TRAINING_DATA']
    col_names = params['DATAFRAME_COL_NAMES']

    # Compare (year, month) as a date. Testing year and month independently
    # would send e.g. January of a later year into the training set.
    is_validation = (df['year'] > cutoff['year']) | (
        (df['year'] == cutoff['year']) & (df['month'] >= cutoff['month']))
    train_df = df[~is_validation]
    validation_df = df[is_validation]

    x_train = np.array(train_df[col_names['feature_cols_part1']])
    group_train = np.array(train_df[col_names['query_cols']])
    y_train = np.array(train_df[col_names['target_col']])
    x_valid = np.array(validation_df[col_names['feature_cols_part1']])
    group_valid = np.array(validation_df[col_names['query_cols']])
    y_valid = np.array(validation_df[col_names['target_col']])

    return x_train, group_train,  y_train, x_valid, group_valid, y_valid

In [None]:
x_train, group_train,  y_train, x_valid, group_valid, y_valid = make_dataset_to_model_fit(df=training_race_df_preprocessed)

In [None]:
print(x_train.shape)
print(group_train.shape)
print(y_train.shape)

print(x_valid.shape)
print(group_valid.shape)
print(y_valid.shape)

In [None]:
# Row count per label in the training target. np.ravel flattens a possible
# (n, 1) array so the labels can be counted directly.
pd.Series(np.ravel(y_train)).value_counts().sort_index()

In [None]:
# Row count per label in the validation target. np.ravel flattens a possible
# (n, 1) array so the labels can be counted directly.
pd.Series(np.ravel(y_valid)).value_counts().sort_index()

### Fit the model

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV

from sklearn.metrics import classification_report, f1_score, recall_score, precision_score, confusion_matrix, roc_curve, auc
# make_scorer is part of the public sklearn.metrics namespace; the private
# sklearn.metrics.scorer module no longer exists in current scikit-learn.
from sklearn.metrics import make_scorer
from scipy.stats import randint as sp_randint

#### Case when no tuning

In [None]:
# Baseline: random forest with library-default hyperparameters and a fixed
# seed, fitted on the training split.
rf_clf = RandomForestClassifier(random_state=0)
rf_clf.fit(x_train, y_train)

In [None]:
y_valid_pred = rf_clf.predict(x_valid)
pd.Series(y_valid_pred).groupby(pd.Series(y_valid_pred).values).count()

In [None]:
print(classification_report(y_valid, y_valid_pred))

### Hyper parameters tuning

In [None]:
# Search spaces for tuning the random forest.
#   GS_PARAMS: explicit value lists (3 * 3 * 3 * 2 = 54 combinations), used as param_grid.
#   RS_PARAMS: sampling distributions, used as param_distributions;
#              sp_randint(low, high) draws integers from [low, high).
parameters["HYPER_PARAMETERS"]['RF_CLF'] = {
    'CV_WAYS': 'GridSearchCV',  # 'GridSearchCV', 'RandomizedSearchCV'
    'GS_PARAMS': {'n_estimators': [10, 50, 100], 
                                 'max_depth': [5, 10, 20], 
                                 'max_features': ['sqrt', 'log2', None],
                                 'class_weight': ['balanced', None]},
    'RS_PARAMS': {'n_estimators': sp_randint(100, 5000), 
                                 'max_depth': sp_randint(5, 50), 
                                 'max_features': ['sqrt', 'log2', None],
                                 'class_weight': ['balanced', None]}
}

#### Case when RandomizedSearchCV

In [None]:
# Randomized search over RS_PARAMS: 30 sampled configurations, 3-fold
# cross-validation, scored by ROC AUC, using all available cores.
# NOTE(review): the plain "roc_auc" scorer expects a binary target -- confirm
# that the configured target column is binary.
cv = RandomizedSearchCV(estimator=RandomForestClassifier(random_state=0),
                                    param_distributions=parameters["HYPER_PARAMETERS"]['RF_CLF']['RS_PARAMS'],
                                    n_iter=30, #54,
                                    scoring="roc_auc",
                                    cv=3,
                                    verbose=1,
                                    n_jobs=-1,          
                                    random_state=1)

In [None]:
cv.fit(x_train, y_train)

In [None]:
cv.best_estimator_

In [None]:
cv.best_params_

In [None]:
# Rebuild the forest with the best hyperparameters found by the search above
# and fit it on the whole training split.
rf_clf = RandomForestClassifier(random_state=0,
                               n_estimators=cv.best_params_['n_estimators'],
                               max_depth=cv.best_params_['max_depth'],
                               max_features=cv.best_params_['max_features'],
                               class_weight=cv.best_params_['class_weight'])
rf_clf.fit(x_train, y_train)

In [None]:
y_valid_pred = rf_clf.predict(x_valid)
print(classification_report(y_valid, y_valid_pred))

{'class_weight': 'balanced',
 'max_depth': 26,
 'max_features': 'log2',
 'n_estimators': 982}

In [None]:
y_valid_pred = rf_clf.predict(x_valid)
print(classification_report(y_valid, y_valid_pred))

#### Case when GridSearchCV

In [None]:
# Exhaustive search over GS_PARAMS with 3-fold cross-validation, scored by
# micro-averaged F1, using all available cores. Rebinds `cv`, replacing the
# randomized-search object defined earlier.
cv = GridSearchCV(estimator=RandomForestClassifier(random_state=0),
                    param_grid=parameters["HYPER_PARAMETERS"]['RF_CLF']['GS_PARAMS'],
                    scoring="f1_micro",
                    cv=3,
                    verbose=1,
                    n_jobs=-1) 

In [None]:
cv.fit(x_train, y_train)

In [None]:
cv.best_estimator_

In [None]:
cv.best_params_

In [None]:
# Rebuild the forest with the best hyperparameters found by the grid search
# and fit it on the whole training split.
rf_clf = RandomForestClassifier(random_state=0,
                               n_estimators=cv.best_params_['n_estimators'],
                               max_depth=cv.best_params_['max_depth'],
                               max_features=cv.best_params_['max_features'],
                               class_weight=cv.best_params_['class_weight'])
# x_train / y_train are the arrays returned by make_dataset_to_model_fit;
# no *_df variants are defined anywhere in this notebook.
rf_clf.fit(x_train, y_train)

In [None]:
y_valid_pred = rf_clf.predict(x_valid)
print(classification_report(y_valid, y_valid_pred))

#### To make evaluation func

In [None]:
y_valid_pred_proba = rf_clf.predict_proba(x_valid)
y_valid_pred_proba

In [None]:
# ROC curve and AUC on the validation split, using the predicted probability
# of the second class (column 1 of predict_proba) as the score.
# y_valid is the target array returned by make_dataset_to_model_fit.
fpr, tpr, thresholds = roc_curve(y_valid, y_valid_pred_proba[:, 1])
auc_score = auc(fpr, tpr)
auc_score

In [None]:
import matplotlib.pyplot as plt

# Draw the validation ROC curve computed above; the legend shows the AUC.
plt.plot(fpr, tpr, label='ROC curve (area = %.3f)'%auc_score)
plt.legend()
plt.title('ROC curve')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.grid(True)

In [None]:
def calc_score_by_first_order(y_flag: np.ndarray, y_pred_proba: np.ndarray, group_ids: np.ndarray):
    """Pick the top-scored row of every group as the predicted positive and score it with F1.

    Within each group (rows sharing a value in ``group_ids``) the rows are
    ordered by the last column of ``y_pred_proba``, highest first; the first
    row gets prediction 1 and all others 0.

    Args:
        y_flag: True labels, one per row.
        y_pred_proba: Per-row scores; the last column is used for ranking.
        group_ids: Group identifier per row, aligned with ``y_flag``.

    Returns:
        ``(y_flag_proba, f1)`` where ``y_flag_proba`` has two columns (true
        label, top-pick prediction), with rows grouped in order of first
        appearance of each group id and sorted by score inside each group,
        and ``f1`` is ``f1_score`` of the two columns.
    """
    y_combined = np.c_[y_flag, y_pred_proba]
    # Collect the per-group pieces and concatenate once; the leading empty
    # (0, 2) float block keeps the result well-formed when there are no groups.
    per_group = [np.empty((0, 2))]
    for race_id in pd.unique(group_ids):
        group_rows = y_combined[group_ids == race_id]
        by_score_desc = group_rows[:, -1].argsort()[::-1]
        top_pick = np.zeros(len(group_rows))
        top_pick[0] = 1
        per_group.append(np.c_[group_rows[by_score_desc, 0], top_pick])
    y_flag_proba = np.concatenate(per_group, axis=0)

    return y_flag_proba, f1_score(y_flag_proba[:, 0], y_flag_proba[:, 1])

In [None]:
y_flag_proba, f1_pred_score = calc_score_by_first_order(y_flag=y_valid, y_pred_proba=y_valid_pred_proba, group_ids=group_valid)

In [None]:
f1_pred_score

In [None]:
print(classification_report(y_valid, y_flag_proba[:, 1]))

In [None]:
# Feature importances of the fitted forest, highest first. The names come from
# the same column list that was used to build x_train, so they line up with
# rf_clf.feature_importances_. Building the frame from a dict keeps the
# importance column numeric, so the sort is numeric as well.
feature_importance_df = pd.DataFrame({
    'features': list(parameters['DATAFRAME_COL_NAMES']['feature_cols_part1']),
    'importance': rf_clf.feature_importances_,
}).sort_values(by=['importance'], ascending=False)
feature_importance_df

### Try Learning to Rank

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

In [None]:
class Net(nn.Module):
    """Small scoring network: Linear(D, 10) -> sigmoid -> Linear(10, 1)."""

    def __init__(self, D):
        super().__init__()
        # Creation order is kept (l1, then l2) so seeded initialisation is unchanged.
        self.l1 = nn.Linear(D, 10)
        self.l2 = nn.Linear(10, 1)

    def forward(self, x):
        """Return one score per input row."""
        hidden = torch.sigmoid(self.l1(x))
        return self.l2(hidden)

In [None]:
def listnet_loss(y_i, z_i):
    """Cross-entropy between the softmax distributions of targets and predictions.

    y_i: (n_i, 1) target scores
    z_i: (n_i, 1) predicted scores

    Returns a scalar tensor ``-sum(softmax(y_i) * log_softmax(z_i))`` taken
    over dim 0.
    """
    P_y_i = F.softmax(y_i, dim=0)
    # log_softmax avoids log(0) = -inf when a softmax probability underflows.
    log_P_z_i = F.log_softmax(z_i, dim=0)
    return - torch.sum(P_y_i * log_P_z_i)

def make_dataset(N_train, N_valid, D):
    """Generate a synthetic ranking dataset from a random linear scorer.

    Draws a random weight vector and random train/validation features, scores
    the features linearly, and bins the scores at -2, -1, 0 and 1 into five
    relevance levels (0-4).

    Returns:
        ``(X_train, X_valid, ys_train_rel, ys_valid_rel)``; the feature tensors
        have ``requires_grad=True`` and the relevance tensors have shape (N, 1).
    """
    # Draw order (weights, train, valid) is kept so seeded runs are unchanged.
    true_weights = torch.randn(D, 1)
    X_train = torch.randn(N_train, D, requires_grad=True)
    X_valid = torch.randn(N_valid, D, requires_grad=True)

    relevance_edges = [-2, -1, 0, 1]  # 5 relevances

    def to_relevance(features):
        scores = torch.mm(features, true_weights)
        binned = np.digitize(scores.clone().detach().numpy(), bins=relevance_edges)
        return torch.Tensor(binned)

    return X_train, X_valid, to_relevance(X_train), to_relevance(X_valid)


def swapped_pairs(ys_pred, ys_target):
    """Count item pairs whose predicted order contradicts their target order.

    Pairs with equal targets, or with equal predictions, are not counted.
    """
    n_items = ys_target.shape[0]
    discordant_count = 0
    for first in range(n_items - 1):
        for second in range(first + 1, n_items):
            target_rises = ys_target[first] < ys_target[second]
            if target_rises:
                contradicted = ys_pred[first] > ys_pred[second]
            elif ys_target[first] > ys_target[second]:
                contradicted = ys_pred[first] < ys_pred[second]
            else:
                contradicted = False
            if contradicted:
                discordant_count += 1
    return discordant_count


def ndcg(ys_true, ys_pred):
    """Normalised DCG of ``ys_pred`` against relevances ``ys_true``.

    DCG uses gain ``2**rel - 1`` and discount ``log2(1 + rank)`` with ranks
    starting at 1; the result is the DCG of the predicted ordering divided by
    the DCG of the ordering induced by ``ys_true`` itself.
    """
    def dcg(relevances, scores):
        ranking = torch.sort(scores, descending=True, dim=0)[1]
        ranked_relevances = relevances[ranking]
        return sum(
            (2 ** rel - 1) / np.log2(1 + rank)
            for rank, rel in enumerate(ranked_relevances, 1)
        )

    best_possible = dcg(ys_true, ys_true)
    achieved = dcg(ys_true, ys_pred)
    return achieved / best_possible

In [None]:
# Sizes for the synthetic learning-to-rank experiment: number of training and
# validation rows, feature dimension, and the intended epoch count / batch size.
N_train = 500
N_valid = 100
D = 50
epochs = 10
batch_size = 16

# Unseeded: the generated data differs on every run.
X_train, X_valid, ys_train, ys_valid = make_dataset(N_train, N_valid, D)

In [None]:
net = Net(D)
opt = optim.Adam(net.parameters())

In [None]:
epoch = 0

In [None]:
# Shuffle features and targets with the same random permutation so rows stay
# aligned. Rebinds X_train / ys_train, so re-running this cell reshuffles
# the already shuffled tensors.
idx = torch.randperm(N_train)

X_train = X_train[idx]
ys_train = ys_train[idx]

# Start offset of the next mini-batch.
cur_batch = 0

In [None]:
it = 0

In [None]:
# Take the next mini-batch and advance the offset. Re-running this cell moves
# on to the following batch rather than repeating the same one.
batch_X = X_train[cur_batch: cur_batch + batch_size]
batch_ys = ys_train[cur_batch: cur_batch + batch_size]
cur_batch += batch_size

In [None]:
batch_X.shape

In [None]:
opt.zero_grad()

In [None]:
opt

In [None]:
batch_pred = net(batch_X)
batch_pred

In [None]:
batch_ys

In [None]:
batch_loss = listnet_loss(batch_ys, batch_pred)
batch_loss

In [None]:
batch_loss.backward(retain_graph=True)

In [None]:
opt.step()

In [None]:
opt