### This file contains the naive method with $l_q=21$.

1. The results will be stored in `/Reproduction/Results/lq/Naive21/`, in one `<seed>-ResultData/` subfolder per data-splitting seed.

In [None]:
import os
import pandas as pd
import numpy as np
import datetime

# Turn off pandas' chained-assignment (SettingWithCopy) check for this notebook.
pd.set_option('mode.chained_assignment', None)

In [None]:
# Get the current directory; every input and output path below is built from it.
current_dir = os.getcwd()

# Set up random seeds for data splitting (one train/test split is run per seed).
split_rs = [290, 150, 266, 78, 148, 133, 155, 135, 178, 241]

# Set up directories to store result data.
# exist_ok=True keeps this cell re-runnable: without it, os.makedirs raises
# FileExistsError as soon as the directories exist from a previous run.
for rs in split_rs:
    os.makedirs(current_dir+'/Results/lq/Naive21/'+str(rs)+'-ResultData/', exist_ok=True)

# Set up data source and hyperparameters.
# `path` keeps its trailing slash because file names are appended to it directly.
path = current_dir+'/Data21/'
# Number of leading samples of each production segment used as the template.
template_length = 21

In [None]:
# Whole pads are moved into the training set until it holds at least this many
# wells; every remaining pad goes to the test set.
min_train_wells = 300


def _split_files_by_pad(files, seed, min_train_wells):
    """Split well files into train/test lists without splitting any pad.

    The pad identifier of a file is its name with the last 7 characters
    removed. Pads are shuffled with NumPy's global random state seeded by
    ``seed``; pads are then taken in shuffled order for training until the
    cumulative well count reaches ``min_train_wells``. If the threshold is
    never reached, every pad ends up in training and the test list is empty.

    Parameters
    ----------
    files : list of str
        Sorted file names of all wells.
    seed : int
        Seed passed to ``np.random.seed`` before shuffling the pads.
    min_train_wells : int
        Minimum number of wells to accumulate into the training set.

    Returns
    -------
    (list of str, list of str)
        Training files and test files, grouped by pad in shuffled pad order;
        within a pad the files keep the order they have in ``files``.
    """
    if not files:
        return [], []

    pads = [name[:-7] for name in files]
    files_df = pd.DataFrame({'filename': files, 'pad': pads})

    # One row per pad, in sorted pad order, with its number of wells. The row
    # order matters: it determines what the seeded shuffle below produces.
    num_wells_in_pad_df = pd.DataFrame(files_df['pad'].value_counts()).reset_index()
    num_wells_in_pad_df.columns = ['pad', 'count']
    unique_pads_df = pd.DataFrame({'pad': np.unique(pads)})
    unique_pads_df = pd.merge(unique_pads_df, num_wells_in_pad_df, on='pad')

    # DataFrame.sample draws from NumPy's global state when random_state is
    # not given, so seeding it here makes the shuffle reproducible.
    np.random.seed(seed)
    shuffled = unique_pads_df.sample(frac=1).reset_index(drop=True)

    # Index of the first pad at which the cumulative well count reaches the
    # threshold; that pad is still part of the training set.
    cumulative = shuffled['count'].cumsum().to_numpy()
    reached = np.flatnonzero(cumulative >= min_train_wells)
    n_train_pads = int(reached[0]) + 1 if reached.size else len(shuffled)

    # Group the files by pad once instead of rescanning all files per pad.
    files_by_pad = {}
    for name, pad in zip(files, pads):
        files_by_pad.setdefault(pad, []).append(name)

    pad_order = shuffled['pad'].tolist()
    train_files = [f for pad in pad_order[:n_train_pads] for f in files_by_pad[pad]]
    test_files = [f for pad in pad_order[n_train_pads:] for f in files_by_pad[pad]]
    return train_files, test_files


def _naive_forecast_well(df, template_length):
    """Build the naive multi-step forecast table for one well.

    The well is cut into segments that each start at a row whose ``Mark`` is
    ``'reopening'`` and run up to the next such row (or the end of the data).
    Within a segment the first ``template_length`` rates are copied as-is
    (marked ``'initial'``); every later step is predicted as the mean of those
    initial rates (marked ``'forecast'``).

    Parameters
    ----------
    df : pandas.DataFrame
        Well data with columns ``Q``, ``t`` and ``Mark``. A rate column
        ``q = Q / t`` is added to it in place.
    template_length : int
        Number of leading samples of each segment used as the template.

    Returns
    -------
    pandas.DataFrame
        Columns ``True``, ``Pred``, ``t``, ``Mark``, ``TrueCumu``, ``PredCumu``.
    """
    df['q'] = df['Q']/df['t']

    # NOTE(review): rows before the first 'reopening' mark belong to no
    # segment and are left out of the result; a well without any such mark
    # yields an empty table. Presumably every well starts with a 'reopening'
    # row -- confirm against the data.
    starts = np.flatnonzero((df['Mark'] == 'reopening').to_numpy()).tolist()
    bounds = starts + [len(df)]

    y_true_all_this_well = []
    forecasts_multisteps_this_well = []
    prod_times_this_well = []
    markers_this_well = []

    for start, stop in zip(bounds, bounds[1:]):
        sub_df = df.iloc[start:stop]
        rates = sub_df['q'].to_numpy()

        # A segment shorter than the template has no forecast steps; clamping
        # keeps the marker count equal to the number of rows in the segment.
        n_initial = min(template_length, len(sub_df))
        n_forecast = len(sub_df) - n_initial
        history = rates[:n_initial]

        y_true_all_this_well.extend(rates)
        forecasts_multisteps_this_well.extend(history)
        if n_forecast:
            # The history never changes, so the forecast is one constant value.
            forecasts_multisteps_this_well.extend([np.mean(history)] * n_forecast)
        prod_times_this_well.extend(sub_df['t'].to_numpy())
        markers_this_well.extend(['initial'] * n_initial + ['forecast'] * n_forecast)

    result = pd.DataFrame({
        'True': y_true_all_this_well,
        'Pred': forecasts_multisteps_this_well,
        't': prod_times_this_well,
        'Mark': markers_this_well,
    })
    # Cumulative totals: rate times the 't' column, summed over all rows.
    result['TrueCumu'] = (result['True']*result['t']).cumsum()
    result['PredCumu'] = (result['Pred']*result['t']).cumsum()
    return result


# The directory listing does not depend on the seed, so it is read only once.
files = sorted(os.listdir(path))

for rs in split_rs:
    ## Splitting data
    print('Calculating seed', rs, 'at', datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'))

    train_files_shuffled, test_files_shuffled = _split_files_by_pad(files, rs, min_train_wells)

    print('len(train_files_shuffled):', len(train_files_shuffled), '          len(test_files_shuffled):', len(test_files_shuffled))
    print(test_files_shuffled)

    ## Forecasting
    print('Forecasting', 'at', datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'))

    # Make sure the output folder exists even if this cell is run on its own.
    result_dir = current_dir+'/Results/lq/Naive21/'+str(rs)+'-ResultData/'
    os.makedirs(result_dir, exist_ok=True)

    # Get a test well.
    for m, test_file in enumerate(test_files_shuffled):

        df = pd.read_excel(path+test_file, header = 0, sheet_name = 0)

        print('=====Calculating well=====', m, test_file, 'at', datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'))

        # Result
        multi_step_result_df = _naive_forecast_well(df, template_length)

        # The context manager saves and closes the workbook, also on errors.
        # ExcelWriter.save() is not available in pandas 2.0 and later.
        with pd.ExcelWriter(result_dir+'ResultData-'+str(m)+'-'+str(test_file)) as writer:
            multi_step_result_df.to_excel(writer, float_format='%.5f', header=True, index=False)