# Third Algorithm: Estimating of future values of "stock_distributed" data in 2019 and organize them in the required format

In [1]:
import pandas as pd
import numpy as np
from numpy import interp
from itertools import compress
import pickle as pkl
from tensorflow.keras.models import load_model

Import identified relevant data (from others scripts) for performing predictions of stock_distributed data

In [2]:
filehandler = open('df_data_final.pkl', 'rb') 
df_data_final = pkl.load(filehandler)
df_data_2019 = df_data_final[df_data_final['year']==2019]
filehandler.close()

Extract relevant data from csv files

In [3]:
df_product_info = pd.read_csv('product.csv')
df_district_info = pd.read_csv('service_delivery_site_data.csv') 
df_submission = pd.read_csv('SampleSubmission.csv')

total_cases = len(df_product_info) * len(df_district_info)
product_codes = list(df_product_info['product_code'])
site_codes = list(df_district_info['site_code'])

Initialize variables that will be necessary later to fill the submission form

In [4]:
dataset_test = np.array([])
all_cases = []
null_cases = []
counter = 0
counter_incomplete_cases = 0
complete_time = np.array(range(1,7)) # months 1-6 of 2019 as input of the net
MEMORY_TO_PRED = 6
MIN_MEMORY_TO_PRED = 4

Calculate necessary variables to fill the submission form and organize data correspondingly 

In [5]:
for i in product_codes:
    for j in site_codes:
        name = j + ' X ' + i
        all_cases.append(name)
        counter += 1
        print(f'Extracting relevant data for each product-district case: {counter} out of {total_cases}')
        
        df_single_case = df_data_2019[df_data_2019['product_code'].str.contains(i)]
        df_single_case = df_single_case[df_single_case["site_code"].str.contains(j)]
        
        if len(df_single_case) == MEMORY_TO_PRED:
            stock_seq = np.array(df_single_case['stock_distributed'])
        if len(df_single_case) < MIN_MEMORY_TO_PRED:
            counter_incomplete_cases += 1
            stock_seq = np.array([0, 0, 0, 0, 0, 0])
            null_cases.append(name)
        else:
            time_available = np.array(df_single_case['month'])
            stock_available = np.array(df_single_case['stock_distributed'])
            missing_time = np.setdiff1d(complete_time, time_available)
            missing_stock = interp(list(missing_time), time_available, stock_available)
            stock_seq = np.zeros(len(complete_time))
            stock_seq[time_available - 1] = stock_available
            stock_seq[missing_time - 1] = missing_stock
        
        if len(dataset_test) == 0:
            dataset_test = stock_seq
        else:
            dataset_test = np.vstack((dataset_test, stock_seq))


Extracting relevant data for each product-district case: 1 out of 1716
Extracting relevant data for each product-district case: 2 out of 1716
Extracting relevant data for each product-district case: 3 out of 1716
Extracting relevant data for each product-district case: 4 out of 1716
Extracting relevant data for each product-district case: 5 out of 1716
Extracting relevant data for each product-district case: 6 out of 1716
Extracting relevant data for each product-district case: 7 out of 1716
Extracting relevant data for each product-district case: 8 out of 1716
Extracting relevant data for each product-district case: 9 out of 1716
Extracting relevant data for each product-district case: 10 out of 1716
Extracting relevant data for each product-district case: 11 out of 1716
Extracting relevant data for each product-district case: 12 out of 1716
Extracting relevant data for each product-district case: 13 out of 1716
Extracting relevant data for each product-district case: 14 out of 1716
E

Extracting relevant data for each product-district case: 208 out of 1716
Extracting relevant data for each product-district case: 209 out of 1716
Extracting relevant data for each product-district case: 210 out of 1716
Extracting relevant data for each product-district case: 211 out of 1716
Extracting relevant data for each product-district case: 212 out of 1716
Extracting relevant data for each product-district case: 213 out of 1716
Extracting relevant data for each product-district case: 214 out of 1716
Extracting relevant data for each product-district case: 215 out of 1716
Extracting relevant data for each product-district case: 216 out of 1716
Extracting relevant data for each product-district case: 217 out of 1716
Extracting relevant data for each product-district case: 218 out of 1716
Extracting relevant data for each product-district case: 219 out of 1716
Extracting relevant data for each product-district case: 220 out of 1716
Extracting relevant data for each product-district 

Extracting relevant data for each product-district case: 346 out of 1716
Extracting relevant data for each product-district case: 347 out of 1716
Extracting relevant data for each product-district case: 348 out of 1716
Extracting relevant data for each product-district case: 349 out of 1716
Extracting relevant data for each product-district case: 350 out of 1716
Extracting relevant data for each product-district case: 351 out of 1716
Extracting relevant data for each product-district case: 352 out of 1716
Extracting relevant data for each product-district case: 353 out of 1716
Extracting relevant data for each product-district case: 354 out of 1716
Extracting relevant data for each product-district case: 355 out of 1716
Extracting relevant data for each product-district case: 356 out of 1716
Extracting relevant data for each product-district case: 357 out of 1716
Extracting relevant data for each product-district case: 358 out of 1716
Extracting relevant data for each product-district 

Extracting relevant data for each product-district case: 472 out of 1716
Extracting relevant data for each product-district case: 473 out of 1716
Extracting relevant data for each product-district case: 474 out of 1716
Extracting relevant data for each product-district case: 475 out of 1716
Extracting relevant data for each product-district case: 476 out of 1716
Extracting relevant data for each product-district case: 477 out of 1716
Extracting relevant data for each product-district case: 478 out of 1716
Extracting relevant data for each product-district case: 479 out of 1716
Extracting relevant data for each product-district case: 480 out of 1716
Extracting relevant data for each product-district case: 481 out of 1716
Extracting relevant data for each product-district case: 482 out of 1716
Extracting relevant data for each product-district case: 483 out of 1716
Extracting relevant data for each product-district case: 484 out of 1716
Extracting relevant data for each product-district 

Extracting relevant data for each product-district case: 616 out of 1716
Extracting relevant data for each product-district case: 617 out of 1716
Extracting relevant data for each product-district case: 618 out of 1716
Extracting relevant data for each product-district case: 619 out of 1716
Extracting relevant data for each product-district case: 620 out of 1716
Extracting relevant data for each product-district case: 621 out of 1716
Extracting relevant data for each product-district case: 622 out of 1716
Extracting relevant data for each product-district case: 623 out of 1716
Extracting relevant data for each product-district case: 624 out of 1716
Extracting relevant data for each product-district case: 625 out of 1716
Extracting relevant data for each product-district case: 626 out of 1716
Extracting relevant data for each product-district case: 627 out of 1716
Extracting relevant data for each product-district case: 628 out of 1716
Extracting relevant data for each product-district 

Extracting relevant data for each product-district case: 757 out of 1716
Extracting relevant data for each product-district case: 758 out of 1716
Extracting relevant data for each product-district case: 759 out of 1716
Extracting relevant data for each product-district case: 760 out of 1716
Extracting relevant data for each product-district case: 761 out of 1716
Extracting relevant data for each product-district case: 762 out of 1716
Extracting relevant data for each product-district case: 763 out of 1716
Extracting relevant data for each product-district case: 764 out of 1716
Extracting relevant data for each product-district case: 765 out of 1716
Extracting relevant data for each product-district case: 766 out of 1716
Extracting relevant data for each product-district case: 767 out of 1716
Extracting relevant data for each product-district case: 768 out of 1716
Extracting relevant data for each product-district case: 769 out of 1716
Extracting relevant data for each product-district 

Extracting relevant data for each product-district case: 911 out of 1716
Extracting relevant data for each product-district case: 912 out of 1716
Extracting relevant data for each product-district case: 913 out of 1716
Extracting relevant data for each product-district case: 914 out of 1716
Extracting relevant data for each product-district case: 915 out of 1716
Extracting relevant data for each product-district case: 916 out of 1716
Extracting relevant data for each product-district case: 917 out of 1716
Extracting relevant data for each product-district case: 918 out of 1716
Extracting relevant data for each product-district case: 919 out of 1716
Extracting relevant data for each product-district case: 920 out of 1716
Extracting relevant data for each product-district case: 921 out of 1716
Extracting relevant data for each product-district case: 922 out of 1716
Extracting relevant data for each product-district case: 923 out of 1716
Extracting relevant data for each product-district 

Extracting relevant data for each product-district case: 1064 out of 1716
Extracting relevant data for each product-district case: 1065 out of 1716
Extracting relevant data for each product-district case: 1066 out of 1716
Extracting relevant data for each product-district case: 1067 out of 1716
Extracting relevant data for each product-district case: 1068 out of 1716
Extracting relevant data for each product-district case: 1069 out of 1716
Extracting relevant data for each product-district case: 1070 out of 1716
Extracting relevant data for each product-district case: 1071 out of 1716
Extracting relevant data for each product-district case: 1072 out of 1716
Extracting relevant data for each product-district case: 1073 out of 1716
Extracting relevant data for each product-district case: 1074 out of 1716
Extracting relevant data for each product-district case: 1075 out of 1716
Extracting relevant data for each product-district case: 1076 out of 1716
Extracting relevant data for each prod

Extracting relevant data for each product-district case: 1213 out of 1716
Extracting relevant data for each product-district case: 1214 out of 1716
Extracting relevant data for each product-district case: 1215 out of 1716
Extracting relevant data for each product-district case: 1216 out of 1716
Extracting relevant data for each product-district case: 1217 out of 1716
Extracting relevant data for each product-district case: 1218 out of 1716
Extracting relevant data for each product-district case: 1219 out of 1716
Extracting relevant data for each product-district case: 1220 out of 1716
Extracting relevant data for each product-district case: 1221 out of 1716
Extracting relevant data for each product-district case: 1222 out of 1716
Extracting relevant data for each product-district case: 1223 out of 1716
Extracting relevant data for each product-district case: 1224 out of 1716
Extracting relevant data for each product-district case: 1225 out of 1716
Extracting relevant data for each prod

Extracting relevant data for each product-district case: 1328 out of 1716
Extracting relevant data for each product-district case: 1329 out of 1716
Extracting relevant data for each product-district case: 1330 out of 1716
Extracting relevant data for each product-district case: 1331 out of 1716
Extracting relevant data for each product-district case: 1332 out of 1716
Extracting relevant data for each product-district case: 1333 out of 1716
Extracting relevant data for each product-district case: 1334 out of 1716
Extracting relevant data for each product-district case: 1335 out of 1716
Extracting relevant data for each product-district case: 1336 out of 1716
Extracting relevant data for each product-district case: 1337 out of 1716
Extracting relevant data for each product-district case: 1338 out of 1716
Extracting relevant data for each product-district case: 1339 out of 1716
Extracting relevant data for each product-district case: 1340 out of 1716
Extracting relevant data for each prod

Extracting relevant data for each product-district case: 1445 out of 1716
Extracting relevant data for each product-district case: 1446 out of 1716
Extracting relevant data for each product-district case: 1447 out of 1716
Extracting relevant data for each product-district case: 1448 out of 1716
Extracting relevant data for each product-district case: 1449 out of 1716
Extracting relevant data for each product-district case: 1450 out of 1716
Extracting relevant data for each product-district case: 1451 out of 1716
Extracting relevant data for each product-district case: 1452 out of 1716
Extracting relevant data for each product-district case: 1453 out of 1716
Extracting relevant data for each product-district case: 1454 out of 1716
Extracting relevant data for each product-district case: 1455 out of 1716
Extracting relevant data for each product-district case: 1456 out of 1716
Extracting relevant data for each product-district case: 1457 out of 1716
Extracting relevant data for each prod

Extracting relevant data for each product-district case: 1580 out of 1716
Extracting relevant data for each product-district case: 1581 out of 1716
Extracting relevant data for each product-district case: 1582 out of 1716
Extracting relevant data for each product-district case: 1583 out of 1716
Extracting relevant data for each product-district case: 1584 out of 1716
Extracting relevant data for each product-district case: 1585 out of 1716
Extracting relevant data for each product-district case: 1586 out of 1716
Extracting relevant data for each product-district case: 1587 out of 1716
Extracting relevant data for each product-district case: 1588 out of 1716
Extracting relevant data for each product-district case: 1589 out of 1716
Extracting relevant data for each product-district case: 1590 out of 1716
Extracting relevant data for each product-district case: 1591 out of 1716
Extracting relevant data for each product-district case: 1592 out of 1716
Extracting relevant data for each prod

Reshape input data for the neural net and predict next future time instances (or load directly the predicted data)

In [6]:
jupyter_use = True
if jupyter_use:
    filehandler = open('data_test_prediction.pkl', 'rb') 
    dataset_test_predictions = pkl.load(filehandler)
    filehandler.close()
else:
    dataset_test_input = dataset_test.reshape(-1,6,1)         
    model = load_model('best_model.h5')
    dataset_test_predictions = model.predict(dataset_test_input)
    filehandler = open('data_test_prediction.pkl', 'wb') 
    pkl.dump(dataset_test_predictions, filehandler)
    filehandler.close()

Inverse-transforming scaled predictions

In [7]:
filehandler = open('scaler_final.pkl', 'rb') 
scaler = pkl.load(filehandler)
dataset_test_predictions_ = np.round(scaler.inverse_transform(dataset_test_predictions))
filehandler.close()

# Filling the submission file 

In [8]:
all_cases = np.array(all_cases)
null_cases = np.array(null_cases)

for i in range(len(df_submission)):
    # Identify the current case
    case_code = df_submission['ID'][i][-15:]
    bool_cases_null = null_cases == case_code
    
    # Verify that enough information is available to make the inference
    if sum(bool_cases_null) == 0:
        # Extract particular estimation by the neural net
        bool_cases =  all_cases == case_code
        current_index = compress(range(len(bool_cases)), bool_cases)
        current_index = next(current_index)
    
        month = int(df_submission['ID'][i][-19])
        month_index = month - 4
        df_submission['prediction'][i] = dataset_test_predictions_[current_index, month_index]

df_submission.to_csv('Submission.csv', index=False) 

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
