# Time Series API Emulator 📈

This script emulates the Time-series API.

I hope this helps with local validation, especially in reducing "Submission Scoring Error".

I made this script with reference to:

- [Time-series API (iter_test) Emulator](https://www.kaggle.com/its7171/time-series-api-iter-test-emulator)
- [Local API Emulator](https://www.kaggle.com/jagofc/local-api-emulator)

Thank you! 😀

In [None]:
import numpy as np
import pandas as pd

In [None]:
class TimeSeriesAPI:
    """Local emulator of a time-series API that serves data one ``time_id`` at a time.

    The object is its own iterator. Each step yields ``(data_df, pred_df)``
    for a single ``time_id`` group, and `predict()` must be called exactly
    once before the next step is requested.

    The input frame must contain ``time_id``, ``row_id`` and ``investment_id``
    columns. A ``target`` column is optional, so both train-like frames (with
    targets) and test-like frames (without targets) can be replayed.

    Attributes:
        df: The input frame after ``reset_index(drop=True)``.
        target: Values of the ``target`` column, or ``None`` if it is absent.
        init_num_timeid: Number of ``time_id`` groups at construction time.
        next_calls: Number of batches handed out so far.
        pred_calls: Number of predictions accepted so far.
        predictions: Prediction frames in the order they were submitted.
        targets: Per-batch frames with ``row_id``, ``investment_id`` and,
            when available, ``target``.
    """

    def __init__(self, df: pd.DataFrame) -> None:
        """Prepare iteration over ``df`` grouped by ``time_id``.

        Args:
            df: Frame holding at least ``time_id``, ``row_id`` and
                ``investment_id``; ``target`` is optional.
        """
        df = df.reset_index(drop=True)
        self.df = df
        # `target` is optional so that frames without labels can be replayed.
        self.target = df["target"].values if "target" in df.columns else None

        grouped = df.groupby("time_id")
        self.df_iter = iter(grouped)
        self.init_num_timeid = len(grouped)

        self.next_calls = 0
        self.pred_calls = 0

        self.predictions = []
        self.targets = []

    def __iter__(self):
        """Return ``self``; the emulator is a single-pass iterator."""
        return self

    def __len__(self) -> int:
        """Return the number of ``time_id`` batches not yet handed out."""
        return self.init_num_timeid - self.next_calls

    def __next__(self):
        """Return the next ``(data_df, pred_df)`` pair.

        Returns:
            A tuple ``(data_df, pred_df)``. ``data_df`` is the current group
            without the ``time_id`` and ``target`` columns. ``pred_df`` has
            the columns ``row_id`` and ``target``, with ``target`` set to 0.0.

        Raises:
            AssertionError: If the previous batch has not been answered with
                `predict()` yet.
            StopIteration: When every ``time_id`` group has been served.
        """
        # Raised explicitly (not via `assert`) so the check survives `python -O`.
        if self.pred_calls != self.next_calls:
            raise AssertionError("You must call `predict()` before you get the next batch of data.")

        _, group = next(self.df_iter)
        self.next_calls += 1

        # Hide the grouping key and the label from the caller.
        data_df = group.drop(columns=["time_id", "target"], errors="ignore")

        # `row_id` and `investment_id` stay mandatory; only `target` is optional.
        if "target" in group.columns:
            target_cols = ["row_id", "target", "investment_id"]
        else:
            target_cols = ["row_id", "investment_id"]
        self.targets.append(group[target_cols])

        # `assign` returns a new frame, so filling in the placeholder never
        # writes into the stored target frame.
        pred_df = group[["row_id"]].assign(target=0.)

        return data_df, pred_df

    def predict(self, pred_df: pd.DataFrame) -> None:
        """Record the prediction for the batch most recently handed out.

        Args:
            pred_df: Frame whose columns are exactly ``['row_id', 'target']``.

        Raises:
            AssertionError: If no batch is awaiting a prediction, or if the
                columns of ``pred_df`` are not ``['row_id', 'target']``.
        """
        if self.pred_calls != self.next_calls - 1:
            raise AssertionError("You must get the next batch before making a new prediction.")
        if pred_df.columns.to_list() != ['row_id', 'target']:
            raise AssertionError("Prediction dataframe have invalid columns.")

        # `astype` returns a new frame, so later edits by the caller do not
        # alter the stored prediction.
        pred_df = pred_df.astype({'row_id': np.dtype('str'), 'target': np.dtype('float64')})
        self.predictions.append(pred_df)
        self.pred_calls += 1


# Test with training data

In [None]:
# train = pd.read_parquet('../input/ubiquant-parquet/train.parquet')
# small_train = train[train["investment_id"] < 10][:20]

In [None]:
# iter_train = TimeSeriesAPI(small_train)

# for n, (train_df, sample_prediction_df) in enumerate(iter_train):
#     
#     # make inference
#     sample_prediction_df["target"] = 1 / (n + 1)
#     
#     iter_train.predict(sample_prediction_df)

In [None]:
# prediction_df = pd.concat(iter_train.predictions)
# prediction_df

In [None]:
# del train, small_train, iter_train, prediction_df

# Test with supplemental data

In [None]:
# Load the supplemental training data that is replayed through the emulator below.
supplemental = pd.read_csv(
    filepath_or_buffer="../input/ubiquant-market-prediction/supplemental_train.csv",
)

In [None]:
# Replay the supplemental data through the emulator, one batch per iteration.
iter_train = TimeSeriesAPI(supplemental)

for n, batch in enumerate(iter_train):
    train_df, sample_prediction_df = batch

    # Dummy inference: a constant per batch that shrinks with the batch number.
    sample_prediction_df["target"] = 1 / (n + 1)

    # Every batch must be answered before the next one can be requested.
    iter_train.predict(sample_prediction_df)

In [None]:
# Stack the per-batch prediction frames collected by the emulator into one frame
# and display it as the cell output.
prediction_df = pd.concat(iter_train.predictions)
prediction_df

In [None]:
# `n` is the zero-based index of the last batch from the loop above, so n + 1 is
# the number of batches processed. NOTE(review): `n` is only defined if the loop
# ran at least once.
print(f"Number of iteration: {n + 1}")

In [None]:
# Show the columns of the last data batch handed out by the emulator.
train_df.columns

In [None]:
# Show the columns of the last prediction frame handed out by the emulator.
sample_prediction_df.columns