## Definitions

In [None]:
import pandas as pd
from platform_sdk.dataset_reader import DatasetReader

# Platform dataset IDs: the dataset the notebook reads from and the one it writes to.
inputDataset = "5e5f62408ac43618a8b959aa"   # Adobe Analytics: Demo Environment postValues
outputDataset = "5e5f62599b451c18a97904df"  # Recommendations Input Dataset

# Names of the input columns used by the cells below.
user_id = "_tcsamericasptrsd.identification.ecid"
item_id = "_tcsamericasptrsd.productData.productName"
interactionType = "_tcsamericasptrsd.productData.productInteraction"
brand_name = "_tcsamericasptrsd.brand.brandName"
timestamp = "timestamp"
#tenant_id = "_tcsamericasptrsd"

# PLATFORM_SDK_CLIENT_CONTEXT is not defined in this notebook; presumably it is
# injected by the notebook environment — TODO confirm.
client_context = PLATFORM_SDK_CLIENT_CONTEXT

## Load the purchases data

In [None]:
# Build a reader for the input dataset, then read it into `df`.
# limit(50000) presumably caps the number of rows fetched — confirm against the SDK docs.
dataset_reader = DatasetReader(
    PLATFORM_SDK_CLIENT_CONTEXT,
    dataset_id=inputDataset,
)
capped_reader = dataset_reader.limit(50000)
df = capped_reader.read()

In [None]:
# Preview the first five rows of the loaded data.
df.head(n=5)

## Filtering

In [None]:
# Drop rows that are missing any of the fields used below.
required_columns = [user_id, item_id, interactionType, brand_name]
df = df.dropna(subset=required_columns)

# Restrict the data to a single brand.
is_luma_retail = df[brand_name] == "Luma Retail"
df = df[is_luma_retail]


## Split items into individual records

In [None]:
# Vectorized (no Python-level loops) splitting of a delimited column in pandas.
# Adapted from: https://stackoverflow.com/a/48120674
def split_df(dataframe, col_name, sep):
    """Split a delimited string column into one row per item.

    Each row of ``dataframe`` is repeated once for every piece obtained by
    splitting its ``col_name`` value on ``sep``; the other columns and the
    row's index label are copied onto each repeated row.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Input frame. It is not modified.
    col_name : str
        Name of the string column to split.
    sep : str
        Pattern handed to ``Series.str.split``. By default pandas treats
        patterns longer than one character as regular expressions, so regex
        metacharacters must be escaped.

    Returns
    -------
    pandas.DataFrame
        New frame carrying the original index labels (repeated per piece,
        index names preserved), the untouched columns in their original
        order, and ``col_name`` as the last column with one piece per row.
        Rows whose ``col_name`` value is missing yield no pieces and are
        therefore absent from the result.

    Raises
    ------
    ValueError
        If ``col_name`` is not a column of ``dataframe``.
    """
    if col_name not in dataframe.columns:
        raise ValueError("%r is not a column of the dataframe" % (col_name,))

    other_columns = [c for c in dataframe.columns if c != col_name]
    if dataframe.empty:
        # Nothing to split; return the same column layout as the normal path.
        return dataframe[other_columns + [col_name]].copy()

    orig_index = dataframe.index
    # Work on a 0..n-1 positional index so pieces can be mapped back to their
    # rows whatever the caller's index looks like (duplicates, MultiIndex).
    positional = dataframe.reset_index(drop=True)

    # One column per piece; rows with fewer pieces are padded with missing values.
    pieces = positional[col_name].str.split(sep, expand=True)
    # Long format keyed by (row position, piece number). The explicit dropna
    # removes the padding even when stack() itself keeps missing values.
    stacked = pieces.stack().dropna()
    positions = stacked.index.get_level_values(0).values

    result = positional[other_columns].iloc[positions].copy()
    result[col_name] = stacked.values
    # Restore the caller's index labels (and names) on every repeated row.
    result.index = orig_index.take(positions)

    return result

# Raw string: split_df hands the separator to pandas' str.split, which treats
# this multi-character pattern as a regex matching a literal "||". A plain
# "\|\|" literal relies on an invalid escape sequence that Python warns about.
df2 = split_df(df, item_id, r"\|\|")

## Data prep for saving back to the platform

In [None]:
# Select the columns to write and map their input names onto the output names.
filtered_column_list = [item_id, user_id, interactionType, timestamp]

# NOTE(review): brand_name is not in filtered_column_list, so its entry below
# has no effect, and timestamp keeps its original name — confirm both are intended.
output_column_names = {
    item_id: "_tcsamericasptrsd.itemId",
    user_id: "_tcsamericasptrsd.userId",
    interactionType: "_tcsamericasptrsd.interactionType",
    brand_name: "_tcsamericasptrsd.brandName",
}
df2 = df2[filtered_column_list].rename(columns=output_column_names)

df2.head()

## Write the new dataframe to the platform

In [None]:
from platform_sdk.dataset_writer import DatasetWriter
from platform_sdk.models import Dataset

# Look up the output dataset by ID, then write the prepared frame to it as JSON.
dataset_lookup = Dataset(PLATFORM_SDK_CLIENT_CONTEXT)
dataset = dataset_lookup.get_by_id(outputDataset)
dataset_writer = DatasetWriter(PLATFORM_SDK_CLIENT_CONTEXT, dataset)
dataset_writer.write(df2, file_format='json')