**Installing required libraries to ensure reproducibility of the results**

In [1]:
!pip install datasets==2.8.0
!pip install transformers==4.26.0
!pip install huggingface-hub==0.13.0
!pip install rouge_score==0.1.2



In [2]:
#Download t5-small library
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
tokenizer = AutoTokenizer.from_pretrained("t5-small")
model = AutoModelForSeq2SeqLM.from_pretrained("t5-small")

In [3]:
# Each row represents a list of clothing items purchased  and the item to be recommended in Output column
import pandas as pd
csv_path = '/content/data_final3.csv'
data_new_df= pd.read_csv(csv_path)
data_new_df

Unnamed: 0.1,Unnamed: 0,Input,Output
0,0,"Heels, Dress, Earrings, Handbag",Flats
1,1,"Belt, Jeans, Watch, Sunglasses",Shirt
2,2,"Flats, Necklace, Dress, Handbag",Jeans
3,3,"T-shirt, Loafers, Belt, Watch",Sneakers
4,4,"Earrings, Jeans, Scarf, Dress",Handbag
...,...,...,...
124,124,"Dress, Earrings, Handbag, Flats",Sunglasses
125,125,"Jeans, Belt, Sunglasses, Shirt",Loafers
126,126,"Handbag, Dress, Flats, Necklace",Jeans
127,127,"T-shirt, Sunglasses, Jeans, Loafers",Sneakers


In [4]:
# Dropping the first column of data_new_df
data_new_df.drop('Unnamed: 0', axis=1, inplace=True)
data_new_df

Unnamed: 0,Input,Output
0,"Heels, Dress, Earrings, Handbag",Flats
1,"Belt, Jeans, Watch, Sunglasses",Shirt
2,"Flats, Necklace, Dress, Handbag",Jeans
3,"T-shirt, Loafers, Belt, Watch",Sneakers
4,"Earrings, Jeans, Scarf, Dress",Handbag
...,...,...
124,"Dress, Earrings, Handbag, Flats",Sunglasses
125,"Jeans, Belt, Sunglasses, Shirt",Loafers
126,"Handbag, Dress, Flats, Necklace",Jeans
127,"T-shirt, Sunglasses, Jeans, Loafers",Sneakers


In [5]:
# List of all items in the an outfit
items=['Blouse', 'Skirt', 'Heels', 'Necklace', 'Handbag', 'Dress', 'Flats', 'Earrings', 'Jeans', 'Scarf', 'Shirt', 'Sneakers', 'Watch', 'Belt', 'Sunglasses', 'Chinos', 'Loafers', 'T-shirt', 'Backpack']
data_new_df['Input'].iloc[0]

'Heels, Dress, Earrings, Handbag'

**Functions to make list of items**

In [6]:
# This function will the list of products that a customer has not yet purchased and use these items for recommendation.
def get_unpurchased(purchase_history):
  unpurchased = []

  for item in items:
    if(item not in purchase_history):
      unpurchased.append(item)
  return unpurchased

# These functions will build strings for the list of items for recommendation and list of items purchased
def modify_unpurchased_list(unpurchased_list):
  concatenated_list = ', '.join(unpurchased_list)
  concatenated_list = concatenated_list.rstrip(',')
  concatenated_list = "CANDIDATES FOR RECOMMENDATION: {" + concatenated_list + "}"
  return concatenated_list

def modify_purchased_items(purchase_history):
  purchase_history = "ITEMS PURCHASED: {" + purchase_history + "}"
  return purchase_history


In [7]:
# Items not yet purchased by the customer
data_new_df['Unpurchased_items'] = data_new_df['Input'].apply(get_unpurchased)
len(data_new_df['Unpurchased_items'].iloc[0])
data_new_df['Unpurchased_items'].iloc[0]
print(data_new_df['Input'].iloc[0])

data_new_df['Input'].apply(modify_purchased_items).iloc[0]
data_new_df['Unpurchased_items'].apply(modify_unpurchased_list).iloc[0]

# Combining the purchased and unpurchased item strings to build a prompt
data_new_df['Prompt'] = data_new_df['Input'].apply(modify_purchased_items) + \
                               " - " + data_new_df['Unpurchased_items'].apply(modify_unpurchased_list) + \
                               " - RECOMMENDATION: "

data_new_df

Heels, Dress, Earrings, Handbag


Unnamed: 0,Input,Output,Unpurchased_items,Prompt
0,"Heels, Dress, Earrings, Handbag",Flats,"[Blouse, Skirt, Necklace, Flats, Jeans, Scarf,...","ITEMS PURCHASED: {Heels, Dress, Earrings, Hand..."
1,"Belt, Jeans, Watch, Sunglasses",Shirt,"[Blouse, Skirt, Heels, Necklace, Handbag, Dres...","ITEMS PURCHASED: {Belt, Jeans, Watch, Sunglass..."
2,"Flats, Necklace, Dress, Handbag",Jeans,"[Blouse, Skirt, Heels, Earrings, Jeans, Scarf,...","ITEMS PURCHASED: {Flats, Necklace, Dress, Hand..."
3,"T-shirt, Loafers, Belt, Watch",Sneakers,"[Blouse, Skirt, Heels, Necklace, Handbag, Dres...","ITEMS PURCHASED: {T-shirt, Loafers, Belt, Watc..."
4,"Earrings, Jeans, Scarf, Dress",Handbag,"[Blouse, Skirt, Heels, Necklace, Handbag, Flat...","ITEMS PURCHASED: {Earrings, Jeans, Scarf, Dres..."
...,...,...,...,...
124,"Dress, Earrings, Handbag, Flats",Sunglasses,"[Blouse, Skirt, Heels, Necklace, Jeans, Scarf,...","ITEMS PURCHASED: {Dress, Earrings, Handbag, Fl..."
125,"Jeans, Belt, Sunglasses, Shirt",Loafers,"[Blouse, Skirt, Heels, Necklace, Handbag, Dres...","ITEMS PURCHASED: {Jeans, Belt, Sunglasses, Shi..."
126,"Handbag, Dress, Flats, Necklace",Jeans,"[Blouse, Skirt, Heels, Earrings, Jeans, Scarf,...","ITEMS PURCHASED: {Handbag, Dress, Flats, Neckl..."
127,"T-shirt, Sunglasses, Jeans, Loafers",Sneakers,"[Blouse, Skirt, Heels, Necklace, Handbag, Dres...","ITEMS PURCHASED: {T-shirt, Sunglasses, Jeans, ..."


**Splitting the dataset**

In [8]:
data_new_df['Prompt'].iloc[0]

# Reformat training and eval dataframes
train_set = data_new_df[['Prompt', 'Output']][0:100].reset_index().rename(columns={"Prompt":"source", "Output": "target", "index": "id"})
train_set = train_set.dropna()

train_set

Unnamed: 0,id,source,target
0,0,"ITEMS PURCHASED: {Heels, Dress, Earrings, Hand...",Flats
1,1,"ITEMS PURCHASED: {Belt, Jeans, Watch, Sunglass...",Shirt
2,2,"ITEMS PURCHASED: {Flats, Necklace, Dress, Hand...",Jeans
3,3,"ITEMS PURCHASED: {T-shirt, Loafers, Belt, Watc...",Sneakers
4,4,"ITEMS PURCHASED: {Earrings, Jeans, Scarf, Dres...",Handbag
...,...,...,...
95,95,"ITEMS PURCHASED: {T-shirt, Sunglasses, Jeans, ...",Sneakers
96,96,"ITEMS PURCHASED: {Dress, Earrings, Handbag, Sc...",Flats
97,97,"ITEMS PURCHASED: {Sneakers, Chinos, Belt, Sung...",Watch
98,98,"ITEMS PURCHASED: {Heels, Dress, Jeans, Earring...",Handbag


In [9]:
eval_set = data_new_df[['Prompt', 'Output']][100:].reset_index().rename(columns={"Prompt":"source", "Output": "target", "index": "id"})
eval_set = eval_set.dropna()
eval_set

Unnamed: 0,id,source,target
0,100,"ITEMS PURCHASED: {Flats, Necklace, Dress, Hand...",Jeans
1,101,"ITEMS PURCHASED: {Sunglasses, Chinos, T-shirt,...",Jeans
2,102,"ITEMS PURCHASED: {Dress, Earrings, Handbag, Fl...",Sunglasses
3,103,"ITEMS PURCHASED: {Belt, Jeans, Watch, Sunglass...",Shirt
4,104,"ITEMS PURCHASED: {Necklace, Flats, Dress, Hand...",Jeans
5,105,"ITEMS PURCHASED: {T-shirt, Loafers, Belt, Watc...",Sneakers
6,106,"ITEMS PURCHASED: {Earrings, Jeans, Scarf, Dres...",Handbag
7,107,"ITEMS PURCHASED: {Sneakers, Chinos, T-shirt, L...",Jeans
8,108,"ITEMS PURCHASED: {Dress, Earrings, Handbag, Fl...",Sunglasses
9,109,"ITEMS PURCHASED: {Jeans, Belt, Sunglasses, Shi...",Loafers


**Converting the dataset to objects to enable efficient handling of large datasets**

In [10]:
# Convert dataframes to Dataset objects
from datasets import Dataset

training_data = Dataset.from_pandas(train_set)
eval_data = Dataset.from_pandas(eval_set)


**Saving the objects in a dictionary ensuring a clear and structured way to manage multiple datasets within a single object**

In [11]:
# Build DatasetDict from Dataset objects
import datasets
data_dict_set = datasets.DatasetDict({"train": training_data, "eval": eval_data})
data_dict_set

DatasetDict({
    train: Dataset({
        features: ['id', 'source', 'target'],
        num_rows: 100
    })
    eval: Dataset({
        features: ['id', 'source', 'target'],
        num_rows: 29
    })
})

In [12]:
max_len = tokenizer.model_max_length
max_target = 20

In [13]:
def token_func(examples):
  inputs = [doc for doc in examples["source"]]
  model_inputs = tokenizer(inputs, max_length=max_len, truncation=True, padding=True)

  #Setup the tokenizer for targets
  with tokenizer.as_target_tokenizer():
    labels = tokenizer(examples["target"], max_length=max_target, truncation=True, padding=True)

  model_inputs["labels"] = labels["input_ids"]
  return model_inputs

# Test preprocessing on first 2 rows
token_func(data_dict_set["train"][:2])




{'input_ids': [[2344, 20804, 276, 5905, 20891, 134, 2326, 10, 3, 2, 3845, 3573, 6, 12169, 6, 262, 291, 1007, 7, 6, 2263, 7893, 2, 3, 18, 205, 9853, 26483, 21254, 5652, 4083, 6657, 329, 14920, 8015, 10, 3, 2, 279, 40, 1162, 15, 6, 7409, 52, 17, 6, 31420, 6, 13109, 7, 6, 3966, 7, 6, 14586, 89, 6, 3, 16671, 6, 29306, 277, 6, 4195, 6, 18845, 6, 3068, 15548, 15, 7, 6, 2695, 4844, 6, 1815, 9, 1010, 7, 6, 332, 18, 9486, 6, 3195, 5745, 2, 3, 18, 4083, 6657, 329, 14920, 8015, 10, 1, 0, 0], [2344, 20804, 276, 5905, 20891, 134, 2326, 10, 3, 2, 2703, 40, 17, 6, 3966, 7, 6, 4195, 6, 3068, 15548, 15, 7, 2, 3, 18, 205, 9853, 26483, 21254, 5652, 4083, 6657, 329, 14920, 8015, 10, 3, 2, 279, 40, 1162, 15, 6, 7409, 52, 17, 6, 216, 3573, 6, 31420, 6, 2263, 7893, 6, 12169, 6, 13109, 7, 6, 262, 291, 1007, 7, 6, 14586, 89, 6, 3, 16671, 6, 29306, 277, 6, 2695, 4844, 6, 1815, 9, 1010, 7, 6, 332, 18, 9486, 6, 3195, 5745, 2, 3, 18, 4083, 6657, 329, 14920, 8015, 10, 1]], 'attention_mask': [[1, 1, 1, 1, 1, 1, 1, 1

In [14]:
print(len(token_func(data_dict_set["train"][:2])['input_ids'][0] ))
print(len(token_func(data_dict_set["train"][:2])['attention_mask'][0] ))
print(len(token_func(data_dict_set["train"][:2])['labels'][0] ))

99
99
3


In [15]:
# Tokenize train and eval datasets
tokenized_datasets = data_dict_set.map(token_func, batched=True)
tokenized_datasets

  0%|          | 0/1 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

DatasetDict({
    train: Dataset({
        features: ['id', 'source', 'target', 'input_ids', 'attention_mask', 'labels'],
        num_rows: 100
    })
    eval: Dataset({
        features: ['id', 'source', 'target', 'input_ids', 'attention_mask', 'labels'],
        num_rows: 29
    })
})

In [16]:
# Instantiate Data Collator object
from transformers import DataCollatorForSeq2Seq, Seq2SeqTrainingArguments, Seq2SeqTrainer
data_collator = DataCollatorForSeq2Seq(tokenizer, model=model)


In [17]:
from torch.utils.data import DataLoader

train_dataloader = DataLoader(
    tokenized_datasets["train"], shuffle=True, batch_size=4, collate_fn=data_collator
)

eval_dataloader = DataLoader(
    tokenized_datasets["eval"], batch_size=4, collate_fn=data_collator
)

len(train_dataloader)

25

In [18]:
from transformers import AdamW, get_scheduler

learning_rate = 1e-4
optimizer = AdamW(model.parameters(), lr=learning_rate)

num_epochs = 4
num_training_steps = num_epochs * len(train_dataloader)
lr_scheduler = get_scheduler(
    "linear",
    optimizer=optimizer,
    num_warmup_steps=0,
    num_training_steps=num_training_steps,
)
print(num_training_steps)




100


**Training arguments (Increased the epochs from 8 to 20)**

In [19]:
batch_size = 4
args = Seq2SeqTrainingArguments(
    "./outfit_recommender",
    evaluation_strategy = "epoch",
    learning_rate = 0.0001,
    per_device_train_batch_size = batch_size,
    per_device_eval_batch_size = batch_size,
    save_total_limit = 3,
    num_train_epochs =20,
    predict_with_generate = True,
    gradient_accumulation_steps =4,
    eval_accumulation_steps = 4,
)

In [20]:
# ROUGE metric object
from datasets import load_dataset, load_metric
metric = load_metric("rouge")
metric

  metric = load_metric("rouge")


Metric(name: "rouge", features: {'predictions': Value(dtype='string', id='sequence'), 'references': Value(dtype='string', id='sequence')}, usage: """
Calculates average rouge scores for a list of hypotheses and references
Args:
    predictions: list of predictions to score. Each prediction
        should be a string with tokens separated by spaces.
    references: list of reference for each prediction. Each
        reference should be a string with tokens separated by spaces.
    rouge_types: A list of rouge types to calculate.
        Valid names:
        `"rouge{n}"` (e.g. `"rouge1"`, `"rouge2"`) where: {n} is the n-gram based scoring,
        `"rougeL"`: Longest common subsequence based scoring.
        `"rougeLSum"`: rougeLsum splits text using `"
"`.
        See details in https://github.com/huggingface/datasets/issues/617
    use_stemmer: Bool indicating whether Porter stemmer should be used to strip word suffixes.
    use_aggregator: Return aggregates if this is set to True
Retu

In [21]:
# Functions for further preprocessing and metrics computation
import numpy as np

def token_to_text(preds, labels):
  preds = [pred.strip() for pred in preds]
  labels = [[label.strip()] for label in labels]

  return preds, labels

def calc_metrics(eval_preds):
  preds, labels = eval_preds
  if isinstance(preds, tuple):
    preds = preds[0]
  decoded_preds = tokenizer.batch_decode(preds, skip_special_tokens=True)

  labels = np.where(labels != -100, labels, tokenizer.pad_token_id)
  decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=True)

  decoded_preds, decoded_labels = token_to_text(decoded_preds, decoded_labels)

  result = metric.compute(predictions = decoded_preds, references = decoded_labels)
  result = {key: value.mid.fmeasure * 100 for key, value in result.items()}

  prediction_lens = [np.count_nonzero(pred != tokenizer.pad_token_id) for pred in preds]
  result["gen_len"] = np.mean(prediction_lens)

  return result

In [22]:
trainer = Seq2SeqTrainer(
    model,
    args,
    train_dataset = tokenized_datasets["train"],
    eval_dataset = tokenized_datasets["eval"],
    data_collator = data_collator,
    tokenizer = tokenizer,
    compute_metrics = calc_metrics,
    optimizers = (optimizer, lr_scheduler)
)


In [23]:
trainer.train()

The following columns in the training set don't have a corresponding argument in `T5ForConditionalGeneration.forward` and have been ignored: source, id, target. If source, id, target are not expected by `T5ForConditionalGeneration.forward`,  you can safely ignore this message.
***** Running training *****
  Num examples = 100
  Num Epochs = 20
  Instantaneous batch size per device = 4
  Total train batch size (w. parallel, distributed & accumulation) = 16
  Gradient Accumulation steps = 4
  Total optimization steps = 120
  Number of trainable parameters = 60506624
You're using a T5TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum,Gen Len
0,No log,5.939291,2.298851,0.0,2.298851,2.298851,19.0
1,No log,2.341985,12.027915,0.0,12.110016,12.151067,19.0
2,No log,1.245057,16.871921,0.0,16.871921,16.748768,16.37931
3,No log,0.748767,34.482759,0.0,34.482759,34.482759,3.0
4,No log,0.614623,34.482759,0.0,34.482759,34.482759,3.137931
5,No log,0.521257,37.931034,0.0,37.931034,37.931034,3.413793
6,No log,0.456128,51.724138,0.0,51.724138,51.724138,3.689655
7,No log,0.397154,44.827586,0.0,44.827586,44.827586,3.413793
8,No log,0.370857,44.827586,0.0,44.827586,44.827586,3.137931
9,No log,0.384056,44.827586,0.0,44.827586,44.827586,3.0


The following columns in the evaluation set don't have a corresponding argument in `T5ForConditionalGeneration.forward` and have been ignored: source, id, target. If source, id, target are not expected by `T5ForConditionalGeneration.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 29
  Batch size = 4
Generate config GenerationConfig {
  "decoder_start_token_id": 0,
  "eos_token_id": 1,
  "pad_token_id": 0,
  "transformers_version": "4.26.0"
}

Generate config GenerationConfig {
  "decoder_start_token_id": 0,
  "eos_token_id": 1,
  "pad_token_id": 0,
  "transformers_version": "4.26.0"
}

Generate config GenerationConfig {
  "decoder_start_token_id": 0,
  "eos_token_id": 1,
  "pad_token_id": 0,
  "transformers_version": "4.26.0"
}

Generate config GenerationConfig {
  "decoder_start_token_id": 0,
  "eos_token_id": 1,
  "pad_token_id": 0,
  "transformers_version": "4.26.0"
}

Generate config GenerationConfig {
  "decoder_start_token_id": 0,
  

TrainOutput(global_step=120, training_loss=1.7103658040364584, metrics={'train_runtime': 66.9416, 'train_samples_per_second': 29.877, 'train_steps_per_second': 1.793, 'total_flos': 52762155417600.0, 'train_loss': 1.7103658040364584, 'epoch': 19.96})

In [24]:
predictions = trainer.predict(tokenized_datasets["eval"])

The following columns in the test set don't have a corresponding argument in `T5ForConditionalGeneration.forward` and have been ignored: source, id, target. If source, id, target are not expected by `T5ForConditionalGeneration.forward`,  you can safely ignore this message.
***** Running Prediction *****
  Num examples = 29
  Batch size = 4
Generate config GenerationConfig {
  "decoder_start_token_id": 0,
  "eos_token_id": 1,
  "pad_token_id": 0,
  "transformers_version": "4.26.0"
}



Generate config GenerationConfig {
  "decoder_start_token_id": 0,
  "eos_token_id": 1,
  "pad_token_id": 0,
  "transformers_version": "4.26.0"
}

Generate config GenerationConfig {
  "decoder_start_token_id": 0,
  "eos_token_id": 1,
  "pad_token_id": 0,
  "transformers_version": "4.26.0"
}

Generate config GenerationConfig {
  "decoder_start_token_id": 0,
  "eos_token_id": 1,
  "pad_token_id": 0,
  "transformers_version": "4.26.0"
}

Generate config GenerationConfig {
  "decoder_start_token_id": 0,
  "eos_token_id": 1,
  "pad_token_id": 0,
  "transformers_version": "4.26.0"
}

Generate config GenerationConfig {
  "decoder_start_token_id": 0,
  "eos_token_id": 1,
  "pad_token_id": 0,
  "transformers_version": "4.26.0"
}

Generate config GenerationConfig {
  "decoder_start_token_id": 0,
  "eos_token_id": 1,
  "pad_token_id": 0,
  "transformers_version": "4.26.0"
}

Generate config GenerationConfig {
  "decoder_start_token_id": 0,
  "eos_token_id": 1,
  "pad_token_id": 0,
  "transformers_

In [25]:
predictions

PredictionOutput(predictions=array([[    0,  3966,     7,     1,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0],
       [    0,  3966,     7,     1,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0],
       [    0,  3966,     7,     1,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0],
       [    0,  2263,  7893,     1,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0],
       [    0,  3966,     7,     1,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0],
       [    0,  3966,     7,     1,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,
    

In [26]:
# Convert tokens from data to text
def translator(tokens):
  new_list = tokenizer.convert_ids_to_tokens(tokens)
  new_list = [token for token in new_list if ('<' not in token)]
  new_string = ''.join(new_list)
  new_string = new_string.replace("▁", " ")
  new_string = new_string.strip()
  return new_string

# Print sample predicted output
index = 16
print(tokenized_datasets["eval"]["source"][index])
print("Target product: ", tokenized_datasets["eval"]["target"][index])
print("Recommended product: ", [translator(predictions.predictions[index])])


ITEMS PURCHASED: {Flats, Necklace, Dress, Handbag} - CANDIDATES FOR RECOMMENDATION: {Blouse, Skirt, Heels, Earrings, Jeans, Scarf, Shirt, Sneakers, Watch, Belt, Sunglasses, Chinos, Loafers, T-shirt, Backpack} - RECOMMENDATION: 
Target product:   Jeans
Recommended product:  ['Jeans']


In [27]:
# Print sample predicted output
index = 2
print(tokenized_datasets["eval"]["source"][index])
print("Target product: ", tokenized_datasets["eval"]["target"][index])
print("Recommended product: ", [translator(predictions.predictions[index])])

ITEMS PURCHASED: {Dress, Earrings, Handbag, Flats} - CANDIDATES FOR RECOMMENDATION: {Blouse, Skirt, Heels, Necklace, Jeans, Scarf, Shirt, Sneakers, Watch, Belt, Sunglasses, Chinos, Loafers, T-shirt, Backpack} - RECOMMENDATION: 
Target product:   Sunglasses
Recommended product:  ['Jeans']


In [28]:
# Collect generated outputs and join with prompts and targets
model_recommendtion = []
item_list_prompt = []
t_list = []

for i in range(len(predictions.predictions)):
  model_recommendtion.append(translator(predictions.predictions[i]))

  item_list_prompt.append(eval_data['source'][i])
  t_list.append(eval_data['target'][i])


In [29]:
recommended_dataframe = pd.DataFrame()

recommended_dataframe['input'] = item_list_prompt
recommended_dataframe['target'] = t_list
recommended_dataframe['model_recommendtion'] = model_recommendtion

recommended_dataframe

Unnamed: 0,input,target,model_recommendtion
0,"ITEMS PURCHASED: {Flats, Necklace, Dress, Hand...",Jeans,Jeans
1,"ITEMS PURCHASED: {Sunglasses, Chinos, T-shirt,...",Jeans,Jeans
2,"ITEMS PURCHASED: {Dress, Earrings, Handbag, Fl...",Sunglasses,Jeans
3,"ITEMS PURCHASED: {Belt, Jeans, Watch, Sunglass...",Shirt,Handbag
4,"ITEMS PURCHASED: {Necklace, Flats, Dress, Hand...",Jeans,Jeans
5,"ITEMS PURCHASED: {T-shirt, Loafers, Belt, Watc...",Sneakers,Jeans
6,"ITEMS PURCHASED: {Earrings, Jeans, Scarf, Dres...",Handbag,Handbag
7,"ITEMS PURCHASED: {Sneakers, Chinos, T-shirt, L...",Jeans,Jeans
8,"ITEMS PURCHASED: {Dress, Earrings, Handbag, Fl...",Sunglasses,Jeans
9,"ITEMS PURCHASED: {Jeans, Belt, Sunglasses, Shi...",Loafers,Handbag
