## H&M Personalized Fashion Recommendations

This notebook contains the approach taken for the 2022 H&M Personalized Fashion Recommendations Kaggle competition. 

*See the repository's README.md for instructions on how to run this notebook locally.*

Developed by **Jaileen Salazar**
_____

### Required Dependencies

In [None]:
# Data Processing
import pandas as pd
import numpy as np
import csv
import re
from sklearn.model_selection import train_test_split
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences

# RNN
from keras.models import Sequential
from keras.layers import Dense, LSTM, Embedding, SpatialDropout1D, Bidirectional
from keras.callbacks import EarlyStopping
from keras.metrics import Recall, Precision

### Constants

In [None]:
# FILE PATHS (relative to the notebook's working directory)
TRAIN_PATH = '../data/train.csv'
TEST_PATH = '../data/test.csv'

# FILE FORMATS: CSV column name -> Python type expected for that column
TRAIN_FORMAT = {'ID': int, 'TEXT': str, 'LABEL': int}
TEST_FORMAT = {'ID': int, 'TEXT': str}

# Regular expressions used for preprocessing.
# Raw strings are used so that backslashes reach the regex engine untouched;
# non-raw literals such as '\[' are invalid string escapes and emit a
# SyntaxWarning on recent Python versions.
# Punctuation that is replaced by a single space.
REPLACE_BY_SPACE_RE = re.compile(r'[/(){}\[\]\|@,;]')
# Anything that is not a digit, lowercase letter, space, '#', '+' or '_'.
BAD_SYMBOLS_RE = re.compile(r'[^0-9a-z #+_]')
# Non-greedy match of a single '<...>' tag.
HTML_TAGS_RE = re.compile(r'<.*?>')
# Characters to filter out during tokenization (includes a literal backslash).
TOKEN_FILTERS = r'!"#$%&()*+,-./:;<=>?@[\]^_`{|}~'

### Fashion Recommendations Class

In [None]:
class FashionRecommendations:
    """Helpers for loading CSV data and cleaning raw text."""

    def __init__(self):
        """Create an instance; no state is stored on it."""

    def _preprocess_text(self, text):
        """
            Text preprocessing to polish data.

            Lowercases the text, strips HTML tags, replaces the punctuation
            matched by REPLACE_BY_SPACE_RE with spaces and removes every
            character matched by BAD_SYMBOLS_RE.

            :param text: raw string to clean
            :return: the cleaned string
        """
        # Lowercase first: BAD_SYMBOLS_RE only keeps a-z, so uppercase
        # letters would otherwise be deleted instead of normalized.
        text = text.lower()
        # Tags must go before '/' and other punctuation are turned into
        # spaces, otherwise '<.*?>' would no longer match cleanly.
        text = HTML_TAGS_RE.sub('', text)
        text = REPLACE_BY_SPACE_RE.sub(' ', text)
        text = BAD_SYMBOLS_RE.sub('', text)
        return text

    def parse_data(self, filepath, datatypes):
        """
            Open a CSV file and return its contents as a dataframe.

            NOTE(review): no text preprocessing is applied here; the frame is
            returned exactly as parsed by pandas. Call `_preprocess_text` on
            the relevant column separately if cleaned text is needed.

            :param filepath: path of the CSV file to read
            :param datatypes: mapping of column name -> dtype, forwarded to
                              `pd.read_csv(dtype=...)`
            :return: dataframe holding the parsed file
        """
        df_data = pd.read_csv(filepath, dtype=datatypes)
        return df_data