## H&M Personalized Fashion Recommendations

This notebook contains the approach taken for the 2022 H&M Personalized Fashion Recommendations Kaggle competition. 

*See the repository's README.md for instructions on how to run this notebook locally.*

Developed By **Jaileen Salazar**
_____

### Required Dependencies

In [None]:
# Data Processing
import pandas as pd
import numpy as np
import csv
import re
from sklearn.model_selection import train_test_split
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences

# RNN
from keras.models import Sequential
from keras.layers import Dense, LSTM, Embedding, SpatialDropout1D, Bidirectional
from keras.callbacks import EarlyStopping
from keras.metrics import Recall, Precision

### Constants

In [None]:
# FILE PATHS
# Input CSV files, given relative to the directory the notebook runs from.
TRAIN_PATH = '../data/transactions_train.csv'
CUSTOMER_PATH = '../data/customers.csv'
ARTICLES_PATH = '../data/articles.csv'

# FILE FORMATS
# Column name -> dtype mappings, one per input file
# (presumably passed as the `datatypes` argument of parse_data -- confirm).
# Transactions columns: t_dat,customer_id,article_id,price,sales_channel_id
TRAIN_FORMAT = {
    't_dat': str,
    'customer_id': str,
    'article_id': str,
    'price': float,
    'sales_channel_id': int,
}
# NOTE(review): a plain `int` dtype cannot hold missing values when used as a
# pandas read dtype -- confirm the 'age' column never contains blanks.
CUSTOMER_META_FORMAT = {
    'customer_id': str,
    'FN': str,
    'Active': str,
    'club_member_status': str,
    'fashion_news_frequency': str,
    'age': int,
    'postal_code': str,
}
ARTICLE_META_FORMAT = {
    'article_id': str,
    'product_code': str,
    'prod_name': str,
    'product_type_no': int,
    'product_type_name': str,
    'product_group_name': str,
    'graphical_appearance_no': int,
    'graphical_appearance_name': str,
    'colour_group_code': str,
    'colour_group_name': str,
    'perceived_colour_value_id': str,
    'perceived_colour_value_name': str,
    'perceived_colour_master_id': str,
    'perceived_colour_master_name': str,
    'department_no': int,
    'department_name': str,
    'index_code': str,
    'index_name': str,
    'index_group_no': int,
    'index_group_name': str,
    'section_no': int,
    'section_name': str,
    'garment_group_no': int,
    'garment_group_name': str,
    'detail_desc': str,
}
# Header row written at the top of the submission CSV.
SUBMISSION_HEADERS = ['customer_id', 'prediction']

# Regular expressions used for preprocessing.
# Raw strings keep the backslashes literal, so no invalid-escape warnings are
# raised; the compiled patterns are unchanged.
# Separator-like punctuation that is replaced by a single space.
REPLACE_BY_SPACE_RE = re.compile(r'[/(){}\[\]\|@,;]')
# Anything that is not a digit, a lowercase ASCII letter, space, '#', '+' or '_'.
BAD_SYMBOLS_RE = re.compile(r'[^0-9a-z #+_]')
# Shortest possible '<...>' run, i.e. one HTML-style tag at a time.
HTML_TAGS_RE = re.compile(r'<.*?>')
# Punctuation characters to filter out of tokens
# (presumably the `filters` argument of the Keras Tokenizer -- confirm).
TOKEN_FILTERS = r'!"#$%&()*+,-./:;<=>?@[\]^_`{|}~'

### Fashion Recommendations Class

In [None]:
class FashionRecommendations:
    """Helpers for cleaning text, loading CSV data and writing result files."""

    def __init__(self):
        """Create an instance; no state is stored on it."""
        pass

    def _preprocess_text(self, text):
        """
        Normalize a text string.

        Steps, in order: lowercase the text, delete matches of
        HTML_TAGS_RE, replace matches of REPLACE_BY_SPACE_RE with a single
        space, then delete matches of BAD_SYMBOLS_RE.

        Args:
            text: The string to clean.

        Returns:
            The cleaned string.
        """
        text = text.lower()
        # Tags are removed before the punctuation passes, while their angle
        # brackets are still present for HTML_TAGS_RE to match.
        text = HTML_TAGS_RE.sub('', text)
        text = REPLACE_BY_SPACE_RE.sub(' ', text)
        text = BAD_SYMBOLS_RE.sub('', text)
        return text

    def parse_data(self, filepath, datatypes):
        """
        Read a CSV file into a dataframe.

        No text preprocessing is applied here; the file is loaded as-is.

        Args:
            filepath: Path of the CSV file to read.
            datatypes: Mapping passed to ``pd.read_csv`` as ``dtype``.

        Returns:
            The dataframe produced by ``pd.read_csv``.
        """
        return pd.read_csv(filepath, dtype=datatypes)

    def save_results(self, ids, predictions, filename, headers):
        """
        Write a header row followed by one (id, prediction) row per pair.

        ``ids`` and ``predictions`` are paired with ``zip``, so if their
        lengths differ the surplus entries of the longer one are not written.
        The file is opened in 'w' mode, replacing any existing content.

        Args:
            ids: Iterable of values for the first column.
            predictions: Iterable of values for the second column.
            filename: Path of the CSV file to write.
            headers: Sequence written as the first row.
        """
        # newline='' lets the csv module control line endings itself.
        with open(filename, 'w', encoding='UTF8', newline='') as f:
            writer = csv.writer(f)
            writer.writerow(headers)
            writer.writerows(zip(ids, predictions))