In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
import datetime
import time

## Helper functions

In [2]:
# Function to encode or decode a single string by shifting each character's code point by a chosen number
# 1: Transform (Change, Encode) the string
# 0: Revert (Decode) to the original string
def transform_revert_string(input_string, operation, transform_number=-1, slash_subber="g"):
    """Shift every character of ``input_string`` by a fixed code-point offset.

    Parameters
    ----------
    input_string : str
        Text to encode or decode.
    operation : int
        1 encodes (adds ``transform_number`` to each code point),
        0 decodes (subtracts it again).
    transform_number : int
        Code-point offset used for the shift.
    slash_subber : str
        Written in place of any character that would encode to "/".

    Returns
    -------
    str
        The transformed text. For any other ``operation`` a message is
        printed and the empty string is returned.

    Notes
    -----
    While decoding, every occurrence of ``slash_subber`` is read as an
    encoded "/". A character that encodes to ``slash_subber`` by the plain
    shift is therefore not recovered by decoding.
    """
    if operation not in (0, 1):
        print("Wrong operation parameter")
        return ""

    pieces = []
    if operation == 1:
        for symbol in input_string:
            shifted = chr(ord(symbol) + transform_number)
            # "/" is never emitted by the encoder; the substitute goes out instead.
            pieces.append(slash_subber if shifted == "/" else shifted)
    else:
        for symbol in input_string:
            # The substitute stands for an encoded "/", so undo the shift from "/".
            source = "/" if symbol == slash_subber else symbol
            pieces.append(chr(ord(source) - transform_number))

    return "".join(pieces)

In [3]:
# Function to change the strings in a list or to revert them back to normal
# 1: Transform (Change, Encode) the names
# 0: Revert (Decode) to the original name
def transform_revert_reference_codes(code_list, operation):
    """Encode or decode every reference code in ``code_list``.

    Parameters
    ----------
    code_list : iterable of str
        Reference codes to process.
    operation : int
        1 encodes the codes, 0 decodes them back to the original names.

    Returns
    -------
    list of str
        One transformed code per input element, in the same order.

    Raises
    ------
    ValueError
        If ``operation`` is neither 0 nor 1.
    """
    # Fail early with a clear error; previously an unknown operation left the
    # result variable unassigned and the function crashed on its return line.
    if operation not in (0, 1):
        raise ValueError("Wrong operation parameter: expected 0 or 1, got {!r}".format(operation))

    return [transform_revert_string(code_name, operation=operation) for code_name in code_list]

In [4]:
# Function to rename the images of a given year
# 1: Transform (Change, Encode) the names
# 0: Revert (Decode) to the original name
def rename_images_by_year_split(year, operation):
    """Rename every image of one yearly split to its encoded or decoded name.

    Parameters
    ----------
    year : int
        Year of the split; only 2016-2019 are accepted. For any other value a
        message is printed and nothing is renamed.
    operation : int
        1 encodes the file names, 0 decodes them back to the original names.

    Raises
    ------
    ValueError
        If ``operation`` is neither 0 nor 1.
    """
    # Check the operation before touching the disk: transform_revert_string
    # returns "" for an unknown operation, so every file would be renamed to
    # the same ".jpg" path and the images would overwrite one another.
    if operation not in (0, 1):
        raise ValueError("Wrong operation parameter: expected 0 or 1, got {!r}".format(operation))

    acceptable_year_vals = [2016,2017,2018,2019]
    if year not in acceptable_year_vals:
        print("No image data for the given year")
        return

    print("Renaming year", year, "images...")
    img_path = "../datasets/img_split_by_year/"+str(year)+"/"

    for img_name in os.listdir(img_path):
        # The last four characters are dropped as the extension and ".jpg" is
        # always written back, whatever the original extension was.
        new_name = transform_revert_string(img_name[:-4], operation=operation)+".jpg"
        os.rename(img_path+img_name, img_path+new_name)
    print("\nDone!")

In [5]:
# Build index in order to transform data into time series
def dateindex_from_weeknum(weeknum, year):
    """Turn a (week number, year) pair into a ``datetime`` usable as a time index.

    Week 1 maps to January 1st of ``year``. Any other week ``n`` maps to the
    Monday of ``%W`` week ``n - 1`` (in ``%W`` numbering, Monday is the first
    day of the week and week 1 starts on the first Monday of the year).
    """
    if weeknum == 1:
        return datetime.datetime.strptime("{}-01-01".format(year), "%Y-%m-%d")

    # Trailing "-1" selects Monday (%w: 0 is Sunday, 1 is Monday).
    week_token = "{}-W{}-1".format(year, weeknum - 1)
    return datetime.datetime.strptime(week_token, "%Y-W%W-%w")

In [6]:
def prepare_ts_sales_data(location_df):
    """Turn a raw store sales table into a time-indexed table with encoded codes.

    Steps: drop the "Unnamed: 0" and "listino" columns, encode the
    "codice esterno" values, build a "time" column from the
    ("settimana", "anno") pairs, use it as the index and sort by it.

    Returns a new DataFrame; the frame passed in is not modified.
    """
    sales = location_df.drop(columns=["Unnamed: 0", "listino"])
    sales["codice esterno"] = transform_revert_reference_codes(sales["codice esterno"].values, operation=1)
    # One timestamp per row, derived from that row's week number and year.
    sales["time"] = [
        dateindex_from_weeknum(week, year)
        for week, year in sales[["settimana", "anno"]].values
    ]
    return sales.set_index(["time"]).sort_index()

# Rename images and the article codes in the global data dataframe

In [7]:
# Encode example
# rename_images_by_year_split(2016, operation=1)
# Decode example
# rename_images_by_year_split(2016, operation=0)

In [8]:
# Upper-case every image file name first, for consistency with the codes in the global dataframe
all_img_path = "../datasets/all_img/"
print("Trasforming all img names into uppercase...")
for img_name in os.listdir(all_img_path):
    os.rename(os.path.join(all_img_path, img_name), os.path.join(all_img_path, img_name.upper()))
print("Done", end="\n\n")

Trasforming all img names into uppercase...
Done



In [9]:
# Encode the name of every image in the folder.
# The last four characters of each name are dropped as the extension and ".jpg" is written back.
print("Encoding all img names...")
for img_name in os.listdir(all_img_path):
    encoded_stem = transform_revert_string(img_name[:-4], operation=1)
    os.rename(all_img_path + img_name, "{}{}.jpg".format(all_img_path, encoded_stem))
print("Done")

Encoding all img names...
Done


In [10]:
# Show the first ten entries returned for the image folder to eyeball the new (encoded) names.
# os.listdir returns entries in arbitrary order, so the sample may differ between runs.
os.listdir(all_img_path)[:10]

['@LUR2304WSABK,05.jpg',
 'AFRg2E@MD,05.jpg',
 'S0336E48S6E@MD,07.jpg',
 'WERg1E@AK,07.jpg',
 'WX37E@,05.jpg',
 'OK01gg8KOA,07.jpg',
 'RHQ812OK,05.jpg',
 'RR2E@,06.jpg',
 'OK00018838MD,05.jpg',
 'S2637LN,08.jpg']

In [11]:
# Revert (decode) the image names of every yearly split
#for year in [2016,2017,2018,2019]:
#    rename_images_by_year_split(year, operation=0)

In [12]:
# Example: decode (operation=0) one encoded reference code back to its original article code
transform_revert_string("gg0@F@SSHAH,05", operation=0)

'001AGATTIBI-16'

# Global Reference 

In [13]:
# Load the global csv and encode its reference ("codice esterno") values as well,
# so that they match the encoded image names
global_data_init_df = pd.read_csv("../datasets/dati_nunalie/global_data_20.csv")
global_data_init_df = global_data_init_df.assign(**{
    "codice esterno": transform_revert_reference_codes(
        global_data_init_df["codice esterno"].values, operation=1
    )
})
global_data_init_df

Unnamed: 0,codice esterno,articolo,modello,variante,tessuto,colore,cluster,posizione,n_taglie,stagione,...,vendita-11,vendita-12,vendita-13,vendita-14,vendita-15,vendita-16,vendita-17,vendita-18,vendita-19,vendita-20
0,"gg0@F@SSHAH,05",MAGLIE,TAGLIA E CUCI,BASIC,JEARSEY OPACO,BIANCO,B,PARETE 12,7,AI16,...,2.0,5.0,1.0,5.0,26.0,8.0,53.0,88.0,57.0,36.0
1,"gg0@FTENAH,05",MAGLIE,TAGLIA E CUCI,BASIC,JEARSEY OPACO,BIANCO,B,STAND 21,7,AI16,...,1.0,3.0,1.0,1.0,7.0,20.0,25.0,34.0,20.0,57.0
2,"gg0@Q@F@YY@AH,05",MAGLIE,TAGLIA E CUCI,BASIC,JEARSEY OPACO,BIANCO,A,PARETE 12,7,AI16,...,1.0,3.0,1.0,11.0,19.0,25.0,40.0,62.0,52.0,59.0
3,"g11UL,05",GIACCHE,GIUBBINI,BASIC,COTONE,VERDE MILITARE,B,PARETE 09,7,PE16,...,19.0,14.0,7.0,5.0,4.0,3.0,4.0,2.0,10.0,12.0
4,"g17UL,05",GIACCHE,GIUBBINI,BASIC,COTONE,VERDE MILITARE,B,PARETE 09,7,PE16,...,29.0,19.0,7.0,8.0,7.0,5.0,2.0,1.0,8.0,18.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6652,"YOg01AD,07",GIACCHE,PELLE/PELLICCIA,BASIC,PELLICCIA,BEIGE,B,PARETE 24,7,AI18,...,43.0,43.0,68.0,48.0,67.0,54.0,36.0,28.0,23.0,10.0
6653,"YQ06121AK,07",PANTALONI,CONFEZIONE,CHINO,COTONE,BL,A,TAVOLO 05,7,PE18,...,27.0,19.0,18.0,17.0,9.0,13.0,7.0,4.0,10.0,12.0
6654,"YQ06121MD,07",PANTALONI,CONFEZIONE,CHINO,COTONE,NERO,A,TAVOLO 05,7,PE18,...,32.0,13.0,23.0,15.0,12.0,13.0,9.0,7.0,8.0,18.0
6655,"YQ06121UL,07",PANTALONI,CONFEZIONE,CHINO,COTONE,VERDE MILITARE,A,TAVOLO 05,7,PE18,...,21.0,16.0,22.0,21.0,12.0,10.0,15.0,7.0,6.0,7.0


In [14]:
# SIMPLE TEST: Check if all the article codes in our reference table have a corresponding image in the image folder
all_img_names = os.listdir(all_img_path)
# Look names up in a set: membership tests against the list would rescan it for every reference code.
all_img_name_set = set(all_img_names)
print("Checking for missing images...")
for ref_img_name in global_data_init_df["codice esterno"].values:
    if ref_img_name+".jpg" not in all_img_name_set:
        print(ref_img_name, "was not found inside the image folder")
print("Done\n")

Checking for missing images...
Done



In [15]:
# Reference table: one row per distinct record, restricted to the descriptive columns below
reference_wanted_columns = [
    "codice esterno", "articolo", "modello", "variante", "tessuto",
    "colore", "cluster", "posizione", "stagione", "vendita", "settimana_in",
]
global_references_df = global_data_init_df.drop_duplicates()[reference_wanted_columns]
global_references_df.to_csv("../datasets/Protocol_data/global_references.csv")
global_references_df

Unnamed: 0,codice esterno,articolo,modello,variante,tessuto,colore,cluster,posizione,stagione,vendita,settimana_in
0,"gg0@F@SSHAH,05",MAGLIE,TAGLIA E CUCI,BASIC,JEARSEY OPACO,BIANCO,B,PARETE 12,AI16,734,39
1,"gg0@FTENAH,05",MAGLIE,TAGLIA E CUCI,BASIC,JEARSEY OPACO,BIANCO,B,STAND 21,AI16,487,37
2,"gg0@Q@F@YY@AH,05",MAGLIE,TAGLIA E CUCI,BASIC,JEARSEY OPACO,BIANCO,A,PARETE 12,AI16,492,39
3,"g11UL,05",GIACCHE,GIUBBINI,BASIC,COTONE,VERDE MILITARE,B,PARETE 09,PE16,637,7
4,"g17UL,05",GIACCHE,GIUBBINI,BASIC,COTONE,VERDE MILITARE,B,PARETE 09,PE16,621,6
...,...,...,...,...,...,...,...,...,...,...,...
6652,"YOg01AD,07",GIACCHE,PELLE/PELLICCIA,BASIC,PELLICCIA,BEIGE,B,PARETE 24,AI18,600,45
6653,"YQ06121AK,07",PANTALONI,CONFEZIONE,CHINO,COTONE,BL,A,TAVOLO 05,PE18,668,8
6654,"YQ06121MD,07",PANTALONI,CONFEZIONE,CHINO,COTONE,NERO,A,TAVOLO 05,PE18,787,7
6655,"YQ06121UL,07",PANTALONI,CONFEZIONE,CHINO,COTONE,VERDE MILITARE,A,TAVOLO 05,PE18,676,7


In [16]:
# Sales table: the global data without the descriptive (non-sales) columns listed below
sales_unwanted_columns = [
    "articolo", "modello", "variante", "tessuto",
    "colore", "cluster", "posizione", "stagione", "settimana_in",
    "n_taglie", "stagione_abbr", "anno_stagione", "posizione_abbr",
]
global_sales_df = global_data_init_df.drop(columns=sales_unwanted_columns)
global_sales_df.to_csv("../datasets/Protocol_data/global_sales_data.csv")

# Sales data

In [17]:
# Each city's sales table is the row-wise concatenation of its stores' CSV files,
# passed through prepare_ts_sales_data.
# pd.concat is used instead of chained DataFrame.append calls: append was
# deprecated in pandas 1.4 and removed in pandas 2.0.
# Milan data
milan_data_df = prepare_ts_sales_data(pd.concat([
    pd.read_csv("../datasets/dati_nunalie/MILANO/ARESE.csv"),
    pd.read_csv("../datasets/dati_nunalie/MILANO/BUSNA.csv"),
    pd.read_csv("../datasets/dati_nunalie/MILANO/PAVIA.csv"),
]))
# Turin data
turin_data_df = prepare_ts_sales_data(pd.concat([
    pd.read_csv("../datasets/dati_nunalie/TORINO/BEINA.csv"),
    pd.read_csv("../datasets/dati_nunalie/TORINO/JUVE.csv"),
    pd.read_csv("../datasets/dati_nunalie/TORINO/NICHE.csv"),
    pd.read_csv("../datasets/dati_nunalie/TORINO/TODOR.csv"),
    pd.read_csv("../datasets/dati_nunalie/TORINO/TORIN.csv"),
]))
# Rome data
rome_data_df = prepare_ts_sales_data(pd.concat([
    pd.read_csv("../datasets/dati_nunalie/ROMA/EURR2.csv"),
    pd.read_csv("../datasets/dati_nunalie/ROMA/RMNAZ.csv"),
    pd.read_csv("../datasets/dati_nunalie/ROMA/TIBUR.csv"),
    pd.read_csv("../datasets/dati_nunalie/ROMA/TUSC1.csv"),
    pd.read_csv("../datasets/dati_nunalie/ROMA/TUSC2.csv"),
]))

In [20]:
# Preview the first rows of each city's sales table, then write each table to its own csv
print(milan_data_df.head(), turin_data_df.head(), rome_data_df.head(), sep="\n\n")
for city_frame, csv_name in (
    (milan_data_df, "milan_sales_data.csv"),
    (turin_data_df, "turin_sales_data.csv"),
    (rome_data_df, "rome_sales_data.csv"),
):
    city_frame.to_csv("../datasets/Protocol_data/" + csv_name)

           codice esterno  vendita  settimana  anno taglia negozio
time                                                              
2016-04-11  05RE070AMD,05        1         16  2016     XS   ARESE
2016-04-11  05RE070A@U,05        1         16  2016      S   ARESE
2016-04-11  U@404122MD,05        1         16  2016      M   ARESE
2016-04-11  05RE070A@U,05        1         16  2016     XS   ARESE
2016-04-11  05RE070AMD,05        1         16  2016    XXS   ARESE

             codice esterno  vendita  settimana  anno taglia negozio
time                                                                
2016-03-07  05RE070BNSAH,05        1         11  2016      L    JUVE
2016-03-14    @MADS@E@FQ,05        1         12  2016      S   TORIN
2016-03-14       LD021QR,05        1         12  2016      S   TORIN
2016-03-14        RR58MD,05        1         12  2016      M   TORIN
2016-03-14    SQHR1g08MD,05        1         12  2016    XXS   TORIN

             codice esterno  vendita  settiman

In [21]:
# Example: look up one encoded article code in the global reference table built above
# (its "vendita" column holds the item's total sales)
extra_data = global_references_df.loc[global_references_df["codice esterno"].eq("05RE070AMD,05")]
extra_data

Unnamed: 0,codice esterno,articolo,modello,variante,tessuto,colore,cluster,posizione,stagione,vendita,settimana_in
371,"05RE070AMD,05",PANTALONI,5 TASCHE,BASIC,DRILL PESANTE,NERO,A,STAND 13,PE16,1004,7


# Warehouse data

In [22]:
# Load the warehouse movements indexed by the csv's sixth column (parsed as dates),
# sort them chronologically, drop the leftover "Unnamed: 0" column and encode the article codes
all_warehouse_data = (
    pd.read_csv("../datasets/dati_nunalie/magazzino/magazzino_PE16_AI19.csv", index_col=[5], parse_dates=True)
    .sort_index()
    .drop(columns=["Unnamed: 0"])
)
all_warehouse_data["codice_esterno"] = transform_revert_reference_codes(all_warehouse_data["codice_esterno"].values, operation=1)
all_warehouse_data

Unnamed: 0_level_0,negozio,codice_esterno,taglia,qty
data_mov,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2015-12-02,TORIN,"RR65E@,05",XXS,1.0
2015-12-02,LECCE,"RR64E@,05",S,2.0
2015-12-02,LECCE,"RR64E@,05",M,2.0
2015-12-02,LECCE,"RR64E@,05",L,1.0
2015-12-02,LECCE,"RR64E@,05",XL,1.0
...,...,...,...,...
2020-03-11,PRADA,"XK038QR,08",UN,1.0
2020-03-11,PRADA,"0164gU465E@MD,08",UN,1.0
2020-03-11,PRADA,"O1622A4g1MD,08",S,1.0
2020-03-11,PRADA,"00458U465E@MD,08",XS,1.0


In [23]:
# Divide the data based on the stores that we want.
# For each city the rows of its stores are selected store by store, concatenated
# in that order and then sorted by date.
# pd.concat is used instead of chained DataFrame.append calls: append was
# deprecated in pandas 1.4 and removed in pandas 2.0.
# Milan data
milan_warehouse_data = pd.concat([
    all_warehouse_data[all_warehouse_data["negozio"] == store_name]
    for store_name in ("ARESE", "BUSNA", "PAVIA")
]).sort_index()
# Turin data
turin_warehouse_data = pd.concat([
    all_warehouse_data[all_warehouse_data["negozio"] == store_name]
    for store_name in ("BEINA", "JUVE", "NICHE", "TODOR", "TORIN")
]).sort_index()
# Rome data
rome_warehouse_data = pd.concat([
    all_warehouse_data[all_warehouse_data["negozio"] == store_name]
    for store_name in ("EURR2", "RMNAZ", "TIBUR", "TUSC1", "TUSC2")
]).sort_index()

In [24]:
# Preview the first rows of each city's warehouse table, then write each table to its own csv
print(milan_warehouse_data.head(), turin_warehouse_data.head(), rome_warehouse_data.head(), sep="\n\n")
for city_frame, csv_name in (
    (milan_warehouse_data, "milan_warehouse_data.csv"),
    (turin_warehouse_data, "turin_warehouse_data.csv"),
    (rome_warehouse_data, "rome_warehouse_data.csv"),
):
    city_frame.to_csv("../datasets/Protocol_data/" + csv_name)

           negozio codice_esterno taglia  qty
data_mov                                     
2016-03-04   ARESE  @KL64086AD,05  L      2.0
2016-03-04   ARESE  U@404122ID,05  S      3.0
2016-03-04   ARESE  U@404122ID,05  XS     2.0
2016-03-04   ARESE  U@404122ID,05  L      2.0
2016-03-04   ARESE  U@404122ID,05  XL     2.0

           negozio codice_esterno taglia  qty
data_mov                                     
2015-12-02   TORIN      6053AH,05  L      2.0
2015-12-02   TORIN      RR64E@,05  S      2.0
2015-12-02   TORIN      RR64E@,05  XS     2.0
2015-12-02   TORIN      RR64E@,05  XXS    1.0
2015-12-02   TORIN      RR62E@,05  XL     1.0

           negozio codice_esterno taglia  qty
data_mov                                     
2015-12-04   TIBUR     KL456@U,05  M      3.0
2015-12-04   TUSC2     KL456MD,05  L      2.0
2015-12-04   TUSC2     KL456MD,05  M      3.0
2015-12-04   TUSC2     KL456MD,05  S      3.0
2015-12-04   TUSC2     KL456O@,05  L      2.0
