##### Initialization

In [3]:
import numpy as np
import pandas as pd
from datetime import datetime
import time
import pickle

In [2]:
### Ignore this if this file isn't being run on a Colab Notebook

def mount_drive_select_path(drive_path = "/content/drive/MyDrive/UA/5º Ano/Tese"):
  """Mount Google Drive and change the working directory to `drive_path`.

  Only usable from an IPython session that provides `google.colab`: the body
  imports that package and uses the `%cd` line magic.

  Args:
    drive_path: Directory to change into once the drive is mounted.
  """

  # Mount drive at /content/drive (imported here so the Colab-only package is
  # needed only when this function is actually called)
  from google.colab import drive
  drive.mount('/content/drive', force_remount = True)

  # Change into the requested directory; IPython expands $drive_path to the
  # value of the Python variable before running %cd
  %cd $drive_path

# Raw string: the backslash-space sequences are meant literally. In a normal
# string literal "\ " is an invalid escape sequence, which recent Python
# versions warn about; the raw prefix keeps the exact same string value.
path = r"/content/drive/MyDrive/UA/5º Ano/Tese/Professora\ - Dados\ -\ LSTMs"
mount_drive_select_path(drive_path = path)

Mounted at /content/drive
/content/drive/MyDrive/UA/5º Ano/Tese/Professora - Dados - LSTMs


# Notebook

The folder time_series contains a list of files. Each file contains all the time series associated with a given product. Each time series corresponds to a company.

There are two types of files:

**- Wide**: each column corresponds to the time series of a company.

**- Long**: the information is stored in long format. The column `company` identifies the company, the column `timestamp` gives the instant at which a price was recorded, and the column `price` holds the recorded price. These dataframes contain some other columns, whose meaning is explained below.

In the name of each file, the product group (a group of time series of the same product sold by different companies) is identified by a number, called the `product_group_id`.

The file 'product_information' contains information about the products in each product group. This file contains the following columns:

- **offer_type** - offer type associated with that time series. A product can be, for example, locked to a certain operator or unlocked.
- **product** - name of the product associated with the time series
- **product_group_id** - uniquely identifies a product group. A product group is uniquely identified by its name and its offer_type.

**Example**: read a file and its product_information

In [4]:
# Select product_group_id: the number used below to build the data file names
# and to look up the matching row of product_information
product_group_id = 958

In [5]:
# Load the wide-format frame stored for the selected product group and show it
df_1 = pd.read_pickle(f"Data/time_series_1/wide_product_group_id_{product_group_id}")
df_1

Unnamed: 0,Abcdin,Falabella,Lider,Paris,Ripley,claro,entel,movistar,wom,vtr,macOnline
2020-06-21,,291.085236,,,,209.214955,316.346766,272.891840,229.127627,,
2020-06-22,,291.085236,,,,209.214955,316.346766,272.891840,229.127627,,
2020-06-23,,291.085236,,,,209.214955,316.346766,272.891840,229.127627,,
2020-06-24,,291.085236,,,,209.214955,316.346766,272.891840,229.127627,,
2020-06-25,,291.085236,,,,209.214955,316.346766,272.891840,229.127627,,
...,...,...,...,...,...,...,...,...,...,...,...
2021-11-05,200.118257,,,,200.118257,,294.514691,191.021559,,,
2021-11-06,200.118257,,,,200.118257,,294.514691,191.021559,,,
2021-11-07,200.118257,,,,200.118257,,294.514691,191.021559,,,
2021-11-08,200.118257,,,,200.118257,,294.514691,191.021559,,,


In [6]:
# Read product information of the time series of that product group
product_information = pd.read_pickle("Data/product_information")
# Label-based lookup: selects the row whose index label equals product_group_id
product_information.loc[product_group_id]

product       huawei Y9S 128GB
offer_type            unlocked
Name: 958, dtype: object

**Example**: convert a long dataframe to a wide format

In [7]:
# Load the long-format records stored for the selected product group
df_long = pd.read_pickle(f"Data/time_series_1/long_product_group_id_{product_group_id}")

# Pivot to wide format: one row per timestamp, one column per company, keeping
# the first price found for each (timestamp, company) pair
df_wide = (
  df_long
  .reset_index()
  .groupby(["timestamp", "company"])["price"]
  .first()
  .unstack()
  .reset_index()
  .set_index("timestamp")
)

# Fill missing timestamps: left-join the wide frame onto an empty frame indexed
# by every day between the first and last timestamp
timestamps = pd.DataFrame(index = pd.date_range(start = df_wide.index[0], end = df_wide.index[-1], freq = "D"))
df_wide = timestamps.merge(df_wide, how = "left", left_index = True, right_index = True)
df_wide.columns.name = None

In [8]:
# Display the long-format frame read from the pickle file
df_long

Unnamed: 0,product_id,timestamp,product,country,company,product_group_id,offer_type,price
422267,3217,2020-06-21,huawei Y9S 128GB,CL,movistar,958,unlocked,272.891840
422268,3217,2020-06-22,huawei Y9S 128GB,CL,movistar,958,unlocked,272.891840
422269,3217,2020-06-23,huawei Y9S 128GB,CL,movistar,958,unlocked,272.891840
422270,3217,2020-06-24,huawei Y9S 128GB,CL,movistar,958,unlocked,272.891840
422271,3217,2020-06-25,huawei Y9S 128GB,CL,movistar,958,unlocked,272.891840
...,...,...,...,...,...,...,...,...
426112,3230,2021-03-18,huawei Y9S 128GB,CL,Lider,958,unlocked,209.214955
426113,3230,2021-03-19,huawei Y9S 128GB,CL,Lider,958,unlocked,209.214955
426114,3230,2021-03-20,huawei Y9S 128GB,CL,Lider,958,unlocked,209.214955
426115,3230,2021-03-21,huawei Y9S 128GB,CL,Lider,958,unlocked,209.214955


In [9]:
# Display the wide-format frame built from df_long
df_wide

Unnamed: 0,Abcdin,Falabella,Lider,Paris,Ripley,claro,entel,movistar,wom
2020-06-21,,291.085236,,,,209.214955,316.346766,272.891840,229.127627
2020-06-22,,291.085236,,,,209.214955,316.346766,272.891840,229.127627
2020-06-23,,291.085236,,,,209.214955,316.346766,272.891840,229.127627
2020-06-24,,291.085236,,,,209.214955,316.346766,272.891840,229.127627
2020-06-25,,291.085236,,,,209.214955,316.346766,272.891840,229.127627
...,...,...,...,...,...,...,...,...,...
2021-11-05,200.118257,,,,200.118257,,294.514691,191.021559,
2021-11-06,200.118257,,,,200.118257,,294.514691,191.021559,
2021-11-07,200.118257,,,,200.118257,,294.514691,191.021559,
2021-11-08,200.118257,,,,200.118257,,294.514691,191.021559,
