# Cargando Entorno

In [1]:
# Python ≥3.5 is required
import sys
assert sys.version_info >= (3, 5)

# Scikit-Learn ≥0.20 is required.
# Compare the numeric (major, minor) components: a plain string comparison is
# lexicographic, so a version such as "0.9" would wrongly satisfy >= "0.20".
import re
import sklearn
_sklearn_version = re.match(r"(\d+)\.(\d+)", sklearn.__version__)
assert _sklearn_version is not None, "Unrecognized scikit-learn version: " + sklearn.__version__
assert tuple(int(part) for part in _sklearn_version.groups()) >= (0, 20), "Scikit-Learn >= 0.20 is required"

# Common imports
import numpy as np
import os
import pandas as pd

# To plot pretty figures
%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt
%matplotlib inline
mpl.rc('axes', labelsize=14)
mpl.rc('xtick', labelsize=12)
mpl.rc('ytick', labelsize=12)

# Where to save the figures: <PROJECT_ROOT_DIR>/images/<CHAPTER_ID>
PROJECT_ROOT_DIR = "."
CHAPTER_ID = "01_AED"
IMAGES_PATH = os.path.join(PROJECT_ROOT_DIR, "images", CHAPTER_ID)
# exist_ok=True lets this cell be re-run once the folder already exists.
os.makedirs(IMAGES_PATH, exist_ok=True)

def save_fig(fig_id, tight_layout=True, fig_extension="png", resolution=300):
    """Write the current pyplot figure to a file inside ``IMAGES_PATH``.

    Args:
        fig_id: Base file name without extension; also echoed in the log line.
        tight_layout: When true, ``plt.tight_layout()`` is called before saving.
        fig_extension: File extension, also passed to ``savefig`` as ``format``.
        resolution: Passed to ``savefig`` as ``dpi``.
    """
    file_name = fig_id + "." + fig_extension
    target = os.path.join(IMAGES_PATH, file_name)
    print("Saving figure", fig_id)
    if tight_layout:
        plt.tight_layout()
    plt.savefig(target, format=fig_extension, dpi=resolution)

# Ignore useless warnings (see SciPy issue #5998)
import warnings
warnings.filterwarnings(action="ignore", message="^internal gelsd")

print('Terminé de correr la primera celda')

Terminé de correr la primera celda


In [2]:
# Directory holding the data files; used as the default `datasets_path`
# by the download and load functions in this notebook.
RETAIL_PATH = "datasets"

In [6]:
if 'google.colab' in sys.modules:

    import tarfile
    import urllib.request

    # Raw-file base URL of the course repository ("main" branch).
    DOWNLOAD_ROOT = "https://github.com/hcgalvan/UNSAM-Machine-Learning-on-Economics/raw/main/"
    DATASETS_URL = DOWNLOAD_ROOT + "datasets/datasets.tgz"

    def fetch_datasets_data(datasets_url=DATASETS_URL, datasets_path=RETAIL_PATH):
        """Download the datasets archive and unpack it into ``datasets_path``.

        Args:
            datasets_url: URL of the ``.tgz`` archive to download.
            datasets_path: Directory that receives both the archive
                (saved as ``datasets.tgz``) and its extracted contents.
        """
        os.makedirs(datasets_path, exist_ok=True)
        tgz_path = os.path.join(datasets_path, "datasets.tgz")
        # Always download to the same file name so that re-running the cell
        # refreshes the archive that is extracted below, instead of leaving
        # the old file in place and saving the new one under another name.
        urllib.request.urlretrieve(datasets_url, tgz_path)
        # NOTE(review): extractall() trusts the member paths inside the
        # archive; only use this with archives from a trusted source.
        # The context manager closes the archive even if extraction fails.
        with tarfile.open(tgz_path) as datasets_tgz:
            datasets_tgz.extractall(path=datasets_path)

    # Run the download
    fetch_datasets_data()

else:
    print("No estás en Colab. Esta celda no hizo nada.")

No estás en Colab. Esta celda no hizo nada.


# EXPLORAR DATASET 1 - scanner_data.csv

## Cargando los datos del dataset

In [3]:
def load_datasets_data(datasets_path=RETAIL_PATH):
    """Read ``scanner_data.csv`` from ``datasets_path`` into a DataFrame."""
    scanner_csv = os.path.join(datasets_path, "scanner_data.csv")
    scanner_frame = pd.read_csv(scanner_csv)
    return scanner_frame

# Load the scanner data from the default dataset directory.
retail = load_datasets_data()

In [4]:
# Preview the first two rows of the scanner dataset.
retail.head(n=2)

Unnamed: 0.1,Unnamed: 0,Date,Customer_ID,Transaction_ID,SKU_Category,SKU,Quantity,Sales_Amount
0,1,02/01/2016,2547,1,X52,0EM7L,1.0,3.13
1,2,02/01/2016,822,2,2ML,68BRQ,1.0,5.46


In [5]:
# List the column labels of the scanner dataset.
print(retail.columns)

Index(['Unnamed: 0', 'Date', 'Customer_ID', 'Transaction_ID', 'SKU_Category',
       'SKU', 'Quantity', 'Sales_Amount'],
      dtype='object')


# EXPLORAR DATASET 2 - online_retail_II.xlsx

## Cargando los datos

In [7]:
def load_datasets_h1(datasets_path=RETAIL_PATH):
    """Read the first sheet of ``online_retail_II.xlsx`` into a DataFrame."""
    workbook_path = os.path.join(datasets_path, "online_retail_II.xlsx")
    first_sheet = pd.read_excel(workbook_path, sheet_name=0)
    return first_sheet

# Load the first sheet from the default dataset directory.
retail_ol_h1 = load_datasets_h1()

In [8]:
# Preview the first two rows of the first workbook sheet.
retail_ol_h1.head(n=2)

Unnamed: 0,Invoice,StockCode,Description,Quantity,InvoiceDate,Price,Customer ID,Country
0,489434,85048,15CM CHRISTMAS GLASS BALL 20 LIGHTS,12,2009-12-01 07:45:00,6.95,13085.0,United Kingdom
1,489434,79323P,PINK CHERRY LIGHTS,12,2009-12-01 07:45:00,6.75,13085.0,United Kingdom


In [10]:
# List the column labels of the first workbook sheet.
print(retail_ol_h1.columns)

Index(['Invoice', 'StockCode', 'Description', 'Quantity', 'InvoiceDate',
       'Price', 'Customer ID', 'Country'],
      dtype='object')


In [9]:
def load_datasets_h2(datasets_path=RETAIL_PATH):
    """Read the second sheet of ``online_retail_II.xlsx`` into a DataFrame."""
    workbook_path = os.path.join(datasets_path, "online_retail_II.xlsx")
    # Sheet positions are zero-based, so 1 selects the second sheet.
    second_sheet = pd.read_excel(workbook_path, sheet_name=1)
    return second_sheet

# Load the second sheet from the default dataset directory.
retail_ol_h2 = load_datasets_h2()

In [11]:
# Preview the first two rows of the second workbook sheet.
retail_ol_h2.head(n=2)

Unnamed: 0,Invoice,StockCode,Description,Quantity,InvoiceDate,Price,Customer ID,Country
0,536365,85123A,WHITE HANGING HEART T-LIGHT HOLDER,6,2010-12-01 08:26:00,2.55,17850.0,United Kingdom
1,536365,71053,WHITE METAL LANTERN,6,2010-12-01 08:26:00,3.39,17850.0,United Kingdom


In [12]:
# List the column labels of the second workbook sheet.
print(retail_ol_h2.columns)

Index(['Invoice', 'StockCode', 'Description', 'Quantity', 'InvoiceDate',
       'Price', 'Customer ID', 'Country'],
      dtype='object')
