In [1]:
# Importing the necessary libraries
from pandas import DataFrame
from sqlalchemy import create_engine
from typing import Dict
from pathlib import Path
import pandas as pd

from sqlalchemy.engine.base import Engine
from src.transform import QueryEnum
from src import config
from src.transform import run_queries
from src.extract import extract
from src.load import load
from src.plots import (
    plot_freight_value_weight_relationship,
    plot_global_amount_order_status,
    plot_real_vs_predicted_delivered_time,
    plot_revenue_by_month_year,
    plot_revenue_per_state,
    plot_top_10_least_revenue_categories,
    plot_top_10_revenue_categories,
    plot_top_10_revenue_categories_ammount,
    plot_delivery_date_difference,
    plot_order_amount_per_day_with_holidays,
)

# Pipeline setup: create the SQLite file, open an engine on it, extract the
# source data, hand it to the load step and run the project queries.

# Create the SQLite database file on disk if it does not exist yet
# (Path.touch leaves the contents of an existing file untouched).
Path(config.SQLITE_BD_ABSOLUTE_PATH).touch()

# Create the SQLAlchemy engine for that file; echo=False keeps SQLAlchemy from
# logging the SQL statements it emits.
ENGINE = create_engine(rf"sqlite:///{config.SQLITE_BD_ABSOLUTE_PATH}", echo=False)

# Inputs of the extract step, both read from the project config.
csv_folder = config.DATASET_ROOT_PATH
public_holidays_url = config.PUBLIC_HOLIDAYS_URL

# 1. Get the mapping of the csv files to the table names.
csv_table_mapping = config.get_csv_to_table_mapping()

# 2. Extract the data from the csv files and the public holidays URL into dataframes.
#    NOTE(review): presumably returns a mapping of table name -> DataFrame;
#    confirm in src.extract.
csv_dataframes = extract(csv_folder, csv_table_mapping, public_holidays_url)

# 3. Pass the extracted dataframes and the engine to the project's load step.
#    NOTE(review): presumably writes one table per dataframe; confirm in src.load.
load(data_frames=csv_dataframes, database=ENGINE)

# 4. Run the project's queries against the database. Per the annotation, the
#    result maps a string key to the DataFrame produced for it.
query_results: Dict[str, DataFrame] = run_queries(database=ENGINE)

All dataframes have been loaded into the database.


In [2]:
# Reuse the engine created in the first cell: it already targets
# config.SQLITE_BD_ABSOLUTE_PATH, so calling create_engine() again for the same
# file only adds a second engine object to keep track of. The `engine` name is
# kept as an alias so code that refers to it keeps working.
engine = ENGINE

# Load each table of the SQLite database into its own pandas DataFrame.
customers_df = pd.read_sql_table('olist_customers', con=engine)
geolocation_df = pd.read_sql_table('olist_geolocation', con=engine)
order_items_df = pd.read_sql_table('olist_order_items', con=engine)
order_payments_df = pd.read_sql_table('olist_order_payments', con=engine)
order_reviews_df = pd.read_sql_table('olist_order_reviews', con=engine)
orders_df = pd.read_sql_table('olist_orders', con=engine)
products_df = pd.read_sql_table('olist_products', con=engine)
sellers_df = pd.read_sql_table('olist_sellers', con=engine)
product_category_name_translation_df = pd.read_sql_table('product_category_name_translation', con=engine)
public_holidays_df = pd.read_sql_table('public_holidays', con=engine)

In [10]:
import pandas as pd
from sqlalchemy import create_engine

# Reuse the engine created in the first cell instead of building yet another
# one for the same SQLite file. The `engine` name is kept as an alias so code
# that refers to it keeps working.
engine = ENGINE

# Short DataFrame label -> SQLite table name. The insertion order is the order
# in which the tables are written to the preview file below.
TABLE_NAMES = {
    'customers': 'olist_customers',
    'geolocation': 'olist_geolocation',
    'order_items': 'olist_order_items',
    'order_payments': 'olist_order_payments',
    'order_reviews': 'olist_order_reviews',
    'orders': 'olist_orders',
    'products': 'olist_products',
    'sellers': 'olist_sellers',
    'product_category_name_translation': 'product_category_name_translation',
    'public_holidays': 'public_holidays',
}

# Load every table from the SQLite database into a pandas DataFrame, keyed by
# its short label.
dataframes = {
    label: pd.read_sql_table(table_name, con=engine)
    for label, table_name in TABLE_NAMES.items()
}

# Path of the TXT file that receives the preview.
output_file_path = 'top_10_rows_combined.txt'

# Write the first 10 rows of each DataFrame into a single TXT file: one section
# per DataFrame, headed by its label and followed by a blank line.
with open(output_file_path, mode='w', encoding='utf-8') as file:
    for df_name, df in dataframes.items():
        file.write(f"{df_name}\n")
        file.write(df.head(10).to_string(index=False))
        file.write("\n\n")

print(f"Las primeras 10 filas de cada DataFrame se han guardado en {output_file_path}")

Las primeras 10 filas de cada DataFrame se han guardado en top_10_rows_combined.txt
