In [1]:
# Run the shared setup notebook first; its saved output reports a successful
# connection to the py_brew database.
# NOTE(review): presumably it also loads the SQL magic used by the %%sql cells
# further down — confirm in setup_notebook.ipynb.
%run setup_notebook.ipynb

Successfully connected to py_brew database.


To find all CSV files in the `data` directory and its subdirectories with `pathlib`, use the ***`rglob()`*** method rather than `glob()`: `rglob('*.csv')` searches recursively, whereas `glob('*.csv')` only matches files located directly inside `data`.

In [2]:
from pathlib import Path

# Root folder holding one sub-folder per exported table
data_directory = Path('../../../data/')

# rglob() walks the directory tree lazily and yields every path matching the pattern
csv_files = data_directory.rglob('*.csv')

# rglob() hands back a one-shot generator, so materialise it into a list
# that later cells can iterate over again
csv_file_list = [*csv_files]

for csv_path in csv_file_list:
    print(csv_path)

../../../data/pybrew_dim_date/dim_date.csv
../../../data/pybrew_dim_customer/dim_customer.csv
../../../data/pybrew_dim_product/dim_product.csv
../../../data/pybrew_fact_sales/fact_sales.csv


In [3]:

# rglob() gives no ordering guarantee (the listing order depends on the
# filesystem), so unpacking the list by position could silently bind a path
# to the wrong variable. Look each file up by its stem (the file name
# without ".csv") instead.
csv_path_by_stem = {csv_path.stem: csv_path for csv_path in csv_file_list}

# A missing file now fails loudly with a KeyError naming the absent table.
dim_date_path = csv_path_by_stem['dim_date']
dim_customer_path = csv_path_by_stem['dim_customer']
dim_product_path = csv_path_by_stem['dim_product']
fact_sales_path = csv_path_by_stem['fact_sales']

print(dim_date_path)

../../../data/pybrew_dim_date/dim_date.csv


In [4]:
%%sql 

-- Inspect the server's secure_file_priv setting before importing the CSV files.
-- The saved output shows an empty value.
-- NOTE(review): per the MySQL manual this variable governs server-side file
-- import/export; confirm whether it matters for the LOCAL loads used below.
SHOW VARIABLES LIKE 'secure_file_priv'

Variable_name,Value
secure_file_priv,


In [5]:
%%sql 

-- set local_infile to ON/true/1 to enable LOCAL INFILE import
-- NOTE(review): SET GLOBAL changes the setting for the whole server, not just
-- this session, and usually needs an administrative privilege — confirm this
-- is acceptable for the database this notebook connects to.
SET GLOBAL local_infile = true;

[]

In [6]:
%%sql 

-- Confirm that local_infile is enabled at the GLOBAL scope
-- (the saved output reports ON).
SHOW GLOBAL VARIABLES LIKE 'local_infile';

Variable_name,Value
local_infile,ON


In [None]:
%%sql 
-- Bulk-load the dim_date CSV export into the dim_date table.
-- The path is the same one printed for dim_date_path earlier in the notebook.
-- Fields are comma-separated, rows end with a newline, and the first row
-- (presumably the CSV header; confirm) is skipped.
-- NOTE(review): running this cell again loads the file a second time; whether
-- that duplicates rows depends on the table's keys — check before re-running.
LOAD DATA LOCAL INFILE '../../../data/pybrew_dim_date/dim_date.csv'
INTO TABLE dim_date
FIELDS TERMINATED BY ','
LINES TERMINATED BY '\n'
IGNORE 1 ROWS;

In [8]:
dates = %sql SELECT * FROM dim_date LIMIT 10

print(dates)

+---------+---------------------+------------+-------------+-------+------+
| date_id |        tmstp        |    date    | day_of_week | month | year |
+---------+---------------------+------------+-------------+-------+------+
|    1    | 2020-07-04 21:23:10 | 2020-07-04 |      4      |   7   | 2020 |
|    2    | 2021-07-03 07:13:40 | 2021-07-03 |      3      |   7   | 2021 |
|    3    | 2021-05-03 00:55:54 | 2021-05-03 |      3      |   5   | 2021 |
|    4    | 2021-03-16 10:03:55 | 2021-03-16 |      16     |   3   | 2021 |
|    5    | 2022-02-22 16:29:22 | 2022-02-22 |      22     |   2   | 2022 |
|    6    | 2022-06-10 22:19:28 | 2022-06-10 |      10     |   6   | 2022 |
|    7    | 2022-03-22 11:25:33 | 2022-03-22 |      22     |   3   | 2022 |
|    8    | 2020-11-20 08:06:37 | 2020-11-20 |      20     |   11  | 2020 |
|    9    | 2021-11-06 09:57:10 | 2021-11-06 |      6      |   11  | 2021 |
|    10   | 2021-06-29 22:11:10 | 2021-06-29 |      29     |   6   | 2021 |
+---------+-

In [None]:
%%sql 

-- Bulk-load the dim_customer CSV export into the dim_customer table.
-- The path matches the dim_customer.csv entry listed by the CSV discovery cell.
-- Fields are comma-separated, rows end with a newline, and the first row
-- (presumably the CSV header; confirm) is skipped.
-- NOTE(review): running this cell again loads the file a second time; whether
-- that duplicates rows depends on the table's keys — check before re-running.
LOAD DATA LOCAL INFILE '../../../data/pybrew_dim_customer/dim_customer.csv'
INTO TABLE dim_customer
FIELDS TERMINATED BY ','
LINES TERMINATED BY '\n'
IGNORE 1 ROWS;

In [10]:
customers = %sql SELECT * FROM dim_customer LIMIT 10

print(customers)

+-------------+--------------------+----------------+----------------+
| customer_id |   customer_name    | customer_city  | customer_state |
+-------------+--------------------+----------------+----------------+
|     100     | Jennifer Gonzalez  |   Smithland    |    Arkansas    |
|     101     |   Anthony Turner   |    Fordland    | West Virginia  |
|     102     |   Jonathan Bell    |  Collinsmouth  |  Mississippi   |
|     103     |  Michelle Johnson  | South Paulland |    Montana     |
|     104     | Christopher Miller | North Loriview |  South Dakota  |
|     105     |    Brian Brown     |  North Jeremy  |   Minnesota    |
|     106     |    Cathy Suarez    |   South Ian    |   New Jersey   |
|     107     | David Fitzpatrick  |   Danielland   |    Wyoming     |
|     108     |     Juan Logan     |  Danielburgh   |   New Jersey   |
|     109     |    Corey Morgan    |    Leeland     |     Kansas     |
+-------------+--------------------+----------------+----------------+


In [None]:
%%sql 

-- Bulk-load the dim_product CSV export into the dim_product table.
-- The path matches the dim_product.csv entry listed by the CSV discovery cell.
-- Fields are comma-separated, rows end with a newline, and the first row
-- (presumably the CSV header; confirm) is skipped.
-- NOTE(review): running this cell again loads the file a second time; whether
-- that duplicates rows depends on the table's keys — check before re-running.
LOAD DATA LOCAL INFILE '../../../data/pybrew_dim_product/dim_product.csv'
INTO TABLE dim_product
FIELDS TERMINATED BY ','
LINES TERMINATED BY '\n'
IGNORE 1 ROWS;

In [12]:
products = %sql SELECT * FROM dim_product LIMIT 10

print(products)

+------------------+------------------+---------------------+--------------+---------------+
|    product_id    | product_category | product_subcategory | product_name | product_price |
+------------------+------------------+---------------------+--------------+---------------+
| COL:ICE:COL:1048 |  Cold Beverages  |     Iced Coffee     |  Cold Brew   |      4.50     |
| COL:ICE:COL:1051 |  Cold Beverages  |     Iced Coffee     |  Cold Brew   |      4.50     |
| COL:ICE:COL:1072 |  Cold Beverages  |     Iced Coffee     |  Cold Brew   |      4.50     |
| COL:ICE:COL:1105 |  Cold Beverages  |     Iced Coffee     |  Cold Brew   |      4.50     |
| COL:ICE:COL:1139 |  Cold Beverages  |     Iced Coffee     |  Cold Brew   |      4.50     |
| COL:ICE:COL:1169 |  Cold Beverages  |     Iced Coffee     |  Cold Brew   |      4.50     |
| COL:ICE:COL:1204 |  Cold Beverages  |     Iced Coffee     |  Cold Brew   |      4.50     |
| COL:ICE:COL:1268 |  Cold Beverages  |     Iced Coffee     |  Cold Br

In [None]:
%%sql 

-- Bulk-load the fact_sales CSV export into the fact_sales table.
-- The path matches the fact_sales.csv entry listed by the CSV discovery cell.
-- Fields are comma-separated, rows end with a newline, and the first row
-- (presumably the CSV header; confirm) is skipped.
-- NOTE(review): the saved preview of fact_sales shows customer_id, date_id and
-- product_id columns; if those are foreign keys, the dimension tables must be
-- loaded before this cell — confirm against the table definitions.
LOAD DATA LOCAL INFILE '../../../data/pybrew_fact_sales/fact_sales.csv'
INTO TABLE fact_sales
FIELDS TERMINATED BY ','
LINES TERMINATED BY '\n'
IGNORE 1 ROWS;

In [14]:
sales = %sql SELECT * FROM fact_sales LIMIT 10

print(sales)

+----------+--------------+-------------+---------+------------------+
| sales_id | sales_amount | customer_id | date_id |    product_id    |
+----------+--------------+-------------+---------+------------------+
|  100000  |    23.64     |     629     |   888   | HOT:TEA:EAR:1339 |
|  100001  |    44.67     |     751     |    76   | COL:ICE:ICE:1633 |
|  100002  |    26.24     |     329     |    67   | COL:ICE:ICE:1918 |
|  100003  |    46.86     |     305     |   315   | RET:MUG:PYB:1708 |
|  100004  |    45.11     |     435     |   293   | COL:ICE:ICE:1757 |
|  100005  |    46.04     |     102     |   584   | SNA:PAC:FRU:1900 |
|  100006  |    17.71     |     656     |   503   | HOT:COF:MOC:1413 |
|  100007  |    33.10     |     178     |   259   | HOT:TEA:HER:1153 |
|  100008  |    34.51     |     683     |   416   | SNA:BAK:MUF:1456 |
|  100009  |     9.42     |     436     |   742   | HOT:TEA:EAR:1717 |
+----------+--------------+-------------+---------+------------------+
