In [0]:
%sql
-- Create the managed volume that raw input files are copied into.
-- `if not exists` keeps this cell safe to re-run: without it a second run
-- fails because the volume is already there.
create volume if not exists dev.bronze.landing
comment 'This is a managed volume to store input raw';

In [0]:
# Landing directory inside the managed volume where the raw files are stored
landing_dir = "/Volumes/dev/bronze/landing/input"
dbutils.fs.mkdirs(landing_dir)

# Copy two daily retail CSV files from databricks-datasets into the landing directory
for src in (
    "/databricks-datasets/definitive-guide/data/retail-data/by-day/2010-12-01.csv",
    "/databricks-datasets/definitive-guide/data/retail-data/by-day/2010-12-02.csv",
):
    dbutils.fs.cp(src, landing_dir)

In [0]:
%sql
-- Create a schemaless placeholder table as the COPY INTO target; its columns
-- are filled in by the load itself (COPY_OPTIONS mergeSchema).
-- `if not exists` makes the cell re-runnable instead of failing once the
-- table has been created.
create table if not exists dev.bronze.invoice_cp;

In [0]:
%sql
-- COPY INTO: load the CSV files from the landing directory into the placeholder table
copy into dev.bronze.invoice_cp
from '/Volumes/dev/bronze/landing/input/'  -- directory the files are read from
fileformat = CSV                           -- PARQUET, AVRO etc. are also valid file formats
pattern = '*.csv'                          -- only pick up matching file names; * is a wildcard
format_options (                           -- applied while reading the source files
  'header' = 'true',                       -- the files carry a header row
  'mergeSchema' = 'true'                   -- merge the schema across the input files
)
copy_options (                             -- applied to the target table
  'mergeSchema' = 'true'                   -- merge the incoming schema into the table's schema
)
-- Re-running this cell -> no rows affected, inserted or skipped

In [0]:
%sql
-- Preview the rows that COPY INTO loaded into the table
select *
from dev.bronze.invoice_cp;

In [0]:
%sql
-- Show the table's columns together with its extended metadata
describe table extended dev.bronze.invoice_cp;
-- Note: a _copy_into_log is stored at the specified location

In [0]:
# Add one more daily retail CSV from databricks-datasets to the landing directory,
# so the next COPY INTO run has a new file to pick up
new_file = "/databricks-datasets/definitive-guide/data/retail-data/by-day/2010-12-03.csv"
dbutils.fs.cp(new_file, "/Volumes/dev/bronze/landing/input")

In [0]:
%sql
-- COPY INTO again, after a new file has been added to the landing directory
copy into dev.bronze.invoice_cp
from '/Volumes/dev/bronze/landing/input/'  -- directory the files are read from
fileformat = CSV                           -- PARQUET, AVRO etc. are also valid file formats
pattern = '*.csv'                          -- only pick up matching file names; * is a wildcard
format_options (                           -- applied while reading the source files
  'header' = 'true',                       -- the files carry a header row
  'mergeSchema' = 'true'                   -- merge the schema across the input files
)
copy_options (                             -- applied to the target table
  'mergeSchema' = 'true'                   -- merge the incoming schema into the table's schema
)
-- Num rows affected = num rows in data = 2202

In [0]:
%sql
-- Total number of rows now held in the target table
select
  count(*)
from dev.bronze.invoice_cp;

In [0]:
%sql
-- COPY INTO can also load a chosen subset of columns, with transformations,
-- into a table whose schema is declared up front.
-- `if not exists` keeps the cell re-runnable once the table has been created.
create table if not exists dev.bronze.invoice_cp_custom (
  InvoiceNo string,
  StockCode string,
  Quantity double,
  _insert_date timestamp
);

-- Project and cast the source columns while loading, and stamp each row with
-- the load time. The path and option names are single-quoted string literals,
-- matching the other COPY INTO cells; double quotes are read as identifiers
-- when double-quoted identifiers are enabled.
copy into dev.bronze.invoice_cp_custom
from (
  select
    InvoiceNo,
    StockCode,
    cast(Quantity as double) as Quantity,
    current_timestamp() as _insert_date
  from '/Volumes/dev/bronze/landing/input'
)
fileformat = CSV
format_options (
  'mergeSchema' = 'true',
  'header' = 'true'
);

In [0]:
%sql
select * from dev.bronze.invoice_cp_custom;