#### Creating the `product` Delta table using a DDL script

In [0]:
%fs ls /user/hive/warehouse/product

path,name,size,modificationTime
dbfs:/user/hive/warehouse/product/_delta_log/,_delta_log/,0,0


In [0]:
%fs rm -r /user/hive/warehouse/product

In [0]:
%sql
-- Rebuild the `product` Delta table from scratch: drop any existing
-- definition first, then create it again with the columns below.
DROP TABLE IF EXISTS product;

CREATE TABLE IF NOT EXISTS product (
  -- Product attributes.
  productCode        STRING,
  productName        STRING,
  productLine        STRING,
  productScale       STRING,
  productVendor      STRING,
  productDescription STRING,
  quantityInStock    INT,
  buyPrice           DOUBLE,
  MSRP               DOUBLE,
  -- Audit columns (presumably populated when rows are loaded/updated -- confirm
  -- against the cells that write to this table).
  createdDate        TIMESTAMP,
  createdBy          STRING,
  updatedDate        TIMESTAMP,
  updatedBy          STRING
)
USING delta;

##### Creating a PySpark schema for the `product` table to avoid data type issues while reading data.

In [0]:
from pyspark.sql.types import StringType,IntegerType,DoubleType,StructField,StructType
# Explicit schema for the product source files, so column types are fixed up
# front rather than guessed while reading. Every column is nullable.
product_schema = (
    StructType()
    .add("productCode", StringType(), True)
    .add("productName", StringType(), True)
    .add("productLine", StringType(), True)
    .add("productScale", StringType(), True)
    .add("productVendor", StringType(), True)
    .add("productDescription", StringType(), True)
    .add("quantityInStock", IntegerType(), True)
    .add("buyPrice", DoubleType(), True)
    .add("MSRP", DoubleType(), True)
)

In [0]:
# DBFS locations of the two product input files used by this notebook.
product_source_file = "dbfs:/FileStore/products/product.txt"
product_delta_file = "dbfs:/FileStore/products/product_delta.txt"

# Field separator for the product files (a tab character).
product_delimiter = "\t"

# NOTE(review): presumably where the reader should write rejected/bad records --
# confirm against the cell that actually reads the files.
product_badpath = "/tmp/productsBadPath/"

In [0]:
import urllib.request
# Download product.txt to local disk at /tmp/product.txt. The call returns a
# (local_filename, headers) tuple, which the notebook displays as the cell result.
# NOTE(review): the URL has fewer path segments than the usual
# raw.githubusercontent.com/<owner>/<repo>/<branch>/<path> layout -- confirm it resolves.
urllib.request.urlretrieve(
    url="https://raw.githubusercontent.com/master/poc_product/product.txt",
    filename="/tmp/product.txt",
)



Out[12]: ('/tmp/product.txt', <http.client.HTTPMessage at 0x7f26acddecd0>)

In [0]:
%sh
# List the local /tmp directory to confirm that product.txt was downloaded.
ls /tmp

Rserv
Rtmp5kSE6D
chauffeur-daemon-params
chauffeur-daemon.pid
chauffeur-env.sh
custom-spark.conf
driver-daemon-params
driver-daemon.pid
driver-env.sh
hsperfdata_root
product.txt
python_lsp_logs
systemd-private-8927e32899474482ba02ddf53f93f375-apache2.service-CT8Qaf
systemd-private-8927e32899474482ba02ddf53f93f375-ntp.service-zG260h
systemd-private-8927e32899474482ba02ddf53f93f375-systemd-logind.service-tpValg
systemd-private-8927e32899474482ba02ddf53f93f375-systemd-resolved.service-MOSj9h
tmp.urlF6ZWtFs


In [0]:
%fs ls dbfs:/FileStore/products/

path,name,size,modificationTime
dbfs:/FileStore/products/product.txt,product.txt,29497,1706779608000
dbfs:/FileStore/products/product_delta.txt,product_delta.txt,861,1706779611000


In [0]:
# Move the downloaded file from local disk (file:/tmp) into DBFS.
# The destination is taken from product_source_file (defined in the config cell)
# instead of repeating the literal path, so the move target cannot drift from
# the location that the rest of the notebook reports and uses.
dbutils.fs.mv("file:/tmp/product.txt", product_source_file)

Out[14]: True

In [0]:
# Download the delta file to local disk, then move it into DBFS.
# Relies on `urllib.request` having been imported by the earlier download cell.
urllib.request.urlretrieve("https://raw.githubusercontent.com/master/poc_product/product_delta.txt","/tmp/product_delta.txt")
# Use the configured destination rather than a repeated literal so the file is
# always moved to the same path that the message below reports.
dbutils.fs.mv("file:/tmp/product_delta.txt", product_delta_file)
# Report both DBFS locations; product.txt itself was moved by an earlier cell.
print('product files copied to ',product_source_file)
print('product delta files copied to ',product_delta_file)

product files copied to  dbfs:/FileStore/products/product.txt
product delta files copied to  dbfs:/FileStore/products/product_delta.txt


In [0]:
%fs head dbfs:/FileStore/products/product_delta.txt

In [0]:
%fs head dbfs:/FileStore/products/product_delta.txt

In [0]:
%fs head dbfs:/FileStore/products/product.txt