In [None]:
#Task 1 Data Ingestion
# Reads the raw product inventory CSV and persists it unchanged as a Delta
# table. Success and failure are recorded in the ingestion log file.
import logging
import os

from pyspark.sql import SparkSession

spark = SparkSession.builder.appName("Product Inventory Ingestion").getOrCreate()

file_path = "/content/sample_data/tables/product_inventory.csv"
log_path = '/content/sample_data/logs/inventory_ingestion.log'

# basicConfig opens the log file right away and raises if its parent
# directory is missing, so create the directory first.
os.makedirs(os.path.dirname(log_path), exist_ok=True)
logging.basicConfig(filename=log_path, level=logging.INFO)

try:
    # Guard clause: fail early with a clear message when the CSV is absent.
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File {file_path} not found.")

    # The first CSV line is treated as the header; no schema is inferred here.
    product_df = spark.read.format("csv").option("header", "true").load(file_path)
    product_df.write.format("delta").mode("overwrite").save("/content/sample_data/delta/product_inventory_raw")
    logging.info("Product inventory ingestion completed successfully.")

except FileNotFoundError as e:
    logging.error(f"FileNotFoundError: {str(e)}")
except Exception as e:
    # Best-effort cell: log instead of raising, but keep the traceback so
    # Spark/Delta failures can be diagnosed from the log file.
    logging.exception(f"An error occurred: {str(e)}")


In [None]:
#Task 2 Data Cleaning
# Loads the raw Delta table, replaces missing stock quantities and prices
# with zero, drops rows with a negative stock quantity, and writes the result
# to the cleaned Delta table.
# `col` is imported here so the cell does not depend on a later cell having
# been run first.
from pyspark.sql.functions import col

product_df = spark.read.format("delta").load("/content/sample_data/delta/product_inventory_raw")

# Nulls in the two numeric-looking columns become 0 / 0.0.
cleaned_df = product_df.na.fill({"StockQuantity": 0, "Price": 0.0})
# Keep only rows whose stock quantity is non-negative.
cleaned_df = cleaned_df.filter(col("StockQuantity") >= 0)

cleaned_df.write.format("delta").mode("overwrite").save("/content/sample_data/delta/product_inventory_cleaned")
print("Product inventory cleaning completed successfully.")


In [None]:
#Task 3 Inventory Analysis
# Derives two outputs from the cleaned inventory table:
#   * the full table with an added TotalStockValue column, and
#   * the subset of rows whose stock quantity is below 100.
# Each output overwrites its own Delta location.
from pyspark.sql.functions import col, expr

cleaned_df = spark.read.load(
    "/content/sample_data/delta/product_inventory_cleaned",
    format="delta",
)

# Restock candidates come from the cleaned data, so they do not carry the
# TotalStockValue column.
restock_df = cleaned_df.where(col("StockQuantity") < 100)
stock_value_df = cleaned_df.withColumn(
    "TotalStockValue",
    col("StockQuantity") * col("Price"),
)

# Write the analysis table first, then the restock table.
for frame, target in (
    (stock_value_df, "/content/sample_data/delta/product_inventory_analysis"),
    (restock_df, "/content/sample_data/delta/product_inventory_restock"),
):
    frame.write.save(target, format="delta", mode="overwrite")

print("Product inventory analysis completed successfully.")


In [None]:
#Task 4 Build an Inventory Pipeline
# Invokes the Databricks CLI once per entry in `notebooks` and records the
# outcome of each invocation in the pipeline log file.
import logging
import os
import subprocess

pipeline_log_path = '/content/sample_data/logs/inventory_pipeline_log.log'

# A dedicated logger with its own file handler is used instead of
# logging.basicConfig: basicConfig does nothing once the root logger already
# has a handler (e.g. after the ingestion cell ran in the same kernel), which
# would leave the pipeline log file unwritten.
os.makedirs(os.path.dirname(pipeline_log_path), exist_ok=True)
pipeline_logger = logging.getLogger("inventory_pipeline")
pipeline_logger.setLevel(logging.INFO)
pipeline_logger.propagate = False  # keep pipeline messages out of other log files
if not pipeline_logger.handlers:  # avoid duplicate handlers when the cell is re-run
    _pipeline_handler = logging.FileHandler(pipeline_log_path)
    _pipeline_handler.setFormatter(logging.Formatter(logging.BASIC_FORMAT))
    pipeline_logger.addHandler(_pipeline_handler)

# NOTE(review): these entries are the Delta output locations written by
# Tasks 1-3, not notebook files, and `databricks workspace import` uploads a
# file rather than running it. Confirm the intended notebook paths and CLI
# command before relying on this cell.
notebooks = [
    "/content/sample_data/delta/product_inventory_raw",
    "/content/sample_data/delta/product_inventory_cleaned",
    "/content/sample_data/delta/product_inventory_analysis"
]

for notebook in notebooks:
    try:
        # check=True turns a non-zero exit status into CalledProcessError.
        subprocess.run(["databricks", "workspace", "import", notebook], check=True)
        pipeline_logger.info(f"Successfully executed {notebook}")
    except subprocess.CalledProcessError as e:
        pipeline_logger.error(f"Error occurred while executing {notebook}: {e}")
    except OSError as e:
        # Raised when the `databricks` executable is missing or not runnable;
        # log it and continue so one failure does not abort the whole loop.
        pipeline_logger.error(f"Error occurred while executing {notebook}: {e}")


In [None]:
#Task 5
# Inventory monitoring: loads the cleaned inventory table and prints an alert
# listing every product whose stock quantity is below 50.
from pyspark.sql import SparkSession
# Imported here so the cell runs on its own, matching the SparkSession setup below.
from pyspark.sql.functions import col

spark = SparkSession.builder.appName("Inventory Monitoring").getOrCreate()

inventory_df = spark.read.format("delta").load("/content/sample_data/delta/product_inventory_cleaned")


urgent_restock_df = inventory_df.filter(col("StockQuantity") < 50)

# head(1) returns a list with at most one row, so it answers "is there any
# match?" without counting every matching row.
if urgent_restock_df.head(1):
    print("Alert: Some products need urgent restocking!")
    urgent_restock_df.show()
else:
    print("No products need urgent restocking at the moment.")
