In [None]:
-- Inspect the columns of the reviews table before joining it.
DESCRIBE TABLE customer_reviews;

In [None]:
-- Inspect the columns of the shipping table before joining it.
DESCRIBE TABLE shipping_logs;

In [None]:
-- Build merged_reviews: every customer review paired with the shipping
-- record that has the same order_id. The join is an inner join, so only
-- orders present in both tables are kept.
CREATE OR REPLACE TABLE merged_reviews AS
SELECT
    -- review-side columns
    rev.order_id,
    rev.filename,
    rev.product,
    rev.review_date,
    rev.review_text,
    -- shipping-side columns
    ship.shipping_date,
    ship.carrier,
    ship.tracking_number,
    ship.latitude,
    ship.longitude,
    ship.status,
    ship.delivery_days,
    ship.late
FROM customer_reviews AS rev
INNER JOIN shipping_logs AS ship
    ON rev.order_id = ship.order_id;

In [None]:
from snowflake.snowpark.context import get_active_session

# Obtain the Snowpark session that is currently active for this notebook.
session = get_active_session()

# Reference the merged_reviews table (reviews joined to shipping logs,
# created by the SQL cell above) as a Snowpark DataFrame.
df = session.table("merged_reviews")

# Preview the first 10 rows.
df.show(10)

In [None]:
from snowflake.snowpark.functions import col, trim

# Remove rows whose review text is missing, empty, or made up only of
# characters that trim() strips (spaces by default).
#
# The emptiness check is done on the *trimmed* value: a later cell trims
# review_text, so a blank-but-not-empty string such as "   " would pass a
# plain `!= ""` test here and then turn into an empty review downstream.
# Only the comparison uses the trimmed value; the stored column is left
# unchanged by this step.
df_cleaned = df.filter(
    col("review_text").is_not_null() & (trim(col("review_text")) != "")
)

In [None]:
from snowflake.snowpark.functions import lower, trim

# Normalize review_text in place (same column name): lower-case it, then
# strip leading/trailing padding with trim().
df_lowercase = df_cleaned.with_column(
    "review_text",
    trim(lower(col("review_text"))),
)

In [None]:
# Collapse repeated reviews: keep a single row for each distinct
# (order_id, review_text) combination.
df_deduped = df_lowercase.drop_duplicates("order_id", "review_text")

In [None]:
# Persist the de-duplicated reviews as the clean_reviews table,
# replacing the table if it already exists.
df_deduped.write.save_as_table("clean_reviews", mode="overwrite")