# Passo 4: Otimização de Preços (Spark)

## Objetivo
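Utilizar o modelo treinado (Spark ML) para encontrar o preço que maximiza o lucro previsto de um produto.
A simulação é feita em lote ('batch'): cria-se um DataFrame com cenários de preço (de 50% a 150% do preço atual), o modelo prevê a demanda em cada cenário e escolhe-se o cenário de maior lucro.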

In [None]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import col, lit, max as max_, row_number
from pyspark.sql.window import Window
from pyspark.ml import PipelineModel
import numpy as np

# Spark session: getOrCreate() reuses an active session when one exists,
# otherwise it builds a new one with the settings below.
spark = (
    SparkSession.builder
    .appName("RetailPriceOptimization")
    .config("spark.sql.warehouse.dir", "spark-warehouse")
    .getOrCreate()
)

# Load the persisted PipelineModel from disk (path is relative to the
# notebook's working directory).
model_path = "../models/spark_model_gbt"
model = PipelineModel.load(model_path)
print("Model loaded.")

# Input data: the "retail_price_clean" table from the Spark catalog.
df = spark.table("retail_price_clean")


In [None]:
from pyspark.sql.functions import lag, month, year, when

# Re-apply Feature Engineering (Lags, etc) to get current state.
# Lags are taken per product in date order. na.fill(0) then replaces nulls in
# numeric columns with 0 (the first row of each product has no previous value,
# so its lag columns would otherwise be null).
window_spec = Window.partitionBy("product_id").orderBy("date")
df_feat = (
    df.withColumn("lag_qty_1", lag("qty", 1).over(window_spec))
    .withColumn("lag_price_1", lag("unit_price", 1).over(window_spec))
    .na.fill(0)
    .withColumn("price_diff_comp1", col("unit_price") - col("comp_1"))
    .withColumn("month", month("date"))
)

# Pick one product to optimize (taking the first one found).
# first() returns None on an empty DataFrame, so fail with a clear message
# instead of an opaque "'NoneType' object is not subscriptable".
_first_product = df_feat.select("product_id").first()
if _first_product is None:
    raise ValueError("No rows found in df_feat; cannot pick a product to optimize.")
pid = _first_product[0]
print(f"Optimizing for Product: {pid}")

# Get the latest state of this product. latest_row stays a (lazy) DataFrame
# because later cells collect the full row from it.
latest_row = df_feat.filter(col("product_id") == pid).orderBy(col("date").desc()).limit(1)

# Read both prices from a single collected row: one Spark job instead of two,
# and both values are guaranteed to come from the same record even when
# several rows share the latest date.
_latest_values = latest_row.select("unit_price", "comp_1").first()
base_price = _latest_values["unit_price"]
comp_price = _latest_values["comp_1"]

print(f"Base Price: {base_price}, Competitor Price: {comp_price}")

In [None]:
# Generate Scenarios (Batch Simulation)
# 50 evenly spaced candidate prices between 50% and 150% of the current price.
prices = np.linspace(base_price * 0.5, base_price * 1.5, 50)
# Unit cost is not read from the data; it is assumed to be 60% of base price.
cost = base_price * 0.6

# Collect row as dict
row_dict = latest_row.collect()[0].asDict()

# One scenario per candidate price: a copy of the latest row in which only the
# price and the price-dependent feature are overridden.
scenarios = [
    {
        **row_dict,
        'unit_price': float(p),
        'price_diff_comp1': float(p - comp_price),
    }
    for p in prices
]

# Create DataFrame.
# Reuse the schema of latest_row instead of letting Spark infer one from the
# dicts: inference fails when a column is None in every row, and may yield
# column types that differ from the ones present in df_feat.
# NOTE(review): assumes unit_price and price_diff_comp1 are floating-point
# columns in df_feat, since they are overwritten with Python floats — confirm.
df_scenarios = spark.createDataFrame(scenarios, schema=latest_row.schema)

# Run Prediction
predictions = model.transform(df_scenarios)

# Calculate Profit: margin per unit times predicted quantity.
# Cached because the result is consumed by more than one action (the best
# scenario lookup below and the plot), avoiding a repeated model.transform.
df_res = predictions.withColumn(
    "profit", (col("unit_price") - cost) * col("prediction")
).cache()

# Find Max Profit
best_scenario = df_res.orderBy(col("profit").desc()).first()

opt_price = best_scenario["unit_price"]
max_profit = best_scenario["profit"]
pred_demand = best_scenario["prediction"]

print(f"Optimal Price: {opt_price:.2f}")
print(f"Max Profit: {max_profit:.2f}")
print(f"Predicted Demand: {pred_demand:.2f}")

In [None]:
# Plot profit and predicted demand against price, marking the optimal price.
import matplotlib.pyplot as plt

# Bring the scenario results to the driver, sorted by price so the lines are
# drawn left to right.
data_plot = df_res.select("unit_price", "profit", "prediction").orderBy("unit_price").collect()
x_prices = [r["unit_price"] for r in data_plot]
y_profits = [r["profit"] for r in data_plot]
y_demand = [r["prediction"] for r in data_plot]

fig, ax1 = plt.subplots(figsize=(10, 6))

# Left y-axis: profit.
ax1.set_xlabel('Price')
ax1.set_ylabel('Profit', color='green')
ax1.plot(x_prices, y_profits, color='green', label='Profit')
ax1.tick_params(axis='y', labelcolor='green')

# Right y-axis (shares the x-axis): predicted demand.
ax2 = ax1.twinx()
ax2.set_ylabel('Demand', color='blue')
ax2.plot(x_prices, y_demand, color='blue', linestyle='--', label='Demand')
ax2.tick_params(axis='y', labelcolor='blue')

plt.title("Price Optimization")
plt.axvline(opt_price, color='red', linestyle=':', label='Optimal')

# The artists above carry labels, but nothing is shown unless a legend is
# drawn. Merge the handles of both twin axes into a single legend, placed on
# ax2 so the lines of the overlaid axes do not cover it.
handles_left, labels_left = ax1.get_legend_handles_labels()
handles_right, labels_right = ax2.get_legend_handles_labels()
ax2.legend(handles_left + handles_right, labels_left + labels_right, loc='best')

plt.show()