In [1]:
# --- Price Prediction Model Training (with FAO/real data ready) ---
from pathlib import Path

import joblib
import numpy as np
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder

# 🔹 Example dataset (replace with FAO/local market data)
# Synthetic placeholder rows: every column is drawn independently at random,
# so "price" carries no real relationship to the features.
# A seeded generator keeps the dataset identical from run to run, which makes
# the reported score and the saved model reproducible.
RANDOM_SEED = 42
N_SAMPLES = 200
rng = np.random.default_rng(RANDOM_SEED)

data = {
    "commodity": rng.choice(["maize", "beans", "rice"], N_SAMPLES),
    "origin": rng.choice(["Kenya", "Rwanda", "Uganda"], N_SAMPLES),
    "quantity": rng.integers(10, 100, N_SAMPLES),  # in kg (upper bound exclusive)
    "description": rng.choice(["organic", "hybrid seed", "dry", "wet"], N_SAMPLES),
    "price": rng.integers(50, 300, N_SAMPLES),  # in local currency (upper bound exclusive)
}
df = pd.DataFrame(data)

# Model inputs and the value to predict
feature_columns = ["commodity", "origin", "quantity", "description"]
X = df[feature_columns]
y = df["price"]

# Column groups: text-valued columns are one-hot encoded, numeric ones pass through unchanged
categorical = ["commodity", "origin", "description"]
numeric = ["quantity"]

# handle_unknown="ignore": a category not seen during fit does not raise at predict time
one_hot = OneHotEncoder(handle_unknown="ignore")
preprocessor = ColumnTransformer(
    transformers=[
        ("cat", one_hot, categorical),
        ("num", "passthrough", numeric),
    ],
)

# Preprocessing and linear regression chained into a single estimator
pipeline_steps = [
    ("preprocessor", preprocessor),
    ("regressor", LinearRegression()),
]
model = Pipeline(steps=pipeline_steps)

# Split: hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train the full pipeline (encoding + regression) on the training rows only
model.fit(X_train, y_train)

# Evaluate on the held-out rows
score = model.score(X_test, y_test)
print(f"Model R² score: {score:.2f}")

# Save trained model
# Create the target directory first so the dump does not fail with
# FileNotFoundError on a fresh checkout where ../models does not exist yet.
MODEL_PATH = "../models/price_model.pkl"
Path(MODEL_PATH).parent.mkdir(parents=True, exist_ok=True)
joblib.dump(model, MODEL_PATH)
print("✅ Model saved as models/price_model.pkl")


Model R² score: -0.02
✅ Model saved as models/price_model.pkl
