In [None]:
import os

# Runtime settings consumed by the rest of the notebook (MLflow endpoint and
# names, OpenStack/Swift access). The two credential values are placeholders:
# substitute real application credentials before running, and do not commit them.
os.environ.update(
    {
        "MLFLOW_TRACKING_URI": "http://mlflow:5001",
        "MLFLOW_MODEL_NAME": "AgriYieldPredictor",
        "MLFLOW_EXPERIMENT_NAME": "Crop Yield Training",
        "FS_OPENSTACK_SWIFT_CONTAINER_NAME": "object-persist-project-4",
        "OS_APPLICATION_CREDENTIAL_ID": "<your-app-cred-id>",
        "OS_APPLICATION_CREDENTIAL_SECRET": "<your-app-cred-secret>",
        "OS_REGION_NAME": "CHI@TACC",
    }
)
# Expose the same container name under the OS_-prefixed variable as well.
os.environ["OS_SWIFT_CONTAINER_NAME"] = os.environ["FS_OPENSTACK_SWIFT_CONTAINER_NAME"]


In [None]:
# Hyperparameters for the training run, gathered in one place.
config = dict(
    batch_size=32,
    epochs=10,
    lr=1e-3,
    hidden_dim=64,
    fips_embedding_dim=16,
    lstm_layers=1,
    tcn_channels=[64, 32],
    dropout_rate=0.1,
)

### Fetch Train/Eval/Test CSVs

In [None]:
from fetch_data import download_csvs_from_swift
# Fetch the dataset CSVs into the local "output" directory; the prediction
# step at the end of the notebook reads "output/test.csv".
# NOTE(review): presumably this pulls from the Swift container named in the
# environment variables set above — confirm in fetch_data.
download_csvs_from_swift(output_dir="output")

### Load Dataset

In [None]:
from pathlib import Path
from load_data import MultiCropYieldDataset

# Build the training and evaluation datasets from the CSVs under `root`.
# NOTE(review): the fetch step downloads into "output", whereas these splits
# are read from the mounted path below — confirm that this is intentional.
root = Path("/mnt/swift_store/transformed_data")
train_ds, eval_ds = (
    MultiCropYieldDataset(root / f"{split}.csv") for split in ("train", "eval")
)

### Initialize Model

In [None]:
from model import LSTMTCNRegressor

# Size the model from the data itself rather than hard-coding dimensions.
# assumes the first element of a sample is a 2-D array whose second axis is
# the per-step feature width — TODO confirm against MultiCropYieldDataset.
input_dim = train_ds[0][0].shape[1]
num_fips = train_ds.get_num_fips()
num_crops = train_ds.get_num_crops()

# Architecture hyperparameters come from `config` so that editing the config
# cell actually changes the model (they were previously duplicated literals).
model = LSTMTCNRegressor(
    input_dim=input_dim,
    num_fips=num_fips,
    num_crops=num_crops,
    fips_embedding_dim=config["fips_embedding_dim"],
    hidden_dim=config["hidden_dim"],
    lstm_layers=config["lstm_layers"],
    # Pass a copy so the model cannot mutate the list stored in `config`.
    tcn_channels=list(config["tcn_channels"]),
    dropout_rate=config["dropout_rate"],
)

### Train & Log to MLflow

In [None]:
import mlflow
from utils import train_model, collate_fn
import torch

# Point MLflow at the tracking server and experiment configured via the
# environment variables set at the top of the notebook.
mlflow.set_tracking_uri(os.environ["MLFLOW_TRACKING_URI"])
mlflow.set_experiment(os.environ["MLFLOW_EXPERIMENT_NAME"])

# Batch size is taken from `config` so the loaders, the logged parameters and
# the actual training run cannot drift apart.
train_loader = torch.utils.data.DataLoader(
    train_ds,
    batch_size=config["batch_size"],
    shuffle=True,
    collate_fn=collate_fn,
)
eval_loader = torch.utils.data.DataLoader(
    eval_ds,
    batch_size=config["batch_size"],
    shuffle=False,
    collate_fn=collate_fn,
)

with mlflow.start_run(log_system_metrics=True):
    # Log the complete hyperparameter set that was used, not a hand-copied subset.
    mlflow.log_params(config)

    model = train_model(
        model,
        train_loader,
        eval_loader,
        num_epochs=config["epochs"],
        lr=config["lr"],
    )
    # Register the logged model under MLFLOW_MODEL_NAME: the promotion step
    # looks the model up in the registry by that name, and a model that is
    # only logged as a run artifact never gets a registry version.
    mlflow.pytorch.log_model(
        model,
        "model",
        registered_model_name=os.environ["MLFLOW_MODEL_NAME"],
    )

### Promote Model

In [None]:
from mlflow.tracking import MlflowClient

# Move the newest registry version that is still in the "None" stage to
# "Staging"; report and do nothing when there is no such version.
# NOTE(review): stage-based registry calls are deprecated in newer MLflow
# releases — confirm against the server version in use.
client = MlflowClient()
model_name = os.environ["MLFLOW_MODEL_NAME"]

versions = client.get_latest_versions(model_name, stages=["None"])
if not versions:
    print("No model in 'None' stage to promote.")
else:
    latest = versions[0]
    client.transition_model_version_stage(
        name=model_name, version=latest.version, stage="Staging"
    )

### Predict from Registry

In [None]:
from predict import main as run_prediction
import sys

# predict.main() is driven through sys.argv here, so we emulate a command
# line. The kernel's own argv is saved and restored afterwards so this cell
# does not leave altered process state behind for later cells.
original_argv = sys.argv
sys.argv = [
    "predict.py",
    "--stage", "Staging",
    "--fips-id", "0",
    "--crop-id", "0",
    "--csv", "output/test.csv",
]
try:
    run_prediction()
finally:
    sys.argv = original_argv