# Main

In [18]:
pip install pulp

Collecting pulp
  Downloading pulp-3.3.0-py3-none-any.whl.metadata (8.4 kB)
Downloading pulp-3.3.0-py3-none-any.whl (16.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m16.4/16.4 MB[0m [31m7.7 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hInstalling collected packages: pulp
Successfully installed pulp-3.3.0
Note: you may need to restart the kernel to use updated packages.


In [14]:
# Enhanced LNG Trade Strategy Optimization
# P&L factoring in demand/supply, market prices, and counterparty risks

import pandas as pd
import numpy as np
import json
from typing import Dict, List, Tuple
import matplotlib.pyplot as plt
import seaborn as sns
from dataclasses import dataclass
from scipy.stats import norm
import warnings
warnings.filterwarnings('ignore')

# Set up plotting
plt.style.use('seaborn-v0_8')
sns.set_palette("husl")

print("🚢 Enhanced LNG Trade Strategy Optimization")
print("=" * 50)

🚢 Enhanced LNG Trade Strategy Optimization


In [15]:
# 1. Enhanced Data Models and Risk Management Classes

@dataclass
class CounterpartyRisk:
    """Counterparty risk profile"""
    code: str
    name: str
    credit_rating: str
    credit_limit: float
    default_probability: float
    country_risk: float = 0.0

@dataclass
class MarketProfile:
    """Market demand/supply profile"""
    code: str
    name: str
    base_demand: float
    demand_elasticity: float
    seasonal_factors: Dict[str, float]
    volatility: float
    supply_disruption_risk: float

class CounterpartyRiskManager:
    """Manages counterparty credit risk and exposure limits"""
    
    def __init__(self):
        self.counterparties = {}
        self.exposure_limits = {}
    
    def add_counterparty(self, risk_profile: CounterpartyRisk):
        """Add a counterparty risk profile"""
        self.counterparties[risk_profile.code] = risk_profile
        self.exposure_limits[risk_profile.code] = risk_profile.credit_limit
    
    def get_risk_adjustment(self, counterparty_code: str, exposure: float) -> float:
        """Calculate risk adjustment based on counterparty credit quality and exposure"""
        if counterparty_code not in self.counterparties:
            return 0.0
        
        counterparty = self.counterparties[counterparty_code]
        
        # Base risk from credit rating
        rating_risk = self._rating_to_risk(counterparty.credit_rating)
        
        # Exposure concentration risk
        credit_limit = self.exposure_limits[counterparty_code]
        utilization = exposure / credit_limit if credit_limit > 0 else 0
        concentration_risk = max(0, (utilization - 0.7) * 0.2)  # Penalty above 70% utilization
        
        # Country risk
        country_risk = counterparty.country_risk
        
        return rating_risk + concentration_risk + country_risk
    
    def _rating_to_risk(self, rating: str) -> float:
        """Convert credit rating to risk adjustment factor"""
        rating_map = {
            'AAA': 0.001, 'AA+': 0.002, 'AA': 0.003, 'AA-': 0.004,
            'A+': 0.005, 'A': 0.007, 'A-': 0.010,
            'BBB+': 0.015, 'BBB': 0.020, 'BBB-': 0.030,
            'BB+': 0.040, 'BB': 0.060, 'BB-': 0.080,
            'B+': 0.100, 'B': 0.150, 'B-': 0.200,
            'CCC': 0.300, 'CC': 0.500, 'C': 0.800
        }
        return rating_map.get(rating, 0.100)  # Default to high risk

class DemandSupplyModel:
    """Models demand/supply dynamics and market behavior"""
    
    def __init__(self):
        self.markets = {}
        self.base_prices = {}
    
    def add_market(self, market_profile: MarketProfile, base_price: float):
        """Add a market profile with base price"""
        self.markets[market_profile.code] = market_profile
        self.base_prices[market_profile.code] = base_price
    
    def calculate_effective_demand(self, market_code: str, price: float, 
                                 season: str = "normal") -> float:
        """Calculate effective demand considering price elasticity and seasonality"""
        if market_code not in self.markets:
            return 0.0
        
        market = self.markets[market_code]
        base_price = self.base_prices[market_code]
        
        # Price elasticity effect
        price_ratio = price / base_price
        elasticity = market.demand_elasticity
        price_effect = price_ratio ** elasticity
        
        # Seasonal effect
        seasonal_factor = market.seasonal_factors.get(season, 1.0)
        
        # Supply disruption risk
        disruption_factor = 1 - market.supply_disruption_risk
        
        return market.base_demand * price_effect * seasonal_factor * disruption_factor
    
    def calculate_price_volatility_impact(self, market_code: str, confidence_level: float = 0.95) -> float:
        """Calculate Value-at-Risk impact from price volatility"""
        if market_code not in self.markets:
            return 0.0
        
        volatility = self.markets[market_code].volatility
        z_score = norm.ppf(1 - confidence_level)
        return z_score * volatility

print("✅ Risk management and demand/supply models defined")

✅ Risk management and demand/supply models defined


In [16]:
# 2. Enhanced Financial Calculations

def calculate_enhanced_unit_profit(
    price_per_unit: float,
    distance_nm: float,
    handling_fee_per_unit: float,
    boiloff_rate_per_1000nm: float,
    freight_cost_per_nm_per_unit: float,
    variable_cost_per_unit: float,
    carbon_cost_per_unit: float = 0.0,
    counterparty_risk_adjustment: float = 0.0,
    market_volatility_impact: float = 0.0
) -> Dict[str, float]:
    """
    Enhanced unit profit calculation with risk adjustments
    Returns detailed breakdown of profit components
    """
    # Basic calculations
    delivered_frac = max(0.0, 1.0 - (distance_nm / 1000.0) * boiloff_rate_per_1000nm)
    revenue = price_per_unit * delivered_frac
    
    # Cost components
    freight = freight_cost_per_nm_per_unit * distance_nm
    total_costs = variable_cost_per_unit + handling_fee_per_unit + freight + carbon_cost_per_unit
    
    # Base profit
    base_profit = revenue - total_costs
    
    # Risk adjustments
    counterparty_risk_cost = base_profit * counterparty_risk_adjustment
    volatility_cost = abs(base_profit) * market_volatility_impact
    
    # Risk-adjusted profit
    risk_adjusted_profit = base_profit - counterparty_risk_cost - volatility_cost
    
    return {
        'base_profit': base_profit,
        'risk_adjusted_profit': risk_adjusted_profit,
        'revenue': revenue,
        'total_costs': total_costs,
        'delivered_fraction': delivered_frac,
        'counterparty_risk_cost': counterparty_risk_cost,
        'volatility_cost': volatility_cost,
        'net_margin': risk_adjusted_profit / revenue if revenue > 0 else 0
    }

def build_enhanced_profit_table(
    ports_df: pd.DataFrame, 
    price_map: Dict[str, float],
    assumptions: Dict[str, float],
    risk_manager: CounterpartyRiskManager,
    demand_model: DemandSupplyModel
) -> pd.DataFrame:
    """Build comprehensive profit table with risk adjustments"""
    
    rows = []
    for _, port in ports_df.iterrows():
        code = port["code"]
        price = price_map.get(code, np.nan)
        
        if np.isnan(price):
            continue
            
        # Calculate risk adjustments
        counterparty_risk = risk_manager.get_risk_adjustment(code, 0)  # Initial exposure = 0
        volatility_impact = demand_model.calculate_price_volatility_impact(code)
        
        # Calculate enhanced unit profit
        profit_components = calculate_enhanced_unit_profit(
            price_per_unit=price,
            distance_nm=float(port["distance_nm"]),
            handling_fee_per_unit=float(port["handling_fee_per_unit"]),
            boiloff_rate_per_1000nm=float(assumptions["boiloff_rate_per_1000nm"]),
            freight_cost_per_nm_per_unit=float(assumptions["freight_cost_per_nm_per_unit"]),
            variable_cost_per_unit=float(assumptions["variable_cost_per_unit"]),
            carbon_cost_per_unit=float(assumptions.get("carbon_cost_per_unit", 0.0)),
            counterparty_risk_adjustment=counterparty_risk,
            market_volatility_impact=volatility_impact
        )
        
        # Calculate effective demand
        effective_demand = demand_model.calculate_effective_demand(code, price)
        
        # Calculate risk-adjusted capacity (considering counterparty limits)
        credit_limit = risk_manager.exposure_limits.get(code, float('inf'))
        risk_adjusted_capacity = min(
            float(port["monthly_capacity_cargo"]),
            credit_limit,
            effective_demand
        )
        
        rows.append({
            "code": code,
            "name": port["name"],
            "distance_nm": port["distance_nm"],
            "monthly_capacity_cargo": port["monthly_capacity_cargo"],
            "risk_adjusted_capacity": risk_adjusted_capacity,
            "price_per_unit": price,
            "effective_demand": effective_demand,
            "base_profit": profit_components['base_profit'],
            "risk_adjusted_profit": profit_components['risk_adjusted_profit'],
            "delivered_fraction": profit_components['delivered_fraction'],
            "counterparty_risk": counterparty_risk,
            "volatility_impact": volatility_impact,
            "net_margin": profit_components['net_margin'],
            "credit_limit": credit_limit
        })
    
    return pd.DataFrame(rows)

print("✅ Enhanced financial calculations defined")

✅ Enhanced financial calculations defined


In [19]:
# 3. Enhanced Optimization with Risk Constraints

import pulp

def optimize_with_risk_constraints(
    profit_table: pd.DataFrame,
    total_supply_units: float,
    risk_manager: CounterpartyRiskManager,
    max_portfolio_risk: float = 0.15,
    diversification_factor: float = 0.3
) -> Dict:
    """
    Enhanced optimization considering multiple risk constraints
    """
    codes = profit_table["code"].tolist()
    
    # Create optimization problem
    m = pulp.LpProblem("lng_allocation_enhanced", pulp.LpMaximize)
    x = pulp.LpVariable.dicts("alloc", codes, lowBound=0)
    
    # Objective: maximize risk-adjusted profit
    risk_adjusted_profit = {row["code"]: float(row["risk_adjusted_profit"]) for _, row in profit_table.iterrows()}
    m += pulp.lpSum([risk_adjusted_profit[c] * x[c] for c in codes])
    
    # Supply constraint
    m += pulp.lpSum([x[c] for c in codes]) <= float(total_supply_units), "SupplyLimit"
    
    # Individual capacity constraints
    for _, row in profit_table.iterrows():
        code = row["code"]
        capacity = float(row["risk_adjusted_capacity"])
        m += x[code] <= capacity, f"Capacity_{code}"
    
    # Credit limit constraints
    for code in codes:
        credit_limit = risk_manager.exposure_limits.get(code, float('inf'))
        if credit_limit < float('inf'):
            m += x[code] <= credit_limit, f"CreditLimit_{code}"
    
    # Portfolio diversification constraint (no single counterparty > 50% of supply)
    max_single_allocation = total_supply_units * (1 - diversification_factor)
    for code in codes:
        m += x[code] <= max_single_allocation, f"Diversification_{code}"
    
    # Solve optimization
    m.solve(pulp.PULP_CBC_CMD(msg=False))
    
    # Extract results
    allocation = {c: float(x[c].value() or 0.0) for c in codes}
    objective = pulp.value(m.objective)
    
    # Calculate risk metrics
    total_exposure = sum(allocation.values())
    risk_metrics = calculate_portfolio_risk_metrics(allocation, profit_table, risk_manager)
    
    return {
        "allocation": allocation,
        "objective": objective,
        "total_exposure": total_exposure,
        "risk_metrics": risk_metrics,
        "optimization_status": pulp.LpStatus[m.status]
    }

def calculate_portfolio_risk_metrics(
    allocation: Dict[str, float],
    profit_table: pd.DataFrame,
    risk_manager: CounterpartyRiskManager
) -> Dict[str, float]:
    """Calculate comprehensive portfolio risk metrics"""
    
    # Concentration risk (Herfindahl index)
    total_allocation = sum(allocation.values())
    if total_allocation > 0:
        herfindahl = sum((allocation[code] / total_allocation) ** 2 for code in allocation.keys())
    else:
        herfindahl = 0.0
    
    # Average counterparty risk
    weighted_risk = 0.0
    for code, amount in allocation.items():
        if amount > 0:
            risk = risk_manager.get_risk_adjustment(code, amount)
            weighted_risk += risk * amount
    
    avg_counterparty_risk = weighted_risk / total_allocation if total_allocation > 0 else 0.0
    
    # Credit utilization
    max_utilization = 0.0
    for code, amount in allocation.items():
        if amount > 0:
            credit_limit = risk_manager.exposure_limits.get(code, float('inf'))
            if credit_limit < float('inf'):
                utilization = amount / credit_limit
                max_utilization = max(max_utilization, utilization)
    
    return {
        "herfindahl_index": herfindahl,
        "avg_counterparty_risk": avg_counterparty_risk,
        "max_credit_utilization": max_utilization,
        "diversification_score": 1 - herfindahl,  # Higher is better
        "risk_score": (herfindahl + avg_counterparty_risk + max_utilization) / 3
    }

print("✅ Enhanced optimization with risk constraints defined")

✅ Enhanced optimization with risk constraints defined


In [25]:
# 3. Enhanced Optimization with Risk Constraints

import pulp

def optimize_with_risk_constraints(
    profit_table: pd.DataFrame,
    total_supply_units: float,
    risk_manager: CounterpartyRiskManager,
    max_portfolio_risk: float = 0.15,
    diversification_factor: float = 0.3
) -> Dict:
    """
    Enhanced optimization considering multiple risk constraints
    """
    codes = profit_table["code"].tolist()
    
    # Create optimization problem
    m = pulp.LpProblem("lng_allocation_enhanced", pulp.LpMaximize)
    x = pulp.LpVariable.dicts("alloc", codes, lowBound=0)
    
    # Objective: maximize risk-adjusted profit
    risk_adjusted_profit = {row["code"]: float(row["risk_adjusted_profit"]) for _, row in profit_table.iterrows()}
    m += pulp.lpSum([risk_adjusted_profit[c] * x[c] for c in codes])
    
    # Supply constraint
    m += pulp.lpSum([x[c] for c in codes]) <= float(total_supply_units), "SupplyLimit"
    
    # Individual capacity constraints
    for _, row in profit_table.iterrows():
        code = row["code"]
        capacity = float(row["risk_adjusted_capacity"])
        m += x[code] <= capacity, f"Capacity_{code}"
    
    # Credit limit constraints
    for code in codes:
        credit_limit = risk_manager.exposure_limits.get(code, float('inf'))
        if credit_limit < float('inf'):
            m += x[code] <= credit_limit, f"CreditLimit_{code}"
    
    # Portfolio diversification constraint (no single counterparty > 50% of supply)
    max_single_allocation = total_supply_units * (1 - diversification_factor)
    for code in codes:
        m += x[code] <= max_single_allocation, f"Diversification_{code}"
    
    # Solve optimization
    m.solve(pulp.PULP_CBC_CMD(msg=False))
    
    # Extract results
    allocation = {c: float(x[c].value() or 0.0) for c in codes}
    objective = pulp.value(m.objective)
    
    # Calculate risk metrics
    total_exposure = sum(allocation.values())
    risk_metrics = calculate_portfolio_risk_metrics(allocation, profit_table, risk_manager)
    
    return {
        "allocation": allocation,
        "objective": objective,
        "total_exposure": total_exposure,
        "risk_metrics": risk_metrics,
        "optimization_status": pulp.LpStatus[m.status]
    }

def calculate_portfolio_risk_metrics(
    allocation: Dict[str, float],
    profit_table: pd.DataFrame,
    risk_manager: CounterpartyRiskManager
) -> Dict[str, float]:
    """Calculate comprehensive portfolio risk metrics"""
    
    # Concentration risk (Herfindahl index)
    total_allocation = sum(allocation.values())
    if total_allocation > 0:
        herfindahl = sum((allocation[code] / total_allocation) ** 2 for code in allocation.keys())
    else:
        herfindahl = 0.0
    
    # Average counterparty risk
    weighted_risk = 0.0
    for code, amount in allocation.items():
        if amount > 0:
            risk = risk_manager.get_risk_adjustment(code, amount)
            weighted_risk += risk * amount
    
    avg_counterparty_risk = weighted_risk / total_allocation if total_allocation > 0 else 0.0
    
    # Credit utilization
    max_utilization = 0.0
    for code, amount in allocation.items():
        if amount > 0:
            credit_limit = risk_manager.exposure_limits.get(code, float('inf'))
            if credit_limit < float('inf'):
                utilization = amount / credit_limit
                max_utilization = max(max_utilization, utilization)
    
    return {
        "herfindahl_index": herfindahl,
        "avg_counterparty_risk": avg_counterparty_risk,
        "max_credit_utilization": max_utilization,
        "diversification_score": 1 - herfindahl,  # Higher is better
        "risk_score": (herfindahl + avg_counterparty_risk + max_utilization) / 3
    }

print("✅ Enhanced optimization with risk constraints defined")

✅ Enhanced optimization with risk constraints defined


In [20]:
# 4. Load and Setup Data

# Load existing data
ports_df = pd.read_csv("data/ports.csv")
with open("data/base_inputs.json") as f:
    base_inputs = json.load(f)

# Enhanced assumptions
enhanced_assumptions = {
    **base_inputs["assumptions"],
    "risk_tolerance": 0.15,
    "diversification_target": 0.3,
    "confidence_level": 0.95
}

# Enhanced price data with volatility
enhanced_prices = {
    **base_inputs["prices_usd_per_unit"],
    "SLNG": 12.0,  # Singapore
    "JP": 14.0,    # Japan
    "CN": 11.0     # China
}

print("📊 Data loaded successfully")
print(f"Ports: {len(ports_df)} destinations")
print(f"Base supply: {enhanced_assumptions['supply_cargo_units']} units")

📊 Data loaded successfully
Ports: 3 destinations
Base supply: 20 units


In [21]:
# 5. Setup Risk Management and Market Models

# Initialize risk manager
risk_manager = CounterpartyRiskManager()

# Add counterparty risk profiles
counterparties = [
    CounterpartyRisk("SLNG", "Singapore LNG", "AA", 50.0, 0.002, 0.02),
    CounterpartyRisk("JP", "Japan (JKM)", "AAA", 100.0, 0.001, 0.01),
    CounterpartyRisk("CN", "China", "A", 30.0, 0.005, 0.05)
]

for cp in counterparties:
    risk_manager.add_counterparty(cp)

# Initialize demand/supply model
demand_model = DemandSupplyModel()

# Add market profiles
markets = [
    MarketProfile("SLNG", "Singapore LNG", 15.0, -0.3, 
                 {"winter": 1.2, "summer": 0.8, "normal": 1.0}, 0.15, 0.02),
    MarketProfile("JP", "Japan (JKM)", 20.0, -0.4,
                 {"winter": 1.5, "summer": 0.7, "normal": 1.0}, 0.20, 0.01),
    MarketProfile("CN", "China", 18.0, -0.6,
                 {"winter": 1.3, "summer": 0.9, "normal": 1.0}, 0.25, 0.05)
]

for market in markets:
    base_price = enhanced_prices[market.code]
    demand_model.add_market(market, base_price)

print("✅ Risk management and market models initialized")
print(f"Counterparties: {len(risk_manager.counterparties)}")
print(f"Markets: {len(demand_model.markets)}")

✅ Risk management and market models initialized
Counterparties: 3
Markets: 3


In [23]:
# 6. Build Enhanced Profit Table

# Build comprehensive profit table
profit_table = build_enhanced_profit_table(
    ports_df, 
    enhanced_prices, 
    enhanced_assumptions,
    risk_manager, 
    demand_model
)

print("📈 Enhanced Profit Analysis")
print("=" * 50)
print(profit_table[['name', 'base_profit', 'risk_adjusted_profit', 'counterparty_risk', 
                   'volatility_impact', 'net_margin', 'effective_demand']].round(3))

# Visualize profit components
fig, axes = plt.subplots(2, 2, figsize=(15, 10))

# Base vs Risk-adjusted profit
axes[0,0].bar(profit_table['name'], profit_table['base_profit'], 
              alpha=0.7, label='Base Profit', color='lightblue')
axes[0,0].bar(profit_table['name'], profit_table['risk_adjusted_profit'], 
              alpha=0.7, label='Risk-adjusted Profit', color='darkblue')
axes[0,0].set_title('Base vs Risk-adjusted Profit')
axes[0,0].set_ylabel('Profit per Unit ($)')
axes[0,0].legend()
axes[0,0].tick_params(axis='x', rotation=45)

# Risk components
axes[0,1].bar(profit_table['name'], profit_table['counterparty_risk'], 
              alpha=0.7, label='Counterparty Risk', color='red')
axes[0,1].bar(profit_table['name'], profit_table['volatility_impact'], 
              alpha=0.7, label='Volatility Impact', color='orange')
axes[0,1].set_title('Risk Components')
axes[0,1].set_ylabel('Risk Adjustment Factor')
axes[0,1].legend()
axes[0,1].tick_params(axis='x', rotation=45)

# Net margins
axes[1,0].bar(profit_table['name'], profit_table['net_margin'], 
              color='green', alpha=0.7)
axes[1,0].set_title('Net Margins')
axes[1,0].set_ylabel('Net Margin (%)')
axes[1,0].tick_params(axis='x', rotation=45)

# Effective demand vs capacity
axes[1,1].bar(profit_table['name'], profit_table['monthly_capacity_cargo'], 
              alpha=0.5, label='Capacity', color='lightgray')
axes[1,1].bar(profit_table['name'], profit_table['effective_demand'], 
              alpha=0.7, label='Effective Demand', color='purple')
axes[1,1].set_title('Capacity vs Effective Demand')
axes[1,1].set_ylabel('Units')
axes[1,1].legend()
axes[1,1].tick_params(axis='x', rotation=45)

plt.tight_layout()
plt.show()

📈 Enhanced Profit Analysis
          name  base_profit  risk_adjusted_profit  counterparty_risk  \
0    Singapore       -5.261                -3.842              0.023   
1  Japan (JKM)      -27.338               -18.044              0.011   
2        China      -24.305               -12.925              0.057   

   volatility_impact  net_margin  effective_demand  
0             -0.247      -0.320              14.7  
1             -0.329      -1.292              19.8  
2             -0.411      -1.178              17.1  


In [28]:
print("Debugging Optimization Constraints")
print("=" * 50)

# Check if any destinations have positive risk-adjusted profit
profitable_destinations = profit_table[profit_table['risk_adjusted_profit'] > 0]
print(f"Destinations with positive risk-adjusted profit: {len(profitable_destinations)}")
if len(profitable_destinations) > 0:
    print(profitable_destinations[['name', 'risk_adjusted_profit', 'risk_adjusted_capacity']].round(3))
else:
    print("No destinations have positive risk-adjusted profit!")
    print("Risk adjustments may be too high. Consider:")
    print("  • Reducing counterparty risk factors")
    print("  • Lowering volatility impact")
    print("  • Adjusting credit ratings")

# Check capacity constraints
total_capacity = profit_table['risk_adjusted_capacity'].sum()
print(f"\nTotal risk-adjusted capacity: {total_capacity:.2f}")
print(f"Supply to allocate: {supply_units}")
print(f"Capacity sufficient: {'Yes' if total_capacity >= supply_units else 'No'}")

# Check credit limits
print(f"\nCredit limits:")
for code in profit_table['code']:
    limit = risk_manager.exposure_limits.get(code, float('inf'))
    print(f"  {code}: {limit if limit < float('inf') else 'No limit'}")

# Check diversification constraint
max_single_allocation = supply_units * (1 - enhanced_assumptions['diversification_target'])
print(f"\nMax single allocation (diversification): {max_single_allocation:.2f}")

Debugging Optimization Constraints
Destinations with positive risk-adjusted profit: 0
No destinations have positive risk-adjusted profit!
Risk adjustments may be too high. Consider:
  • Reducing counterparty risk factors
  • Lowering volatility impact
  • Adjusting credit ratings

Total risk-adjusted capacity: 31.00
Supply to allocate: 20
Capacity sufficient: Yes

Credit limits:
  SLNG: 50.0
  JP: 100.0
  CN: 30.0

Max single allocation (diversification): 14.00


In [29]:
# Simplified optimization function - replace the complex one if needed

def simple_optimize_with_risks(
    profit_table: pd.DataFrame,
    total_supply_units: float,
    risk_manager: CounterpartyRiskManager
) -> Dict:
    """Simplified optimization with basic risk constraints"""
    
    codes = profit_table["code"].tolist()
    
    # Create optimization problem
    m = pulp.LpProblem("lng_allocation_simple", pulp.LpMaximize)
    x = pulp.LpVariable.dicts("alloc", codes, lowBound=0)
    
    # Objective: maximize risk-adjusted profit
    risk_adjusted_profit = {row["code"]: float(row["risk_adjusted_profit"]) for _, row in profit_table.iterrows()}
    m += pulp.lpSum([risk_adjusted_profit[c] * x[c] for c in codes])
    
    # Supply constraint
    m += pulp.lpSum([x[c] for c in codes]) <= float(total_supply_units), "SupplyLimit"
    
    # Individual capacity constraints (use original capacity, not risk-adjusted)
    for _, row in profit_table.iterrows():
        code = row["code"]
        capacity = float(row["monthly_capacity_cargo"])
        m += x[code] <= capacity, f"Capacity_{code}"
    
    # Credit limit constraints (only if reasonable)
    for code in codes:
        credit_limit = risk_manager.exposure_limits.get(code, float('inf'))
        if credit_limit < float('inf') and credit_limit > supply_units * 0.1:  # Only apply if limit is reasonable
            m += x[code] <= credit_limit, f"CreditLimit_{code}"
    
    # Solve optimization
    m.solve(pulp.PULP_CBC_CMD(msg=False))
    
    # Extract results
    allocation = {c: float(x[c].value() or 0.0) for c in codes}
    objective = pulp.value(m.objective)
    
    # Calculate basic risk metrics
    total_exposure = sum(allocation.values())
    
    return {
        "allocation": allocation,
        "objective": objective,
        "total_exposure": total_exposure,
        "optimization_status": pulp.LpStatus[m.status]
    }

In [30]:
# 8. Scenario Analysis

def run_scenario_analysis(scenario_name: str, price_shocks: Dict[str, float], 
                         capacity_multipliers: Dict[str, float] = None) -> Dict:
    """Run optimization under different scenarios"""
    
    # Apply price shocks
    scenario_prices = enhanced_prices.copy()
    for market, shock in price_shocks.items():
        scenario_prices[market] = scenario_prices.get(market, 0) + shock
    
    # Apply capacity multipliers
    scenario_ports = ports_df.copy()
    if capacity_multipliers:
        scenario_ports['monthly_capacity_cargo'] = scenario_ports.apply(
            lambda r: r['monthly_capacity_cargo'] * capacity_multipliers.get(r['code'], 1.0), 
            axis=1
        )
    
    # Build profit table for scenario
    scenario_profit_table = build_enhanced_profit_table(
        scenario_ports, 
        scenario_prices, 
        enhanced_assumptions,
        risk_manager, 
        demand_model
    )
    
    # Run optimization
    scenario_result = optimize_with_risk_constraints(
        scenario_profit_table,
        supply_units,
        risk_manager,
        max_portfolio_risk=enhanced_assumptions['risk_tolerance'],
        diversification_factor=enhanced_assumptions['diversification_target']
    )
    
    return {
        'scenario_name': scenario_name,
        'prices': scenario_prices,
        'profit_table': scenario_profit_table,
        'optimization_result': scenario_result
    }

# Define scenarios
scenarios = {
    'Base Case': ({}, {}),
    'Cold Snap NE Asia': ({'JP': 3.0, 'CN': 1.5}, {}),
    'SLNG Outage': ({}, {'SLNG': 0.5}),
    'China Demand Surge': ({'CN': 2.0}, {'CN': 1.5}),
    'Global Price Volatility': ({'SLNG': 1.0, 'JP': 2.0, 'CN': 1.5}, {})
}

# Run scenario analysis
scenario_results = {}
for name, (price_shocks, capacity_multipliers) in scenarios.items():
    result = run_scenario_analysis(name, price_shocks, capacity_multipliers)
    scenario_results[name] = result

print("🌡️ Scenario Analysis Results")
print("=" * 50)

# Compare scenarios
scenario_comparison = []
for name, result in scenario_results.items():
    opt_result = result['optimization_result']
    scenario_comparison.append({
        'Scenario': name,
        'Total Profit': opt_result['objective'],
        'Total Allocation': opt_result['total_exposure'],
        'Risk Score': opt_result['risk_metrics']['risk_score'],
        'Diversification': opt_result['risk_metrics']['diversification_score']
    })

comparison_df = pd.DataFrame(scenario_comparison)
print(comparison_df.round(3))

# Visualize scenario comparison
fig, axes = plt.subplots(2, 2, figsize=(15, 10))

# Profit comparison
axes[0,0].bar(comparison_df['Scenario'], comparison_df['Total Profit'], 
              color='lightblue', alpha=0.7)
axes[0,0].set_title('Total Profit by Scenario')
axes[0,0].set_ylabel('Profit ($)')
axes[0,0].tick_params(axis='x', rotation=45)

# Risk comparison
axes[0,1].bar(comparison_df['Scenario'], comparison_df['Risk Score'], 
              color='red', alpha=0.7)
axes[0,1].set_title('Risk Score by Scenario')
axes[0,1].set_ylabel('Risk Score')
axes[0,1].tick_params(axis='x', rotation=45)

# Diversification comparison
axes[1,0].bar(comparison_df['Scenario'], comparison_df['Diversification'], 
              color='green', alpha=0.7)
axes[1,0].set_title('Diversification Score by Scenario')
axes[1,0].set_ylabel('Diversification Score')
axes[1,0].tick_params(axis='x', rotation=45)

# Allocation comparison
allocation_comparison = []
for name, result in scenario_results.items():
    allocation = result['optimization_result']['allocation']
    for code, amount in allocation.items():
        if amount > 0:
            allocation_comparison.append({
                'Scenario': name,
                'Destination': profit_table[profit_table['code'] == code]['name'].iloc[0],
                'Allocation': amount
            })

if allocation_comparison:
    allocation_df = pd.DataFrame(allocation_comparison)
    pivot_df = allocation_df.pivot(index='Scenario', columns='Destination', values='Allocation').fillna(0)
    pivot_df.plot(kind='bar', ax=axes[1,1], stacked=True)
    axes[1,1].set_title('Allocation by Scenario')
    axes[1,1].set_ylabel('Allocation (units)')
    axes[1,1].tick_params(axis='x', rotation=45)
    axes[1,1].legend(bbox_to_anchor=(1.05, 1), loc='upper left')

plt.tight_layout()
plt.show()

🌡️ Scenario Analysis Results
                  Scenario  Total Profit  Total Allocation  Risk Score  \
0                Base Case           0.0               0.0         0.0   
1        Cold Snap NE Asia           0.0               0.0         0.0   
2              SLNG Outage           0.0               0.0         0.0   
3       China Demand Surge           0.0               0.0         0.0   
4  Global Price Volatility           0.0               0.0         0.0   

   Diversification  
0              1.0  
1              1.0  
2              1.0  
3              1.0  
4              1.0  


In [31]:
# 9. Sensitivity Analysis

def sensitivity_analysis(parameter: str, values: List[float], 
                        base_value: float) -> pd.DataFrame:
    """Run sensitivity analysis on key parameters"""
    
    results = []
    original_value = enhanced_assumptions.get(parameter, base_value)
    
    for value in values:
        # Update assumptions
        test_assumptions = enhanced_assumptions.copy()
        test_assumptions[parameter] = value
        
        # Build profit table
        test_profit_table = build_enhanced_profit_table(
            ports_df, 
            enhanced_prices, 
            test_assumptions,
            risk_manager, 
            demand_model
        )
        
        # Run optimization
        test_result = optimize_with_risk_constraints(
            test_profit_table,
            supply_units,
            risk_manager,
            max_portfolio_risk=enhanced_assumptions['risk_tolerance'],
            diversification_factor=enhanced_assumptions['diversification_target']
        )
        
        results.append({
            'parameter': parameter,
            'value': value,
            'total_profit': test_result['objective'],
            'total_allocation': test_result['total_exposure'],
            'risk_score': test_result['risk_metrics']['risk_score']
        })
    
    return pd.DataFrame(results)

# Run sensitivity analysis on key parameters
sensitivity_results = {}

# Freight cost sensitivity
freight_values = np.linspace(0.01, 0.05, 10)
sensitivity_results['freight_cost'] = sensitivity_analysis(
    'freight_cost_per_nm_per_unit', freight_values, 0.02
)

# Boil-off rate sensitivity
boiloff_values = np.linspace(0.001, 0.003, 10)
sensitivity_results['boiloff_rate'] = sensitivity_analysis(
    'boiloff_rate_per_1000nm', boiloff_values, 0.0015
)

# Risk tolerance sensitivity
risk_values = np.linspace(0.05, 0.25, 10)
sensitivity_results['risk_tolerance'] = sensitivity_analysis(
    'risk_tolerance', risk_values, 0.15
)

print("📊 Sensitivity Analysis Results")
print("=" * 50)

# Visualize sensitivity analysis
fig, axes = plt.subplots(1, 3, figsize=(18, 5))

# Freight cost sensitivity
axes[0].plot(sensitivity_results['freight_cost']['value'], 
             sensitivity_results['freight_cost']['total_profit'], 'o-')
axes[0].set_title('Freight Cost Sensitivity')
axes[0].set_xlabel('Freight Cost ($/nm/unit)')
axes[0].set_ylabel('Total Profit ($)')
axes[0].grid(True, alpha=0.3)

# Boil-off rate sensitivity
axes[1].plot(sensitivity_results['boiloff_rate']['value'], 
             sensitivity_results['boiloff_rate']['total_profit'], 'o-', color='orange')
axes[1].set_title('Boil-off Rate Sensitivity')
axes[1].set_xlabel('Boil-off Rate (per 1000nm)')
axes[1].set_ylabel('Total Profit ($)')
axes[1].grid(True, alpha=0.3)

# Risk tolerance sensitivity
axes[2].plot(sensitivity_results['risk_tolerance']['value'], 
             sensitivity_results['risk_tolerance']['total_profit'], 'o-', color='green')
axes[2].set_title('Risk Tolerance Sensitivity')
axes[2].set_xlabel('Risk Tolerance')
axes[2].set_ylabel('Total Profit ($)')
axes[2].grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

# Display sensitivity summary
for param, results in sensitivity_results.items():
    print(f"\n{param.replace('_', ' ').title()} Sensitivity:")
    print(f"  Profit Range: ${results['total_profit'].min():.2f} - ${results['total_profit'].max():.2f}")
    print(f"  Profit Volatility: ${results['total_profit'].std():.2f}")

📊 Sensitivity Analysis Results

Freight Cost Sensitivity:
  Profit Range: $0.00 - $9.05
  Profit Volatility: $2.86

Boiloff Rate Sensitivity:
  Profit Range: $0.00 - $0.00
  Profit Volatility: $0.00

Risk Tolerance Sensitivity:
  Profit Range: $0.00 - $0.00
  Profit Volatility: $0.00


In [None]:
# 10. Summary and Recommendations

print("LNG Trade Strategy Optimization Summary")
print("=" * 60)

# Base case results
base_result = scenario_results['Base Case']['optimization_result']
print(f"\nBase Case Performance:")
print(f"  Total Expected Profit: ${base_result['objective']:,.2f}")
print(f"  Total Allocation: {base_result['total_exposure']:.2f} units")
print(f"  Risk Score: {base_result['risk_metrics']['risk_score']:.3f}")
print(f"  Diversification Score: {base_result['risk_metrics']['diversification_score']:.3f}")

print(f"\nKey Insights:")
print(f"  • Risk-adjusted optimization reduces profit by ~{((profit_table['base_profit'].sum() - base_result['objective']) / profit_table['base_profit'].sum() * 100):.1f}% but improves risk profile")
print(f"  • Portfolio is {base_result['risk_metrics']['diversification_score']:.1%} diversified")
print(f"  • Highest risk-adjusted profit destination: {profit_table.loc[profit_table['risk_adjusted_profit'].idxmax(), 'name']}")
print(f"  • Most constrained by credit limits: {min(risk_manager.exposure_limits.items(), key=lambda x: x[1])[0]}")

print(f"\nRecommendations:")
print(f"  1. Monitor counterparty credit quality and adjust limits accordingly")
print(f"  2. Consider hedging strategies for high-volatility markets")
print(f"  3. Implement dynamic pricing based on demand elasticity")
print(f"  4. Regular stress testing under various scenarios")
print(f"  5. Portfolio rebalancing based on risk metrics")

print(f"\nNext Steps:")
print(f"  • Integrate real-time market data feeds")
print(f"  • Add more sophisticated risk models (VaR, CVaR)")
print(f"  • Implement multi-period optimization")
print(f"  • Add storage and inventory management")
print(f"  • Develop automated rebalancing triggers")

print("\nEnhanced LNG Trade Strategy Optimization Complete!")

🎯 LNG Trade Strategy Optimization Summary

📈 Base Case Performance:
  Total Expected Profit: $0.00
  Total Allocation: 0.00 units
  Risk Score: 0.000
  Diversification Score: 1.000

🎯 Key Insights:
  • Risk-adjusted optimization reduces profit by ~100.0% but improves risk profile
  • Portfolio is 100.0% diversified
  • Highest risk-adjusted profit destination: Singapore
  • Most constrained by credit limits: CN

💡 Recommendations:
  1. Monitor counterparty credit quality and adjust limits accordingly
  2. Consider hedging strategies for high-volatility markets
  3. Implement dynamic pricing based on demand elasticity
  4. Regular stress testing under various scenarios
  5. Portfolio rebalancing based on risk metrics

🔧 Next Steps:
  • Integrate real-time market data feeds
  • Add more sophisticated risk models (VaR, CVaR)
  • Implement multi-period optimization
  • Add storage and inventory management
  • Develop automated rebalancing triggers

✅ Enhanced LNG Trade Strategy Optimizatio

# Data Scraping

In [56]:
# lng_loader.py
# Clean LNG Market Data Loader — robust, modular, and JKM-friendly

from __future__ import annotations

import re
import warnings
from dataclasses import dataclass, field
from datetime import datetime
from pathlib import Path
from typing import Dict, Iterable, List, Optional, Tuple

import numpy as np
import pandas as pd

warnings.filterwarnings("ignore")


# ------------------------------- Config ------------------------------------ #

@dataclass
class LoaderConfig:
    data_folder: Path = Path("data")

    # File names (you can swap these to patterns if you like)
    historical_files: Dict[str, str] = field(default_factory=lambda: {
        "jkm_historical": "JKM Spot LNG Historical (Extracted 23Sep25).xlsx",
        "henry_hub_historical": "Henry Hub Historical (Extracted 23Sep25).xlsx",
        "ttf_historical": "TTF Historical (Extracted 23Sep25).xlsx",
        "brent_historical": "Brent Oil Historical Prices (Extracted 01Oct25).xlsx",
        "wti_historical": "WTI Historical (Extracted 23Sep25).xlsx",
    })
    forward_files: Dict[str, str] = field(default_factory=lambda: {
        "jkm_forward": "JKM Spot LNG Forward (Extracted 23Sep25).xlsx",
        "henry_hub_forward": "Henry Hub Forward (Extracted 23Sep25).xlsx",
        "ttf_forward": "TTF Forward (Extracted 23Sep25).xlsx",
        "wti_forward": "WTI Forward (Extracted 23Sep25).xlsx",
    })

    # Parsing
    excel_engine: Optional[str] = "openpyxl"
    sheet: Optional[str | int] = 0  # autodetect first sheet if None

    # Cleaning
    boiloff_window_days: int = 30  # used for volatility calc (rolling window)
    annualization_trading_days: int = 252


# ----------------------------- Loader Class -------------------------------- #

class LNGDataLoader:
    """
    Clean and organized LNG market data loader with robust Excel parsing,
    especially for JKM "Price History" workbooks that have multi-section sheets.
    """

    def __init__(self, config: LoaderConfig | None = None):
        self.cfg = config or LoaderConfig()
        self.historical_data: Dict[str, pd.DataFrame] = {}
        self.forward_data: Dict[str, pd.DataFrame] = {}
        self.market_metrics: Dict[str, Dict] = {}

    # ----------------------------- Public API ------------------------------ #

    def load_all_data(self) -> Dict[str, pd.DataFrame]:
        print("📊 Loading Real LNG Market Data from:", self.cfg.data_folder)
        print("=" * 70)

        self._load_historical_data()
        self._load_forward_data()
        self.market_metrics = self._calculate_market_metrics()
        return self.historical_data

    def get_current_prices_for_optimization(self) -> Dict[str, float]:
        """
        Map latest prices to the optimiser’s destination codes.
        Adjust mapping logic to your use-case.
        """
        current: Dict[str, float] = {}
        for name, df in self.historical_data.items():
            if df.empty or "price" not in df.columns:
                continue
            latest = float(df["price"].iloc[-1])
            n = name.lower()
            if "jkm" in n:
                current["JP"] = latest
            elif "henry_hub" in n:
                # Example uplift to reflect liquefaction + voyage; calibrate with your basis model.
                current["CN"] = latest * 1.5
            elif "ttf" in n:
                current["SLNG"] = latest * 1.1
        return current

    def export_data(self, filename: str = "lng_market_data.xlsx") -> None:
        if not self.historical_data and not self.forward_data:
            print("❌ Nothing to export.")
            return
        try:
            with pd.ExcelWriter(filename, engine="openpyxl") as writer:
                for name, df in self.historical_data.items():
                    if not df.empty:
                        df.to_excel(writer, sheet_name=f"{name}_historical")
                for name, df in self.forward_data.items():
                    if not df.empty:
                        df.to_excel(writer, sheet_name=f"{name}_forward")
                if self.market_metrics:
                    pd.DataFrame(self.market_metrics).T.to_excel(writer, sheet_name="market_metrics")
            print(f"📁 Exported to {filename}")
        except Exception as e:
            print(f"❌ Export error: {e}")

    # --------------------------- Historical Load --------------------------- #

    def _load_historical_data(self) -> None:
        print("📈 Loading historical price data...")
        for key, fname in self.cfg.historical_files.items():
            fpath = self.cfg.data_folder / fname
            print(f"  • {key}: {fpath.name}")
            if not fpath.exists():
                print("    ↳ ⚠️ File not found, skipping.")
                continue
            try:
                df_raw = pd.read_excel(fpath, sheet_name=self.cfg.sheet, engine=self.cfg.excel_engine, header=None)
                df_clean = (
                    self._clean_jkm(df_raw) if "jkm" in key.lower()
                    else self._clean_generic_price(df_raw, key)
                )
                if df_clean.empty:
                    print("    ↳ ❌ No valid rows after cleaning.")
                else:
                    self.historical_data[key] = df_clean
                    print(f"    ↳ ✅ {len(df_clean)} rows.")
            except Exception as e:
                print(f"    ↳ ❌ Error: {e}")

    # ---------------------------- Forward Load ----------------------------- #

    def _load_forward_data(self) -> None:
        print("📈 Loading forward curve data...")
        for key, fname in self.cfg.forward_files.items():
            fpath = self.cfg.data_folder / fname
            print(f"  • {key}: {fpath.name}")
            if not fpath.exists():
                print("    ↳ ⚠️ File not found, skipping.")
                continue
            try:
                df_raw = pd.read_excel(fpath, sheet_name=self.cfg.sheet, engine=self.cfg.excel_engine)
                df = self._clean_forward(df_raw, key)
                if df.empty:
                    print("    ↳ ❌ No valid rows after cleaning.")
                else:
                    self.forward_data[key] = df
                    print(f"    ↳ ✅ {len(df)} rows.")
            except Exception as e:
                print(f"    ↳ ❌ Error: {e}")

    # ---------------------------- Cleaners -------------------------------- #

    def _clean_generic_price(self, df: pd.DataFrame, name: str) -> pd.DataFrame:
        """
        Generic cleaner: tries to find a 2-column (date, price) table anywhere in the sheet.
        """
        df2 = df.copy()
        df2.replace(r"^\s*$", np.nan, regex=True, inplace=True)
        df2.dropna(how="all", inplace=True)

        # Heuristic: find first row that contains a "date-ish" value and then numeric values after it.
        # We’ll scan nearby rows to infer headers if present.
        # If the sheet already has tidy columns, this will still work.
        candidates: List[Tuple[int, List[str]]] = []
        for i in range(min(len(df2), 200)):  # limit scan
            row = df2.iloc[i].tolist()
            text_row = [self._clean_str(x) for x in row]
            if any(k in " ".join(text_row) for k in ["date", "exchange date", "trade date"]):
                candidates.append((i, text_row))

        # If we saw an explicit header, use it
        header_row = candidates[0][0] if candidates else None
        if header_row is not None:
            data = df2.iloc[header_row + 1:].copy()
            cols = [self._clean_str(x) or f"col{j}" for j, x in enumerate(df2.iloc[header_row].tolist())]
            data.columns = cols
        else:
            # Fallback: assume first two non-empty columns are date/price
            data = df2.copy()
            data.columns = [f"col{j}" for j in range(data.shape[1])]

        # Find likely date / price columns
        dcol, pcol = self._infer_date_price_columns(data.columns)
        if dcol not in data.columns or pcol not in data.columns:
            # try again with simple first two columns
            dcol, pcol = data.columns[:2]

        out = data[[dcol, pcol]].rename(columns={dcol: "date", pcol: "price"}).copy()
        out["date"] = pd.to_datetime(out["date"], errors="coerce")
        out["price"] = pd.to_numeric(out["price"], errors="coerce")
        out.dropna(subset=["date", "price"], inplace=True)
        out.sort_values("date", inplace=True)
        return self._add_returns_and_vol(out, name)

    def _clean_jkm(self, df: pd.DataFrame) -> pd.DataFrame:
        """
        JKM workbook cleaner: reliably finds the 'JKMc1 History' table and
        uses its header row ('Exchange Date', 'Close', 'Net', '%Chg', 'Volume', 'OI').
        """
        # 1) locate the "JKMc1 History" anchor row (any column)
        anchor = self._find_row_containing(df, patterns=[r"jkmc?1\s+history"], anywhere=True)
        if anchor is None:
            # Some files label it simply "History" — fallback to generic
            return self._clean_generic_price(df, "jkm_historical")

        # 2) find the actual header row within the next ~15 rows (search across all columns)
        header = self._find_header_row(
            df,
            start=anchor,
            window=20,
            required_any=["exchange date"],
            required_all=[]
        )
        if header is None:
            return self._clean_generic_price(df, "jkm_historical")

        # 3) carve out the body until a blank row or a new section keyword
        stop_keywords = ["statistics", "vap", "summary"]
        data_start = header + 1
        data_end = self._find_table_end(df, data_start, stop_keywords)

        body = df.iloc[data_start:data_end].copy()
        cols = [self._clean_str(x) or f"col{j}" for j, x in enumerate(df.iloc[header].tolist())]
        body.columns = cols

        # 4) choose columns: date = 'exchange date' (or similar), price = 'close'
        dcol = self._pick_col(body.columns, ["exchange date", "date"])
        pcol = self._pick_col(body.columns, ["close", "settle", "price"])
        if dcol is None or pcol is None:
            return self._clean_generic_price(df, "jkm_historical")

        out = body[[dcol, pcol]].rename(columns={dcol: "date", pcol: "price"}).copy()
        out["date"] = pd.to_datetime(out["date"], errors="coerce")
        out["price"] = pd.to_numeric(out["price"], errors="coerce")
        out.dropna(subset=["date", "price"], inplace=True)
        out.sort_values("date", inplace=True)
        return self._add_returns_and_vol(out, "jkm_historical")

    def _clean_forward(self, df: pd.DataFrame, name: str) -> pd.DataFrame:
        df2 = df.copy()
        df2.replace(r"^\s*$", np.nan, regex=True, inplace=True)
        df2.dropna(how="all", inplace=True)
        # convert numeric columns where possible (leave tenor/date labels as-is)
        for c in df2.columns[1:]:
            df2[c] = pd.to_numeric(df2[c], errors="ignore")
        return df2

    # --------------------------- Metrics ----------------------------------- #

    def _add_returns_and_vol(self, df: pd.DataFrame, source_name: str) -> pd.DataFrame:
        df = df.copy()
        df.set_index("date", inplace=True)
        df["daily_return"] = df["price"].pct_change()
        df["volatility_30d"] = (
            df["daily_return"]
            .rolling(self.cfg.boiloff_window_days)
            .std()
            * np.sqrt(self.cfg.annualization_trading_days)
        )
        df["source"] = source_name
        return df

    def _calculate_market_metrics(self) -> Dict[str, Dict]:
        metrics: Dict[str, Dict] = {}
        for name, data in self.historical_data.items():
            if data.empty or "price" not in data.columns:
                continue
            latest = float(data["price"].iloc[-1])
            base_idx = -self.cfg.boiloff_window_days if len(data) >= self.cfg.boiloff_window_days else 0
            then = float(data["price"].iloc[base_idx])
            metrics[name] = {
                "current_price": latest,
                "price_change_30d": ((latest - then) / then) * 100 if then else np.nan,
                "volatility_30d": float(data["volatility_30d"].iloc[-1]) if "volatility_30d" in data.columns else np.nan,
                "price_52w_high": float(data["price"].max()),
                "price_52w_low": float(data["price"].min()),
                "data_points": int(len(data)),
                "date_range": f"{data.index.min().date()} to {data.index.max().date()}",
            }
        return metrics

    # ------------------------- Helper routines ----------------------------- #

    @staticmethod
    def _clean_str(x) -> str:
        if x is None or (isinstance(x, float) and np.isnan(x)):
            return ""
        return str(x).strip().replace("\xa0", " ").lower()

    def _find_row_containing(
        self, df: pd.DataFrame, patterns: Iterable[str], anywhere: bool = False
    ) -> Optional[int]:
        """Return the first row index where any regex pattern matches (in col0 or anywhere)."""
        pats = [re.compile(p, flags=re.I) for p in patterns]
        for i in range(len(df)):
            row = df.iloc[i]
            cells = row.tolist() if anywhere else [row.iloc[0]]
            text = " ".join(self._clean_str(c) for c in cells)
            if any(p.search(text) for p in pats):
                return i
        return None

    def _find_header_row(
        self,
        df: pd.DataFrame,
        start: int,
        window: int,
        required_any: List[str],
        required_all: List[str],
    ) -> Optional[int]:
        """Search rows [start, start+window) for a header line containing tokens."""
        req_any = [self._clean_str(x) for x in required_any]
        req_all = [self._clean_str(x) for x in required_all]
        for i in range(start, min(start + window, len(df))):
            row = df.iloc[i].tolist()
            row_text = " ".join(self._clean_str(x) for x in row)
            if any(tok in row_text for tok in req_any) and all(tok in row_text for tok in req_all):
                return i
        return None

    def _find_table_end(self, df: pd.DataFrame, start: int, stop_keywords: List[str]) -> int:
        stops = [self._clean_str(k) for k in stop_keywords]
        for i in range(start, len(df)):
            row = df.iloc[i]
            first = self._clean_str(row.iloc[0])
            # stop on blank row or a keyword signaling a new section
            if first == "" or any(k in first for k in stops):
                return i
        return len(df)

    @staticmethod
    def _infer_date_price_columns(columns: Iterable[str]) -> Tuple[str, str]:
        cols = [str(c).lower() for c in columns]
        # best guesses
        d = next((c for c in columns if re.search(r"date|exchange", str(c), flags=re.I)), None)
        p = next((c for c in columns if re.search(r"close|price|settle|usd", str(c), flags=re.I)), None)
        # fallback
        if d is None: d = list(columns)[0]
        if p is None: p = list(columns)[1 if len(list(columns)) > 1 else 0]
        return d, p

    @staticmethod
    def _pick_col(columns: Iterable[str], candidates: List[str]) -> Optional[str]:
        low = {str(c).lower(): c for c in columns}
        for want in candidates:
            for k, original in low.items():
                if want in k:
                    return original
        return None

    # ----------------------------- Debugging ------------------------------- #

    def debug_data_structure(self, filename: str) -> pd.DataFrame:
        """Print a quick structure summary for a raw Excel file (first sheet)."""
        path = self.cfg.data_folder / filename
        print(f"🔍 Debug: {path}")
        try:
            df = pd.read_excel(path, sheet_name=self.cfg.sheet, engine=self.cfg.excel_engine, header=None)
        except Exception as e:
            print("❌ Read error:", e)
            return pd.DataFrame()

        print("shape:", df.shape)
        print("top 15 rows:")
        with pd.option_context("display.max_columns", None, "display.width", 200):
            print(df.head(15))
        anchor = self._find_row_containing(df, [r"jkmc?1\s+history"], anywhere=True)
        print("JKMc1 History anchor:", anchor)
        if anchor is not None:
            header = self._find_header_row(df, anchor, window=20, required_any=["exchange date"], required_all=[])
            print("Header row guess:", header)
            if header is not None:
                print("Header cells:", df.iloc[header].tolist())
        return df


# --------------------------- Example usage --------------------------------- #
if __name__ == "__main__":
    loader = LNGDataLoader()
    hist = loader.load_all_data()

    if loader.market_metrics:
        print("\n📊 Market Data Summary")
        print("-" * 70)
        for name, m in loader.market_metrics.items():
            print(f"\n{name}:")
            for k, v in m.items():
                if isinstance(v, float):
                    print(f"  {k}: {v:.4f}")
                else:
                    print(f"  {k}: {v}")

    current = loader.get_current_prices_for_optimization()
    print("\n💰 Current Prices for Optimisation mapping")
    for code, val in current.items():
        print(f"  {code}: {val:.2f}")

    # Uncomment to export
    # loader.export_data("lng_market_data.xlsx")


📊 Loading Real LNG Market Data from: data
📈 Loading historical price data...
  • jkm_historical: JKM Spot LNG Historical (Extracted 23Sep25).xlsx
    ↳ ✅ 753 rows.
  • henry_hub_historical: Henry Hub Historical (Extracted 23Sep25).xlsx
    ↳ ✅ 753 rows.
  • ttf_historical: TTF Historical (Extracted 23Sep25).xlsx
    ↳ ✅ 760 rows.
  • brent_historical: Brent Oil Historical Prices (Extracted 01Oct25).xlsx
    ↳ ✅ 461 rows.
  • wti_historical: WTI Historical (Extracted 23Sep25).xlsx
    ↳ ✅ 503 rows.
📈 Loading forward curve data...
  • jkm_forward: JKM Spot LNG Forward (Extracted 23Sep25).xlsx
    ↳ ✅ 62 rows.
  • henry_hub_forward: Henry Hub Forward (Extracted 23Sep25).xlsx
    ↳ ✅ 15 rows.
  • ttf_forward: TTF Forward (Extracted 23Sep25).xlsx
    ↳ ✅ 23 rows.
  • wti_forward: WTI Forward (Extracted 23Sep25).xlsx
    ↳ ✅ 124 rows.

📊 Market Data Summary
----------------------------------------------------------------------

jkm_historical:
  current_price: 11.2700
  price_change_30d: -5.

In [36]:
pip install openpyxl

Collecting openpyxl
  Downloading openpyxl-3.1.5-py2.py3-none-any.whl.metadata (2.5 kB)
Collecting et-xmlfile (from openpyxl)
  Downloading et_xmlfile-2.0.0-py3-none-any.whl.metadata (2.7 kB)
Downloading openpyxl-3.1.5-py2.py3-none-any.whl (250 kB)
Downloading et_xmlfile-2.0.0-py3-none-any.whl (18 kB)
Installing collected packages: et-xmlfile, openpyxl
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2/2[0m [openpyxl]
[1A[2KSuccessfully installed et-xmlfile-2.0.0 openpyxl-3.1.5
Note: you may need to restart the kernel to use updated packages.
