<a href="https://colab.research.google.com/github/farielshafee2018/JPMorgan-finance-data/blob/main/contract_price_edited.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

To find the price of a contract:
1. Take in the injection and withdrawal dates, date1 and date2
2. Find the price on date1 and date2 using the previously fitted predictive curve
Find the rate that gives the maximum possible injection/withdrawal (for simplicity, start by assuming both are the same)
3. Find the total days in storage by subtracting the start day from the end day
4. Find the storage volume by multiplying the days by the rate
5. Limit it by the max volume
6. Find the total cost of storage by multiplying the storage price by the volume
7. Subtract the initial price from the final price, then multiply by the volume and subtract the total storage cost to find the contract price

The function used to predict prices is used within a larger function that also takes in the dates and then calculates the price

The functions used here are
1. setup_sarima_model
2. get_interpolated_price, and
3. analyze_gas_storage_contract

Within the final function:
1. get_valid_date and
2. get_valid_float
were used to accept only a date within the modeled range and to convert input strings into valid non-negative floats

In [None]:
import pandas as pd
import numpy as np
from statsmodels.tsa.statespace.sarimax import SARIMAX

# --- Module-level state for the fitted model ---
# Every name starts out as None and is filled in by setup_sarima_model():
#   gas_price_series_eom -> historical EOM data
#   forecast_mean_eom    -> predicted EOM means
#   forecast_ci_eom      -> predicted EOM confidence intervals
#   MIN_DATE / MAX_DATE  -> first historical date / last forecast date
gas_price_series_eom = forecast_mean_eom = forecast_ci_eom = None
MIN_DATE = MAX_DATE = None

def setup_sarima_model(csv_path="/content/Nat_Gas.csv", forecast_steps=12):
    """
    Load the price history, fit the SARIMA model and cache its forecast.

    Reads ``csv_path`` (which must contain a ``Dates`` and a ``Prices``
    column), fits a SARIMA (2,1,1)(1,1,0,12) model to the prices, forecasts
    ``forecast_steps`` steps past the end of the data and stores the results
    in the module-level globals ``gas_price_series_eom``,
    ``forecast_mean_eom``, ``forecast_ci_eom``, ``MIN_DATE`` and ``MAX_DATE``.

    Args:
        csv_path: Location of the price CSV. Defaults to the Colab path the
            notebook was written against.
        forecast_steps: Number of steps to forecast. The default of 12
            corresponds to one year, assuming one observation per month
            (presumably end-of-month prices; verify against the CSV).

    Returns:
        None. Results are published through the module-level globals.
    """
    global gas_price_series_eom, forecast_mean_eom, forecast_ci_eom, MIN_DATE, MAX_DATE

    # --- 1. Load Historical Prices ---
    df = pd.read_csv(csv_path)
    df["Dates"] = pd.to_datetime(df["Dates"])
    # Keep only the 'Prices' Series (not the whole DataFrame) and make sure it
    # is in chronological order, which the time-series model relies on.
    gas_price_series_eom = df.set_index("Dates")["Prices"].sort_index()

    # --- 2. Fit SARIMA Model (2,1,1)(1,1,0)12 ---
    print("--- 2. Fitting SARIMA Model... ---")
    order = (2, 1, 1)
    seasonal_order = (1, 1, 0, 12)

    sarima_model_eom = SARIMAX(
        gas_price_series_eom,
        order=order,
        seasonal_order=seasonal_order,
        enforce_stationarity=False,
        enforce_invertibility=False
    )
    # The 'disp=False' argument suppresses excessive output during fitting
    sarima_model_fit_eom = sarima_model_eom.fit(disp=False)

    # --- 3. Forecast Future Steps ---
    forecast_results_eom = sarima_model_fit_eom.get_forecast(steps=forecast_steps)

    forecast_mean_eom = forecast_results_eom.predicted_mean
    forecast_ci_eom = forecast_results_eom.conf_int()

    # Valid user dates run from the first historical point to the last
    # forecast point.
    MIN_DATE = gas_price_series_eom.index.min().date()
    MAX_DATE = forecast_mean_eom.index.max().date()
    print(f"Forecast Data Range: {forecast_mean_eom.index[0].date()} to {MAX_DATE}")
    print("Setup complete. Model ready for contract analysis.")

def get_interpolated_price(date_to_find, historical_series, forecast_series, ci_series):
    """
    Estimate the price for any day from the surrounding END-OF-MONTH prices.

    A date that is itself one of the known points is returned as-is
    (status 'Exact'). Any other date is linearly interpolated between the
    month-end before it and the month-end on or after it
    (status 'Interpolated'). If either bounding month-end is missing from the
    data -- for example the date lies before the first or after the last
    known point -- an error dictionary is returned instead of a guessed price.

    Args:
        date_to_find: The day to price. Anything ``pd.Timestamp`` accepts;
            a time-of-day component is ignored.
        historical_series: Historical prices indexed by date.
        forecast_series: Forecast prices indexed by date, continuing on from
            ``historical_series``.
        ci_series: Forecast confidence intervals. Accepted for interface
            compatibility; not used in the calculation.

    Returns:
        On success a dict with keys 'status' ('Exact' or 'Interpolated'),
        'type' ('HISTORICAL' or 'PREDICTED'), 'price' and 'date'.
        On failure a dict with 'status' == 'Error' and a 'message'.
    """
    # Drop any time-of-day part so the lookups below match the date index.
    target = pd.Timestamp(date_to_find).normalize()

    # 1. Combine historical and forecast means for seamless lookup
    combined_series = pd.concat([historical_series, forecast_series])

    # Anything up to the last historical point counts as historical data.
    data_type = 'HISTORICAL' if target <= historical_series.index.max() else 'PREDICTED'

    # 2. Exact hit: the date is one of the known points, so no interpolation
    # (and no following point) is needed.
    if target in combined_series.index:
        return {
            'status': 'Exact',
            'type': data_type,
            'price': combined_series.loc[target],
            'date': target.date()
        }

    # 3. Interpolation bounds: the month-end strictly before the date and the
    # month-end on or after it.
    date_floor = target + pd.offsets.MonthEnd(-1)
    date_ceil = target + pd.offsets.MonthEnd(0)

    try:
        P_floor = combined_series.loc[date_floor]
        P_ceil = combined_series.loc[date_ceil]
    except KeyError:
        # Handle cases where EOM dates fall outside the defined range
        return {'status': 'Error', 'message': f"Internal error: EOM bounds not found for {target.date()}"}

    # 4. Linear interpolation by elapsed days within the month
    total_days = (date_ceil - date_floor).days
    days_elapsed = (target - date_floor).days
    estimated_price = P_floor + (P_ceil - P_floor) / total_days * days_elapsed

    return {
        'status': 'Interpolated',
        'type': data_type,
        'price': estimated_price,
        'date': target.date()
    }


def analyze_gas_storage_contract():
    """
    Interactively price a natural gas storage contract and print the result.

    Prompts the user for one or more injection/withdrawal date pairs, a daily
    injection/withdrawal rate, a maximum storage capacity and a storage cost
    per MMBtu, then prints the implied contract value.

    Pricing of each date pair:
      * spot prices on both dates come from get_interpolated_price();
      * volume = rate * (days between injection and withdrawal), limited to
        the capacity still unused by earlier pairs (max_storage is treated as
        a single cumulative budget across all pairs);
      * gross profit = (withdrawal price - injection price) * volume;
      * storage cost = storage cost per MMBtu * volume.
    The contract value is total gross profit minus total storage cost.

    Returns:
        None. The analysis is printed. The function returns early, after
        printing an error, if a withdrawal date is not after its injection
        date or if a price cannot be retrieved.
    """
    # 1. Check if model setup is complete
    if gas_price_series_eom is None:
        print("Model data is not loaded. Running setup first...")
        setup_sarima_model()
        print("-" * 50)

    # --- Input Collection and Validation ---

    # Helper to get and validate date input
    def get_valid_date(prompt):
        while True:
            date_str = input(f"{prompt} (YYYY-MM-DD, Range: {MIN_DATE} to {MAX_DATE}): ")
            try:
                date_obj = pd.to_datetime(date_str)
            except ValueError:
                print("Invalid date format. Please use YYYY-MM-DD.")
                continue
            # Blank input parses to NaT rather than raising; reject it too.
            if pd.isna(date_obj):
                print("Invalid date format. Please use YYYY-MM-DD.")
                continue
            # Drop any time-of-day part so lookups match the date index.
            date_obj = date_obj.normalize()
            if date_obj.date() < MIN_DATE or date_obj.date() > MAX_DATE:
                print(f"Error: Date is outside the modeled range. Must be between {MIN_DATE} and {MAX_DATE}.")
                continue
            return date_obj

    # Helper to get and validate numeric input
    def get_valid_float(prompt):
        while True:
            try:
                value = float(input(prompt))
            except ValueError:
                print("Invalid input. Please enter a number.")
                continue
            if value < 0:
                print("Value cannot be negative.")
                continue
            return value

    # Helper to get a positive whole number (how many date pairs to read)
    def get_valid_count(prompt):
        while True:
            try:
                value = int(input(prompt))
            except ValueError:
                print("Invalid input. Please enter a whole number.")
                continue
            if value < 1:
                print("Value must be at least 1.")
                continue
            return value

    print("\n--- Input Contract Details (All Volumes in MMBtu, Prices in $) ---")
    # ask how many date pairs
    num_pairs = get_valid_count("how many day pairs?")

    # input start and end day pairs and collect them
    date_pairs = []
    for _ in range(num_pairs):
        inj_date = get_valid_date("Enter Injection (Buy) Date: ")
        wdr_date = get_valid_date("Enter Withdrawal (Sell) Date: ")
        if wdr_date <= inj_date:
            print("Error: Withdrawal date must be AFTER the injection date.")
            return
        date_pairs.append((inj_date, wdr_date))

    rate = get_valid_float("Enter Injection/Withdrawal Daily Rate (MMBtu/day, e.g., 5000): ")
    max_storage = get_valid_float("Enter Maximum Storage Capacity (MMBtu, e.g., 1000000): ")
    storage_cost = get_valid_float("Enter Storage Cost per MMBtu: ")

    # --- Calculation ---

    legs = []                  # per-pair details kept for the report
    final_volume = 0.0         # cumulative volume across all pairs
    gross_profit_total = 0.0
    total_storage_cost = 0.0

    for inj_date, wdr_date in date_pairs:
        # i) Estimate Spot Prices using the SARIMA model
        inj_result = get_interpolated_price(inj_date, gas_price_series_eom, forecast_mean_eom, forecast_ci_eom)
        wdr_result = get_interpolated_price(wdr_date, gas_price_series_eom, forecast_mean_eom, forecast_ci_eom)

        if inj_result['status'] == 'Error' or wdr_result['status'] == 'Error':
            print("\nCalculation Error: Could not retrieve necessary price data.")
            return

        inj_price = inj_result['price']
        wdr_price = wdr_result['price']

        # ii) Volume: days in storage times the daily rate, limited by the
        # capacity that earlier pairs have not already used.
        days_in_storage = (wdr_date - inj_date).days
        remaining_capacity = max_storage - final_volume
        leg_volume = max(0.0, min(rate * days_in_storage, remaining_capacity))

        # iii) Profit and cost for this pair
        gross_profit_per_mmbtu = wdr_price - inj_price
        final_volume += leg_volume
        gross_profit_total += gross_profit_per_mmbtu * leg_volume
        total_storage_cost += storage_cost * leg_volume

        legs.append((inj_date, wdr_date, inj_price, wdr_price, gross_profit_per_mmbtu, leg_volume))

    # Net value: gross profit from the price difference minus storage cost
    contract_value = gross_profit_total - total_storage_cost

    # --- Display Result ---
    print("\n" + "=" * 50)
    print("NATURAL GAS STORAGE CONTRACT ANALYSIS")
    print("=" * 50)
    for inj_date, wdr_date, inj_price, wdr_price, gross_profit_per_mmbtu, leg_volume in legs:
        print(f"Injection (Buy) Date: {inj_date.date()} -> Price Est: **${inj_price:.4f}/MMBtu**")
        print(f"Withdrawal (Sell) Date: {wdr_date.date()} -> Price Est: **${wdr_price:.4f}/MMBtu**")
        print(f"Gross Price Difference: ${gross_profit_per_mmbtu:.4f}/MMBtu")
        print(f"Volume:                 {leg_volume:,.0f} MMBtu")
        print("-" * 50)

    print(f"Storage Cost:           ${storage_cost:.4f}/MMBtu")
    print("-" * 50)

    print(f"Total Storage Volume: {final_volume:,.0f} MMBtu (Max Cap: {max_storage:,.0f} MMBtu)")
    print(f"Total Gross Profit:   ${gross_profit_total:,.2f}")
    print(f"Total Storage Cost:   ${total_storage_cost:,.2f}")
    print("-" * 50)

    # Output Conclusion
    if contract_value > 0:
        print(f"Implied Contract Value (Profit): **${contract_value:,.2f}**")
        print("\nCONCLUSION: This contract looks **POTENTIALLY PROFITABLE** based on the SARIMA forecast.")
    else:
        print(f"Implied Contract Value (Loss): **${contract_value:,.2f}**")
        print("\nCONCLUSION: This contract is **LIKELY TO RESULT IN A LOSS** based on the SARIMA forecast.")

    print("=" * 50)


# --- EXECUTION START ---
# Fit the model up front so the module-level globals (price series, forecast,
# MIN_DATE/MAX_DATE) are already populated when the analysis function runs.
setup_sarima_model()

# Start the interactive analysis: this prompts for the contract inputs on
# stdin and prints the resulting contract valuation.
analyze_gas_storage_contract()


  df["Dates"]=pd.to_datetime(df["Dates"])
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


--- 2. Fitting SARIMA Model... ---
Forecast Data Range: 2024-10-31 to 2025-09-30
Setup complete. Model ready for contract analysis.

--- Input Contract Details (All Volumes in MMBtu, Prices in $) ---
Enter Injection (Buy) Date:  (YYYY-MM-DD, Range: 2020-10-31 to 2025-09-30): 2025-11-11
Error: Date is outside the modeled range. Must be between 2020-10-31 and 2025-09-30.
Enter Injection (Buy) Date:  (YYYY-MM-DD, Range: 2020-10-31 to 2025-09-30): 2025-12-12
Error: Date is outside the modeled range. Must be between 2020-10-31 and 2025-09-30.
Enter Injection (Buy) Date:  (YYYY-MM-DD, Range: 2020-10-31 to 2025-09-30): 2025-01-01
Enter Withdrawal (Sell) Date:  (YYYY-MM-DD, Range: 2020-10-31 to 2025-09-30): 2025-02-02
Enter Injection/Withdrawal Daily Rate (MMBtu/day, e.g., 5000): 4000
Enter Maximum Storage Capacity (MMBtu, e.g., 1000000): 10000000
Enter Storage Cost per MMBtu: 20

NATURAL GAS STORAGE CONTRACT ANALYSIS
Injection (Buy) Date: 2025-01-01 -> Price Est: **$12.9285/MMBtu**
Withdrawal