In [1]:
# Trade data summary for ASX electricity contracts

In [2]:
# Import libraries
import pandas as pd
import re

In [3]:
# Path of the trade data CSV; it is handed to read_trade_data() further down.
file_path = 'trades.csv'

In [4]:
# Plain-English building blocks of the products to summarise. Every
# combination of these four lists is encoded into a trade code later on.
contracts = [
    'Base', 'Peak', 'Cap',
    'Base_strip', 'Peak_strip', 'Cap_strip',
]
states = ['NSW', 'QLD', 'VIC', 'SA']
expiries = [f'Q{quarter}' for quarter in range(1, 5)]  # 'Q1' .. 'Q4'
years = list(range(2022, 2027))  # 2022 .. 2026 inclusive

In [5]:
# Lookup tables transcribed from the ASX electricity contract cheat sheet.
# Each maps a single code letter to its plain-English meaning.

# 1st letter of a product code -> contract type
contract_codes = {
    'B': 'Base',
    'P': 'Peak',
    'G': 'Cap',  # $300 cap
    'H': 'Base_strip',
    'D': 'Peak_strip',
    'R': 'Cap_strip',
}

# 2nd letter of a product code -> region
region_codes = {
    'N': 'NSW',
    'Q': 'QLD',
    'V': 'VIC',
    'S': 'SA',
}

# Month letter -> month abbreviation (not referenced by the code visible in
# this notebook; kept for completeness of the cheat sheet).
expiry_month_codes = {
    'F': 'JAN',
    'G': 'FEB',
    'H': 'MAR',
    'J': 'APR',
    'K': 'MAY',
    'M': 'JUN',
    'N': 'JUL',
    'Q': 'AUG',
    'U': 'SEP',
    'V': 'OCT',
    'X': 'NOV',
    'Z': 'DEC',
}

# 3rd letter of a product code -> expiry quarter
quarter_codes = {
    'H': 'Q1',
    'M': 'Q2',
    'U': 'Q3',
    'Z': 'Q4',
}

In [6]:
# Function to decode the product code to plain English
def decode(code):
    """Translate an ASX electricity product code into plain English.

    The expected layout is three letters (contract, region, expiry quarter),
    a four-digit year, a type letter and, for options, a seven-digit strike
    price in cents -- e.g. ``BNM2024F`` or ``BNH2022C0006000``.

    Args:
        code: Product code string.

    Returns:
        ``"<contract> <region> <quarter> <year>"`` when the type letter is
        ``F``; the same text followed by
        ``"Call option"``/``"Put option"`` ``with a strike price of $X.XX``
        when the type letter is ``C``/``P``; otherwise
        ``"<code> - Invalid code"``.

    Raises:
        ValueError: If the code has the right shape but its contract, region
            or expiry letter is not in the corresponding lookup table.
    """
    # fullmatch (rather than match) so that trailing characters after the
    # recognised fields make the code invalid instead of being ignored.
    match = re.fullmatch(r'([A-Z])([A-Z])([A-Z])(\d{4})([A-Z]?)(\d{7})?', code)
    if not match:
        return f"{code} - Invalid code"

    contract_key, region_key, expiry_key, year, option_type, strike_price = match.groups()

    contract = contract_codes.get(contract_key, '')
    region = region_codes.get(region_key, '')
    expiry = quarter_codes.get(expiry_key, '')

    if not contract:
        raise ValueError(f"Invalid contract code: {contract_key}")
    if not region:
        raise ValueError(f"Invalid region code: {region_key}")
    if not expiry:
        raise ValueError(f"Invalid expiry code: {expiry_key}")

    product = f"{contract} {region} {expiry} {year}"

    if option_type == "F":
        return product

    # The strike group is optional in the pattern, so an option code without
    # its seven strike digits arrives here with strike_price=None. Treat it
    # as invalid rather than letting int(None) raise a TypeError.
    if option_type in ("C", "P") and strike_price is not None:
        option = "Call option" if option_type == "C" else "Put option"
        strike_price_dollars = f"${int(strike_price) / 100:.2f}"
        return f"{product} {option} with a strike price of {strike_price_dollars}"

    return f"{code} - Invalid code"

In [7]:
# Function to encode plain English to product code
def encode(plain_english):
    """Translate a plain-English product description into its product code.

    Accepted inputs, split on whitespace:

    * 4 words -- ``"<contract> <region> <quarter> <year>"`` (a future).
    * 12 words -- the option sentence produced by ``decode()``, e.g.
      ``"Base NSW Q1 2022 Call option with a strike price of $60.00"``.
    * 8 words -- the shorter option form this function originally accepted.

    For options the fifth word selects Call/Put (case-insensitive) and the
    last word is the strike price in dollars, with or without a leading ``$``.

    Args:
        plain_english: Description string.

    Returns:
        The product code, or ``"Error: <message>"`` if the description could
        not be encoded. Errors are returned rather than raised.
    """
    try:
        parts = plain_english.split()

        if len(parts) not in (4, 8, 12):
            raise ValueError("Invalid input format.")

        contract, region, expiry, year_code = parts[:4]

        # Reverse lookups: the tables map code letter -> name.
        contract_code = next((k for k, v in contract_codes.items() if v == contract), None)
        region_code = next((k for k, v in region_codes.items() if v == region), None)
        expiry_code = next((k for k, v in quarter_codes.items() if v == expiry), None)

        if not contract_code:
            raise ValueError(f"Invalid contract code: {contract}")
        if not region_code:
            raise ValueError(f"Invalid region code: {region}")
        if not expiry_code:
            raise ValueError(f"Invalid expiry code: {expiry}")

        product_code = f"{contract_code}{region_code}{expiry_code}{year_code}"

        if len(parts) == 4:
            return f"{product_code}F"

        # Anything that is not recognisably Call or Put is rejected instead
        # of being silently encoded as a Put.
        option_type = {"Call": "C", "Put": "P"}.get(parts[4].capitalize())
        if option_type is None:
            raise ValueError(f"Invalid option type: {parts[4]}")

        # Strip the currency sign only if it is there, and round instead of
        # truncating: float("19.99") * 100 is 1998.9999999999998.
        strike_price_dollars = float(parts[-1].lstrip("$"))
        strike_price_cents = f"{round(strike_price_dollars * 100):07d}"
        return f"{product_code}{option_type}{strike_price_cents}"

    except Exception as e:
        return f"Error: {str(e)}"

In [8]:
# Example usage of the above encode and decode functions:
# decode() turns a product code into plain English, and encode() turns a
# plain-English description back into a product code.
code = "BNH2022C0006000"
plain_english = "Cap QLD Q4 2025"

decoded = decode(code)
encoded = encode(plain_english)

# Show both results; the expected text is recorded in the cell output below.
print(f"Decoded: {decoded}")
print(f"Encoded: {encoded}")

Decoded: Base NSW Q1 2022 Call option with a strike price of $60.00
Encoded: GQZ2025F


In [9]:
def generate_trade_codes(contracts, states, expiries, years):
    """Encode every contract/state/expiry/year combination.

    Combinations are produced with ``contracts`` varying slowest and
    ``years`` varying fastest. Each one is formatted as
    ``"<contract> <state> <expiry> <year>"`` and passed through ``encode()``.

    Returns:
        List of whatever ``encode()`` returned for each combination, in that
        order.
    """
    return [
        encode(f"{contract} {state} {expiry} {year}")
        for contract in contracts
        for state in states
        for expiry in expiries
        for year in years
    ]

# Product codes for every combination of the configured contracts, states,
# expiries and years; later cells filter the trade data by each of these.
trade_codes_of_interest = generate_trade_codes(contracts, states, expiries, years)
# Uncomment to inspect the generated codes:
# print(trade_codes_of_interest)

In [10]:
def read_trade_data(file_path):
    """Load the trade CSV and parse its ``Date`` column as datetimes.

    The first line of the file is skipped before the header row is read
    (it carries separator information rather than data).

    NOTE(review): ``pd.to_datetime`` is called without an explicit format,
    so the date layout is inferred -- confirm this matches the export.

    Args:
        file_path: Path (or file-like object) accepted by ``pd.read_csv``.

    Returns:
        DataFrame of the file contents with ``Date`` converted to datetime.
    """
    raw_trades = pd.read_csv(file_path, skiprows=1)
    return raw_trades.assign(Date=pd.to_datetime(raw_trades['Date']))

In [11]:
# Calculate the number of days a trade is away from the contract expiry
def days_from_expiry(contract_code, trade_date):
    """Count the days from a trade date to the end of the contract's quarter.

    The contract code is decoded with ``decode()`` and the quarter and year
    are read from the third and fourth words of the decoded text. Expiry is
    taken to be the last calendar day of that quarter.

    Args:
        contract_code: Product code understood by ``decode()``.
        trade_date: Trade date in ``dd/mm/YYYY`` form.

    Returns:
        Integer number of days (expiry minus trade date). If the code cannot
        be decoded, the ``"... Invalid code"`` text from ``decode()`` is
        returned instead; if the quarter is unrecognised, ``"Invalid quarter"``.
    """
    description = decode(contract_code)
    if "Invalid code" in description:
        return description

    # Decoded text starts "<contract> <region> <quarter> <year>".
    words = description.split()
    quarter, year = words[2], words[3]

    # Last day of each quarter as dd/mm.
    quarter_end = {
        'Q1': '31/03',
        'Q2': '30/06',
        'Q3': '30/09',
        'Q4': '31/12',
    }.get(quarter)

    if not quarter_end:
        return "Invalid quarter"

    date_format = "%d/%m/%Y"
    expiry = pd.to_datetime(f"{quarter_end}/{year}", format=date_format)
    traded = pd.to_datetime(trade_date, format=date_format)

    return (expiry - traded).days

# Example usage: from 15 Mar 2024 to the end of Q2 2024 (30 Jun 2024)
# is 16 + 30 + 31 + 30 = 107 days.
contract_code = "BNM2024F" # Base NSW Q2 2024
trade_date = "15/03/2024"
print(days_from_expiry(contract_code, trade_date))  # Output: 107

107


In [12]:
def process_trade_data(df, trade_code):
    """Summarise one product's trades per day, with running totals.

    Rows of ``df`` whose ``Code`` equals ``trade_code`` are summed per
    ``Date``. Prices are volume-weighted: dollars traded divided by MWh
    traded, both per day and cumulatively.

    Args:
        df: Trade data with ``Code``, ``Date``, ``Cleared Volume``,
            ``Face Value`` and ``Volume x MWh`` columns.
        trade_code: Product code to select.

    Returns:
        DataFrame with one row per trading date and the columns ``Date``,
        ``Daily contracts traded``, ``Daily dollars traded``,
        ``Daily MWh traded``, ``Daily average price``,
        ``Cumulative MWh traded``, ``Cumulative dollars traded``,
        ``Cumulative average price`` and ``Days from expiry``.
    """
    # Source column -> output column; also fixes the order of the daily sums.
    daily_names = {
        'Cleared Volume': 'Daily contracts traded',
        'Face Value': 'Daily dollars traded',
        'Volume x MWh': 'Daily MWh traded',
    }

    daily = (
        df.loc[df['Code'] == trade_code]
        .groupby('Date')
        .agg({source: 'sum' for source in daily_names})
        .reset_index()
        .rename(columns=daily_names)
    )

    # assign() evaluates these in order, so the cumulative average can read
    # the cumulative columns created just before it.
    return daily.assign(**{
        'Daily average price':
            lambda t: t['Daily dollars traded'] / t['Daily MWh traded'],
        'Cumulative MWh traded':
            lambda t: t['Daily MWh traded'].cumsum(),
        'Cumulative dollars traded':
            lambda t: t['Daily dollars traded'].cumsum(),
        'Cumulative average price':
            lambda t: t['Cumulative dollars traded'] / t['Cumulative MWh traded'],
        'Days from expiry':
            lambda t: t['Date'].apply(
                lambda day: days_from_expiry(trade_code, day.strftime("%d/%m/%Y"))
            ),
    })

In [13]:
# Load the trade data once; the following cells filter this frame by 'Code'.
df = read_trade_data(file_path)

In [14]:
# Create an empty dataframe which covers all days with trades.
#
# The date span is the earliest to the latest trade date across every trade
# code of interest. A single isin() membership test selects those rows in one
# pass, instead of filtering the whole frame once per trade code.
dates_of_interest = df.loc[df['Code'].isin(trade_codes_of_interest), 'Date']

if dates_of_interest.notna().any():
    # min()/max() skip missing dates.
    start = dates_of_interest.min()
    end = dates_of_interest.max()
    all_dates = pd.date_range(start=start, end=end)
else:
    # No trades for any code of interest: keep the sentinel bounds and use an
    # explicitly empty calendar rather than asking date_range to span them.
    start = pd.Timestamp.max
    end = pd.Timestamp.min
    all_dates = pd.DatetimeIndex([])

# Initialize an empty summary dataframe with all possible dates
summary_df = pd.DataFrame({'Date': all_dates})

In [15]:
# Build one block of per-product columns for every trade code, then place
# them side by side next to the calendar of dates.
collated_df_list = []

# Columns taken from each processed frame, in output order.
metric_columns = [
    'Days from expiry',
    'Daily average price',
    'Cumulative average price',
    'Daily MWh traded',
    'Cumulative MWh traded',
    'Daily dollars traded',
    'Cumulative dollars traded',
]

for trade_code in trade_codes_of_interest:
    # Step 1: Process trade data
    result_df = process_trade_data(df, trade_code)

    # Step 2: Decode the trade code to plain English
    plain_english_name = decode(trade_code)

    # Step 3: Find the last day of all trades for that trade code
    last_trade_date = result_df['Date'].max()

    # Step 4: Forward fill missing values until the last day of trade.
    # DataFrame.ffill() replaces fillna(method='ffill'), which is deprecated
    # in recent pandas releases.
    # NOTE(review): this carries *every* column forward, so on days without
    # trades the 'Daily ...' figures and 'Days from expiry' repeat the last
    # trading day's values -- confirm that is intended.
    result_df = result_df.set_index('Date').reindex(all_dates).ffill()
    # Rows after the last trade are dropped; after reset_index() the remaining
    # rows are numbered from 0, i.e. by position within all_dates.
    result_df = result_df[result_df.index <= last_trade_date].reset_index()

    # Step 5: Keep the selected columns, labelled with the product name
    temp_df = result_df[metric_columns].add_suffix(f' for {plain_english_name}')

    # Append the temporary dataframe to the list
    collated_df_list.append(temp_df)

# Concatenate all dataframes in the list along the columns
collated_df = pd.concat(collated_df_list, axis=1)

# Concatenate the collated dataframe with the date column. The date frame is
# rebuilt from all_dates rather than reusing summary_df, so running this cell
# again does not append a second copy of every column.
summary_df = pd.concat([pd.DataFrame({'Date': all_dates}), collated_df], axis=1)

In [16]:
# Write the collated summary to disk.
# NOTE(review): to_csv() is called with its default index=True, so the integer
# row index is written as an unnamed first column -- confirm that is wanted.
summary_df.to_csv('summary.csv')