# Transform Stage: Account Endpoints

In [None]:
import json
import re
from datetime import datetime
from pathlib import Path
from typing import Any, Self, TypeAlias

import pandas as pd
from pandas import DataFrame
from pydantic import BaseModel

# from rich.pretty import pprint

## Pydantic Model
### Transactions: Trades and Overrides
#### Trades: Equities and Options
- Raw trade structure:
    - `amount` - float
    - `date` - datetime
    - `type` - str
    - `trade` - dict
        - `commission` - float
        - `description` - str
        - `price` - float
        - `quantity` - float
        - `symbol` - str
        - `trade_type` - str

- `RawTradeKey` model: the `trade` key from above and its associated fields:

| RawTradeKey   | type  |
| ------------- | ----- |
| `commission`  | float |
| `description` | str   |
| `price`       | float |
| `quantity`    | float |
| `symbol`      | str   |
| `trade_type`  | str   |

- `RawTrade`: the full model of raw transactions of type 'trade'. Note, in the implementation there is an additional `BaseTransaction` model that `RawTrade` inherits from. That model is left out for brevity.
- `FlatTrade` is the flattened version of `RawTrade`. It is not instantiated via the `BaseModel` method `model_validate()` directly, instead, it is instantiated via its class method `flatten_raw_and_validate()` which first flattens the object and then makes a call to `model_validate()`.

| RawTrade | type        | FlatTrade     | type     |
| -------- | ----------- | ------------- | -------- |
| `amount` | float       | `date`        | datetime |
| `date`   | datetime    | `trade_type`  | str      |
| `type`   | str         | `description` | str      |
| `trade`  | RawTradeKey | `symbol`      | str      |
|          |             | `price`       | float    |
|          |             | `quantity`    | float    |
|          |             | `amount`      | float    |
|          |             | `commission`  | float    |

#### Overrides
- Raw override structure:
    - `amount` - float
    - `date` - datetime
    - `type` - str
    - `option` - dict
        - `option_type` - str
        - `description` - str
        - `quantity` - float

- `RawOptionKey` model: the `option` key from above and its associated fields:

| RawOptionKey  | type  |
| ------------- | ----- |
| `option_type` | str   |
| `description` | str   |
| `quantity`    | float |

- `RawOverride`: the full model of raw transactions of type 'option'. Note, in the implementation there is an additional `BaseTransaction` model that `RawOverride` inherits from. That model is left out for brevity.
- `FlatOverride` is the flattened version of `RawOverride`. It is not instantiated via the `BaseModel` method `model_validate()` directly; instead, it is instantiated via its class method `flatten_raw_and_validate()`, which first flattens the object and then makes a call to `model_validate()`.

| RawOverride | type         | FlatOverride  | type     |
| ----------- | ------------ | ------------- | -------- |
| `amount`    | float        | `date`        | datetime |
| `date`      | datetime     | `option_type` | str      |
| `type`      | str          | `description` | str      |
| `option`    | RawOptionKey | `quantity`    | float    |
|             |              | `amount`      | float    |

In [None]:
# Model dictionaries that occur in both the trade and override type transactions.


class RawTradeKey(BaseModel):
    """Raw trade key's values.

    Models the dictionary stored under the `trade` key of a raw transaction
    of type 'trade' (see `RawTrade.trade`).
    """

    # Field names mirror the keys of the nested `trade` dictionary.
    commission: float
    description: str
    price: float
    # Signed value: the options DataFrame labels rows with quantity < 0 as "short".
    quantity: float
    symbol: str
    # Compared against "equity" / "option" when the history is split by category.
    trade_type: str

class RawOptionKey(BaseModel):
    """Raw option key's values.

    Models the dictionary stored under the `option` key of a raw transaction
    of type 'option' (see `RawOverride.option`).
    """

    # Field names mirror the keys of the nested `option` dictionary.
    option_type: str
    description: str
    # Signed value: the overrides cell labels rows with quantity < 0 as "short".
    quantity: float

class BaseTransaction(BaseModel):
    """Model the base form common to all transactions.

    `RawTrade` and `RawOverride` extend this model with their nested payload.
    """

    amount: float
    date: datetime
    # Mirrors the API key name; `split_history_and_validate` branches on this
    # value ("trade", "option", anything else).
    type: str

class RawTrade(BaseTransaction):
    """Model the raw transaction of type 'trade' which includes equities and options.

    Inherits `amount`, `date` and `type` from `BaseTransaction`.
    """

    # Nested trade details; lifted to top-level fields by
    # `FlatTrade.flatten_raw_and_validate()`.
    trade: RawTradeKey

class RawOverride(BaseTransaction):
    """Model the raw transaction of type 'option' labeled as an override.

    Inherits `amount`, `date` and `type` from `BaseTransaction`.
    """

    # Nested option details; lifted to top-level fields by
    # `FlatOverride.flatten_raw_and_validate()`.
    option: RawOptionKey

class FlatTrade(BaseModel):
    """Single-level view of a 'trade' transaction, built from a `RawTrade`.

    Instances are normally created through `flatten_raw_and_validate()` rather
    than by calling `model_validate()` on raw API data directly.
    """

    date: datetime
    trade_type: str
    description: str
    symbol: str
    price: float
    quantity: float
    amount: float
    commission: float

    @classmethod
    def flatten_raw_and_validate(cls, transaction: RawTrade) -> Self:
        """Lift the nested `trade` fields to the top level and validate the result.

        Parameters
        ----------
        transaction : RawTrade
            Validated raw transaction of type 'trade'.

        Returns
        -------
        Self
            A validated, flattened instance of this class.
        """
        # Top-level fields without the nested `trade` model ...
        outer_fields = transaction.model_dump(exclude={"trade"})
        # ... and the nested model's own fields, dumped separately.
        nested_fields = transaction.trade.model_dump()
        # Nested values are merged last, so they win on any key collision.
        # The leftover `type` key is not a field of this model; this relies on
        # pydantic's default handling of extra keys.
        return cls.model_validate({**outer_fields, **nested_fields})


class FlatOverride(BaseModel):
    """Single-level view of an override transaction, built from a `RawOverride`.

    Instances are normally created through `flatten_raw_and_validate()` rather
    than by calling `model_validate()` on raw API data directly.
    """

    date: datetime
    option_type: str
    description: str
    quantity: float
    amount: float

    @classmethod
    def flatten_raw_and_validate(cls, transaction: RawOverride) -> Self:
        """Lift the nested `option` fields to the top level and validate the result.

        Parameters
        ----------
        transaction : RawOverride
            Validated raw transaction of type 'option' labeled as an override.

        Returns
        -------
        Self
            A validated, flattened instance of this class.
        """
        # Top-level fields without the nested `option` model ...
        outer_fields = transaction.model_dump(exclude={"option"})
        # ... and the nested model's own fields, dumped separately.
        nested_fields = transaction.option.model_dump()
        # Nested values are merged last, so they win on any key collision.
        # The leftover `type` key is not a field of this model; this relies on
        # pydantic's default handling of extra keys.
        return cls.model_validate({**outer_fields, **nested_fields})

## Helper Functions
### Read API Data

In [None]:
def read_api_data(endpoint_name: str, data_dir: Path) -> dict[str, Any]:
    """Read an API response's JSON from file.

    Parameters
    ----------
    endpoint_name : str
        API endpoint name; the file read is `<endpoint_name>.json`.
    data_dir : Path
        Path to the data directory that contains the input JSON file.

    Returns
    -------
    dict[str, Any]
        Parsed JSON content of the file.

    Raises
    ------
    FileNotFoundError
        If `<data_dir>/<endpoint_name>.json` does not exist.
    json.JSONDecodeError
        If the file content is not valid JSON.
    """
    # Location of the input JSON file.
    file_path = data_dir / f"{endpoint_name}.json"
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's locale encoding.
    with file_path.open(encoding="utf-8") as f:
        return json.load(f)

### Split History of Transactions

In [None]:
# Transaction container: a list of transactions, each one a raw dictionary.
TC: TypeAlias = list[dict[str, Any]]
# Validated transaction container: maps a category name (e.g. "equity_raw",
# "equity") to the list of transactions in that category.
# NOTE(review): `split_history_and_validate` appends pydantic model instances
# (RawTrade, FlatTrade, RawOverride, FlatOverride) to these lists, not
# dictionaries, so the element type inherited from `TC` does not match what is
# actually stored — consider a dedicated alias.
VTC: TypeAlias = dict[str, TC]


def split_history_and_validate(history_raw: TC) -> tuple[VTC, TC]:
    """Split transactions based on transaction type and validate using the data models.

    Transaction categories are as follows:
    - equity trades: type 'trade' with trade type 'equity'
    - options trades: type 'trade' with trade type 'option'
    - overrides: type 'option' (transactions external to tradier.com)
    - other: every remaining transaction, including trades whose trade type
      is neither 'equity' nor 'option'

    Parameters
    ----------
    history_raw : TC
        Raw events from the history endpoint, one dictionary per transaction.
        Each dictionary must have a `type` key.

    Returns
    -------
    tuple[VTC, TC]
        The validated container (VTC) holding the raw and flattened models of
        each category, and the non-validated container (TC) holding the
        untouched dictionaries of all other transactions.

    Raises
    ------
    pydantic.ValidationError
        If a 'trade' or 'option' transaction does not match its data model.
    """
    trades: VTC = {
        "equity_raw": [],
        "equity": [],
        "options_raw": [],
        "options": [],
        "overrides_raw": [],
        "overrides": [],
    }
    other_raw: TC = []

    # Maps a trade's `trade_type` value to the key prefix used in `trades`.
    trade_categories = {"equity": "equity", "option": "options"}

    for transaction in history_raw:
        transaction_type = transaction["type"]
        if transaction_type == "trade":
            raw_trade = RawTrade.model_validate(transaction)
            category = trade_categories.get(raw_trade.trade.trade_type)
            if category is None:
                # Unrecognized trade type: keep the raw dictionary with the
                # other transactions instead of silently dropping it.
                other_raw.append(transaction)
                continue
            trades[f"{category}_raw"].append(raw_trade)
            trades[category].append(FlatTrade.flatten_raw_and_validate(raw_trade))
        elif transaction_type == "option":
            raw_override = RawOverride.model_validate(transaction)
            trades["overrides_raw"].append(raw_override)
            trades["overrides"].append(
                FlatOverride.flatten_raw_and_validate(raw_override)
            )
        else:
            other_raw.append(transaction)
    return trades, other_raw

### Create Equity DataFrame

In [None]:
def create_equity_dataframe(transactions: list[FlatTrade]) -> DataFrame:
    """Create and clean a DataFrame containing equity trades.

    Parameters
    ----------
    transactions : list[FlatTrade]
        Equity trade transactions. May be empty, in which case an empty
        DataFrame with the expected columns is returned.

    Returns
    -------
    DataFrame
        One row per transaction with the `FlatTrade` fields as columns plus
        the derived `calc_amount` column.
    """
    # Naming the columns explicitly keeps them present when there are no
    # transactions, so the column accesses below cannot raise a KeyError.
    equity = pd.DataFrame(
        [transaction.model_dump() for transaction in transactions],
        columns=list(FlatTrade.model_fields),
    )

    # Convert to more optimal data types.
    for column in ("trade_type", "description", "symbol"):
        equity[column] = equity[column].astype(pd.StringDtype())

    # Add a calculated amount column to compare to the raw `amount` column.
    equity["calc_amount"] = equity["price"] * equity["quantity"] + equity["commission"]
    return equity

### Create Options DataFrame

In [None]:
def create_options_dataframe(transactions: list[FlatTrade]) -> DataFrame:
    """Create and clean a DataFrame containing options trades.

    The `description` string of each trade is split on whitespace and its
    tokens become the `multi_leg_type`, `symbol` and `multi_leg_amount`
    columns. The original `symbol` field is kept as `transaction_symbol`.

    Parameters
    ----------
    transactions : list[FlatTrade]
        Options trade transactions. May be empty, in which case an empty
        DataFrame with the expected columns is returned.

    Returns
    -------
    DataFrame
        Resulting options DataFrame, sorted by `date` (newest first) and then
        by `symbol`, with a fresh integer index.
    """
    # Columns of the result, in output order.
    options_cols = [
        "date",
        "trade_type",
        "symbol",
        "transaction_symbol",
        "price",
        "quantity",
        "amount",
        "commission",
        "multi_leg_position",
        "multi_leg_type",
        "multi_leg_amount",
    ]

    if not transactions:
        # Nothing to parse: without rows the description split below yields no
        # columns to drop or rename, so return the empty frame directly.
        return pd.DataFrame(columns=options_cols)

    options = pd.DataFrame([transaction.model_dump() for transaction in transactions])

    # Convert to more optimal data types.
    for column in ("trade_type", "description", "symbol"):
        options[column] = options[column].astype(pd.StringDtype())

    # Rename the `symbol` column. A symbol column will be derived from the `description` column.
    options = options.rename(columns={"symbol": "transaction_symbol"})

    # Process the `description` column by splitting the string and creating new columns.
    # Token 2 is discarded (presumably the expiration date — TODO confirm
    # against real descriptions).
    description_split = options["description"].str.split(expand=True)
    description_split = description_split.drop(2, axis=1)
    description_split = description_split.rename(
        columns={0: "multi_leg_type", 1: "symbol", 3: "multi_leg_amount"}
    )
    description_split["multi_leg_type"] = description_split[
        "multi_leg_type"
    ].str.lower()
    description_split["multi_leg_amount"] = description_split[
        "multi_leg_amount"
    ].astype("float64")

    options = pd.concat([options, description_split], axis=1)

    # A negative quantity marks a short position; everything else is long.
    options["multi_leg_position"] = "long"
    options.loc[options["quantity"] < 0, "multi_leg_position"] = "short"
    options["multi_leg_position"] = options["multi_leg_position"].astype(
        pd.StringDtype()
    )

    # Select and order the output columns (this drops `description`).
    options = options[options_cols]

    options = options.sort_values(by=["date", "symbol"], ascending=[False, True])
    options = options.reset_index(drop=True)

    return options

### Create Overrides DataFrame

___

## Load Data
### Paths

In [None]:
# The backend directory is the parent of the current working directory;
# the data files live in its `data` sub-directory.
backend_dir = Path.cwd().parent
data_dir = backend_dir.joinpath("data")

### Load

In [None]:
# Read the saved history response, then drill down to its events.
history_response = read_api_data(data_dir=data_dir, endpoint_name="history")
history_raw = history_response["history"]["event"]

## Transform
### Split Transactions and Validate with Pydantic Models
Source Variable:
- `history_raw`: raw history of all transactions

Resulting Validated Data Structures:
- `trades` - contains 6 keys, with a 7th (`exercised`) still to be implemented:
    - `equity_raw`
    - `equity`
    - `options_raw`
    - `options`
    - `overrides_raw`
    - `overrides`
    - `exercised` - TODO

> *note: keys with the suffix `_raw` match the shape of the data returned by the API. Non-suffixed keys are flattened versions ready for DataFrame creation.*

Additional Non-Validated Data Structure:
- `other_raw` - any remaining transactions that don't fit the categories above. This structure does not contain anything modeled by pydantic.

In [None]:
# Validate every raw event and bucket it: `trades` holds the pydantic models
# keyed by category, `other_raw` holds the untouched leftover dictionaries.
trades, other_raw = split_history_and_validate(history_raw)

In [None]:
# Summarize how many transactions landed in each category.
category_counts = {
    "Equities": len(trades["equity"]),
    "Options": len(trades["options"]),
    "Overrides": len(trades["overrides"]),
    "Other": len(other_raw),
}
print("Number of each transaction type")
for label, count in category_counts.items():
    # Pad every label to the same width so the counts line up in one column.
    print(f"{label + ':':<11}{count}")

### Create DataFrames
#### Equities

In [None]:
# Build the equity DataFrame from the flattened equity trades and print its
# column summary.
equity = create_equity_dataframe(trades["equity"])
equity.info()

In [None]:
# Preview the first rows of the equity DataFrame.
equity.head()

#### Options

In [None]:
# Build the options DataFrame from the flattened options trades and print its
# column summary.
options = create_options_dataframe(trades["options"])
options.info()

In [None]:
# Preview the first rows of the options DataFrame.
options.head()

#### Overrides

In [None]:
# Three capture groups: leading letters, a single letter after a run of
# digits, and the trailing digits. Applied to the override descriptions in a
# later cell.
transaction_symbol_pattern = re.compile(r"([A-Z]+)[0-9]+([A-Z])([0-9]+)")

# One row per flattened override transaction.
overrides_dump = [override.model_dump() for override in trades["overrides"]]
overrides = pd.DataFrame(overrides_dump)

# Convert to more optimal data types.
for text_column in ("option_type", "description"):
    overrides[text_column] = overrides[text_column].astype(pd.StringDtype())

# A negative quantity marks a short position; everything else is long.
overrides["multi_leg_position"] = (
    (overrides["quantity"] < 0)
    .map({True: "short", False: "long"})
    .astype(pd.StringDtype())
)

overrides.info()

In [None]:
# Preview the first rows of the overrides DataFrame.
overrides.head()

In [None]:
# Pull the three regex capture groups out of each override description, one
# column per group.
description_split = overrides["description"].str.extract(
    transaction_symbol_pattern, expand=True
)
# NOTE(review): this `head()` result is not assigned; as a mid-cell expression
# it is presumably not displayed either — move it to the end of the cell if a
# preview is intended.
description_split.head()
# Name the capture groups like the derived columns of the options DataFrame.
# NOTE(review): `multi_leg_amount` is left as extracted text here, whereas the
# options DataFrame casts it to float64 — confirm whether a cast is still to come.
description_split = description_split.rename(
    columns={0: "symbol", 1: "multi_leg_type", 2: "multi_leg_amount"}
)