## Real-time data for LLMs

Clone the repo with notebooks and corresponding data.

In [None]:
!git clone https://github.com/TurboML-Inc/colab-notebooks.git

Set up the environment and install TurboML's SDK.

In [None]:
!pip install -q condacolab
import condacolab
condacolab.install()
!bash colab-notebooks/install_turboml.sh

The kernel should now be restarted with TurboML's SDK installed.

In [None]:
cd colab-notebooks

Log in to your TurboML instance.

In [None]:
import pandas as pd
import turboml as tb

# BACKEND_URL and API_KEY are not defined in the cells shown here; set them
# to your TurboML backend URL and API key before running this cell.
tb.init(backend_url=BACKEND_URL, api_key=API_KEY)

In [None]:
# Upload the transactions CSV as the "transactions_prompt" dataset, keyed on
# the row index produced by reset_index(). If that fails (presumably because
# the dataset was already uploaded by an earlier run -- TODO confirm), fall
# back to attaching to the dataset by name only.
try:
    transactions = tb.PandasDataset(
        dataset_name="transactions_prompt",
        key_field="index",
        dataframe=pd.read_csv("data/transactions.csv").reset_index(),
        upload=True,
    )
except Exception:
    # Catch Exception rather than using a bare `except:` so that
    # KeyboardInterrupt and SystemExit still propagate and the cell can be
    # interrupted while the upload is in progress.
    transactions = tb.PandasDataset(dataset_name="transactions_prompt")

In [None]:
# Register the "timestamp" column with the "epoch_seconds" format type so it
# can be referenced as the timestamp column by later feature definitions.
transactions.feature_engineering.register_timestamp(
    column_name="timestamp",
    format_type="epoch_seconds",
)

In [None]:
# Define "my_sum_feat": a SUM of transactionAmount grouped by accountID over
# a 24-hour window measured on the "timestamp" column.
transactions.feature_engineering.create_aggregate_features(
    new_feature_name="my_sum_feat",
    operation="SUM",
    column_to_operate="transactionAmount",
    column_to_group="accountID",
    timestamp_column="timestamp",
    window_duration=24,
    window_unit="hours",
)

# Materialize the feature defined above.
transactions.feature_engineering.materialize_features(["my_sum_feat"])

In [None]:
from typing import List, Dict
import pandas as pd
from turboml.common.feature_engineering import retrieve_features

class TurboMLPromptTemplate:
    """Fill a ``str.format``-style template with feature rows of a dataset.

    The constructor only stores its two arguments; all work happens in
    :meth:`get_prompts`.
    """

    def __init__(
        self,
        template: str,
        dataset_id: str,
    ):
        """Store the prompt template and the dataset identifier.

        Args:
            template: Format string whose ``{placeholders}`` are filled from
                the keys of each feature record.
            dataset_id: Identifier passed to ``retrieve_features``.
        """
        self.template = template
        self.dataset_id = dataset_id

    def get_prompts(self, df: pd.DataFrame) -> List[str]:
        """Return one formatted prompt per record from ``retrieve_features``.

        Args:
            df: Rows passed, together with ``dataset_id``, to
                ``retrieve_features``.

        Returns:
            The template formatted once per record of the retrieved result,
            in the order ``to_dict('records')`` yields them.

        Raises:
            KeyError: If the template names a placeholder that is missing
                from a record (raised by ``str.format``).
        """
        # Each record is a {column: value} dict, unpacked as format kwargs.
        feature_rows = retrieve_features(self.dataset_id, df).to_dict('records')
        return [self.template.format(**row) for row in feature_rows]

In [None]:
# Prompt template: every {placeholder} must match a key of the feature
# records, including the "my_sum_feat" aggregate.
template_str = "Give the likelihood of fraud for account {accountID} for the transaction of amount {transactionAmount} performed at time {transactionTime} from the location {paymentBillingState}, given the total transacted amount in the last 24 hours is {my_sum_feat}: "
turbo_prompt = TurboMLPromptTemplate(template=template_str, dataset_id="transactions_prompt")

# Build prompts for the last five rows of the input dataframe and print each
# one followed by a "---" separator line.
prompts = turbo_prompt.get_prompts(df=transactions.input_df[-5:])
for prompt in prompts:
    print(prompt, "---", sep="\n")