# Load the dataset and get sale prices based on one of the simulation methods


## **Table of Contents**
1. [Data Loading](#load-data)  
2. [Adding `sold_price`](#select-a-method-to-get-the-sale-price-and-add-them-to-a-new-column)  
3. [Visualization](#visualize)  
4. [Saving Data](#save-data)  

## Load Data

In [1]:
# to work from the project's root directory
import sys
import os

# The notebook is expected to run one level below the repository root, so the
# parent of the current working directory is added to the import search path.
parent_of_cwd = os.path.join(os.getcwd(), os.pardir)
project_root = os.path.abspath(parent_of_cwd)
if project_root not in sys.path:
    sys.path.append(project_root)

In [None]:
import pandas as pd

# comment="#" makes pandas skip lines starting with "#", e.g. a description
# stored on the first line of the CSV.
raw_csv_path = "../datasets/raw_data/test_data_15_2608.csv"
df = pd.read_csv(raw_csv_path, comment="#")

df.head()

Ensure that the data is in the correct format by validating every row against the Pydantic model

In [None]:
from src.models import SaleRow
from pydantic import ValidationError


def validate_df(df):
    """Build a `SaleRow` from every row of `df`.

    Returns the list of constructed `SaleRow` objects when every row is
    accepted. On the first `ValidationError` the error is printed and `None`
    is returned; the remaining rows are not checked.
    """
    records = df.to_dict(orient='records')
    parsed_rows = []
    try:
        for record in records:
            parsed_rows.append(SaleRow(**record))
    except ValidationError as error:
        print(error)
        return None
    return parsed_rows

# usage
# validate_df returns None only when validation fails. An empty list (a
# DataFrame without rows) is a successful result, so compare against None
# rather than relying on truthiness.
validated_rows = validate_df(df)
if validated_rows is not None:
    print("All rows are valid")
else:
    print("Some rows are invalid")

## Select a method to get the sale price and add them to a new column

In [6]:
# Multiplier applied to the simulated sale price to account for the sale
# commission: 0.98 keeps 98% of the price (a 2% commission). Use 1 when there
# is no commission.
sale_price_commision = 0.98

#### 1) get_average_sale_price

*P.S. To learn more about the function, you can read its documentation (Ctrl + click on the function's name).*

In [None]:
from src.simulations.simple_mean import get_average_sale_price 


# Columns whose values are passed to get_average_sale_price for every row
average_price_columns = ['price_11', 'price_12', 'price_13', 'price_14', 'price_15']


def _average_sold_price(row):
    """Return the averaged sale price of one row with the commission applied."""
    prices = [row[column] for column in average_price_columns]
    return get_average_sale_price(prices) * sale_price_commision


def _round_average_price(value):
    """Round a single price to 3 decimal places."""
    return round(value, 3)


column_count = len(df.columns)
print(f"(before) amount of columns is {column_count}")

# Row-wise computation of the new column, followed by rounding of each value
df['sold_price'] = df.apply(_average_sold_price, axis=1)
df['sold_price'] = df['sold_price'].apply(_round_average_price)

column_count = len(df.columns)
print(f"(after) amount of columns is {column_count}")


#### 2) calculate_expected_sale_price

*P.S. To learn more about the function, you can read its documentation (Ctrl + click on the function's name).*

In [None]:
from src.simulations.expected_sale_price import calculate_expected_sale_price 


# Settings forwarded positionally to calculate_expected_sale_price, in this
# order, after the price list and the row's buy price (see the function's
# documentation for their meaning).
min_profit = 0.25
delta = 0.01
success_prob = 0.75
fee = 0.87

# Columns whose values are passed to calculate_expected_sale_price for every row
expected_price_columns = ['price_11', 'price_12', 'price_13', 'price_14', 'price_15']


def _expected_sold_price(row):
    """Return the expected sale price of one row with the commission applied."""
    prices = [row[column] for column in expected_price_columns]
    expected_price = calculate_expected_sale_price(
        prices,
        row["buy_price"],
        min_profit,
        delta,
        success_prob,
        fee,
    )
    return expected_price * sale_price_commision


def _round_expected_price(value):
    """Round a single price to 3 decimal places."""
    return round(value, 3)


column_count = len(df.columns)
print(f"(before) amount of columns is {column_count}")

# Row-wise computation of the new column, followed by rounding of each value
df['sold_price'] = df.apply(_expected_sold_price, axis=1)
df['sold_price'] = df['sold_price'].apply(_round_expected_price)

column_count = len(df.columns)
print(f"(after) amount of columns is {column_count}")


## Visualize

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

def plot_random_samples(df, n_samples=10):
    """Plot the prices of randomly chosen rows, one subplot per row.

    Each subplot shows `price_1`..`price_10` and `price_11`..`price_15` as two
    line series and marks the row's `sold_price` with a horizontal dashed line.

    Args:
        df: DataFrame with the columns `price_1`..`price_15` and `sold_price`.
        n_samples: Number of rows to draw without replacement. Values larger
            than the number of rows are clamped to `len(df)`.

    Returns:
        None. The figure is displayed with `plt.show()`. When there is nothing
        to plot (empty DataFrame or non-positive `n_samples`), a message is
        printed and no figure is created.
    """
    # Sampling without replacement fails when more rows are requested than
    # exist, so never ask for more than the DataFrame holds.
    n_plots = min(n_samples, len(df))
    if n_plots <= 0:
        print("Nothing to plot: the DataFrame is empty or n_samples is not positive")
        return

    # Sample row positions instead of index labels: with duplicate labels a
    # label lookup could return more rows than there are subplots.
    positions = np.random.choice(len(df), size=n_plots, replace=False)
    sampled_df = df.iloc[positions]

    columns_1_10 = [f"price_{day}" for day in range(1, 11)]
    columns_11_15 = [f"price_{day}" for day in range(11, 16)]

    # squeeze=False always yields a 2-D array of axes, so a single subplot
    # needs no special handling.
    _, axes = plt.subplots(n_plots, 1, figsize=(10, 3 * n_plots), squeeze=False)

    for ax, (idx, row) in zip(axes[:, 0], sampled_df.iterrows()):
        ax.plot(range(1, 11), row[columns_1_10], label='Price 1-10', marker='o')
        ax.plot(range(11, 16), row[columns_11_15], label='Price 11-15', marker='x')
        ax.axhline(row['sold_price'], color='red', linestyle='--', label='Sold Price')
        ax.set_title(f"Item {idx}")
        ax.set_xlabel("Day")
        ax.set_ylabel("Price")
        ax.legend()
        ax.grid()

    plt.tight_layout()
    plt.show()

# Example usage
plot_random_samples(df, n_samples=30)

## Save Data

In [8]:
# Destination path of the CSV written by save_csv_with_a_description
csv_name = "../datasets/with_sale_prices/test_data_15_2608.csv"
# Text stored as a "#"-prefixed comment at the top of the CSV, before the data
description = "test data of 15 prices and timestamps with a masked name with a sold prices [sold_price] (origin - not specified)\n"


def save_csv_with_a_description(df: pd.DataFrame, csv_name: str, description: str):
    """Write `df` to `csv_name`, preceded by `description` as "#" comment lines.

    The resulting file can be read back with `pd.read_csv(csv_name, comment="#")`.

    Args:
        df: DataFrame to save (written without its index).
        csv_name: Path of the CSV file to create or overwrite.
        description: Free text placed at the top of the file. Every line is
            prefixed with "# " and terminated with a newline; an empty
            description writes no comment line.
    """
    # Comment every line of the description so that a multi-line text cannot
    # leak into the data rows, and end each with a newline so that the CSV
    # header always starts on its own line.
    comment_block = "".join(f"# {line}\n" for line in description.splitlines())

    # The file is written exactly once: description first, then the data.
    # newline='' keeps the line endings exactly as they are written.
    with open(csv_name, 'w', encoding='utf-8', newline='') as f:
        f.write(comment_block)
        df.to_csv(f, index=False)


# Write the DataFrame together with its description to the destination CSV
save_csv_with_a_description(df, csv_name, description)