# 감정의 AR 구하기

### Preprocess dataset

In [None]:
import os
import pandas as pd
from data.sentiment import *
from event.musk import MUSK_TWEET_TIMES

# Directory the per-event sentiment CSVs are read from.
data_dir = "sentiment_dataset"
# Directory the processed copies are written to.
processed_data_dir = "sentiment_dataset_processed"
# One CSV file name per entry of MUSK_TWEET_TIMES, derived from its date.
data_files: list[str] = [
    tweet_time.strftime("sentiment_%Y-%m-%d.csv") for tweet_time in MUSK_TWEET_TIMES
]

In [None]:
# Build a processed copy of every sentiment CSV listed in data_files.

os.makedirs(processed_data_dir, exist_ok=True)

# Columns handed to add_diff for every file.
columns_to_process = [
    ColumnName.N_TOTAL,
    ColumnName.POS_NEG_DIFF,
    ColumnName.AVERAGE,
    ColumnName.WEIGHTED_AVERAGE,
    ColumnName.BULLISHNESS,
    ColumnName.POSITIVE_RATIO,
    ColumnName.NEGATIVE_RATIO,
    ColumnName.CUMULATED_POSITIVE_RATIO,
    ColumnName.CUMULATED_NEGATIVE_RATIO,
]

# Single-argument feature helpers, applied to each frame in this order.
# Their return values are not used, so they are expected to modify the
# frame in place.
feature_steps = (add_bullishness, add_pos_neg_diff, add_ratio)

for file_name in data_files:
    frame = pd.read_csv(os.path.join(data_dir, file_name))

    for add_feature in feature_steps:
        add_feature(frame)

    add_diff(frame, columns_to_process)  # type: ignore
    # Disabled alternative to add_diff:
    # add_log_diff(frame, columns_to_process)  # type: ignore

    # Same file name, but under the processed directory; the index is not written.
    frame.to_csv(os.path.join(processed_data_dir, file_name), index=False)

In [None]:
# Point data_files at the processed copies. Joining on the base name keeps
# this cell safe to re-run: an entry that already carries the
# processed_data_dir prefix is not prefixed a second time. On the first run
# the entries are bare file names, so the result is unchanged.
data_files = [
    os.path.join(processed_data_dir, os.path.basename(name)) for name in data_files
]

In [None]:
# Rename columns to match what eventstudy expects.

import pandas as pd

# set() ensures each distinct path is rewritten only once.
for processed_path in set(data_files):
    # Load the CSV, rename "time" to "date", and overwrite the same file.
    df = pd.read_csv(processed_path).rename(columns={"time": "date"})
    df.to_csv(processed_path, index=False)

### Create events with saved data files

In [None]:
from event import SentimentAggregator, MUSK_TWEET_TIMES

# Output folder named in result_file_format below.
os.makedirs("results_sent_single", exist_ok=True)

# Window bounds passed to the aggregator.
# NOTE(review): presumably offsets relative to each event time, in the
# sampling unit of the data files — confirm against SentimentAggregator.
event_window = (0, 60)
estimation_window = (-240, -60)

# Start from a reset aggregator and select the column to analyse before
# creating the events.
aggregator = SentimentAggregator()
aggregator.reset()
aggregator.use_column("diff_pos_neg_diff")
aggregator.create_multiple_events(
    event_times=MUSK_TWEET_TIMES,
    data_files=data_files,
    event_window=event_window,
    estimation_window=estimation_window,
    # "{}" is a placeholder in the result file name; presumably filled in
    # per event by the aggregator — confirm.
    result_file_format="results_sent_single/sent_{}.xlsx",
)

### Aggregate events

In [None]:
# Base name shared by the Excel result and the CSV export.
file_name = "sent_aggr_m240_m60_0060"

# Create both output folders up front. "ar_data" was previously never
# created, so the final to_csv call failed when the folder did not exist yet
# (to_csv does not create missing parent directories).
os.makedirs("results_sent", exist_ok=True)
os.makedirs("ar_data", exist_ok=True)

# Aggregate the events created earlier; the aggregator is given the Excel
# result path.
result = aggregator.aggregate(
    result_file=f"results_sent/{file_name}.xlsx",
    asterisks=False,
    rounding=8,
)

# Also export the aggregated result as CSV, index included.
result.to_csv(f"ar_data/{file_name}.csv", index=True)