# 머스크 트윗이 도지코인 가격에 미치는 영향 (Aggregated)
* 프로젝트의 background를 위한 분석
* 2021년 1월 ~ 4월의 18개 도지코인 관련 event를 aggregate

In [1]:
import os
from datetime import timedelta

from tqdm import tqdm

from data.price import get_price_data
from event import PriceAggregator, MUSK_TWEET_TIMES

# Single PriceAggregator instance shared by the notebook cells that create
# and aggregate the events.
aggregator = PriceAggregator()

In [2]:
# List every tweet timestamp, one per line.
print("\n".join(map(str, MUSK_TWEET_TIMES)))

2021-01-28 22:47:00
2021-02-04 07:35:00
2021-02-06 04:02:00
2021-02-07 07:41:00
2021-02-07 22:25:00
2021-02-10 15:08:00
2021-02-11 09:08:00
2021-02-14 23:25:00
2021-02-21 21:27:00
2021-02-24 13:00:00
2021-03-01 19:57:00
2021-03-06 04:40:00
2021-03-13 23:40:00
2021-04-01 11:25:00
2021-04-09 08:32:00
2021-04-15 04:33:00
2021-04-16 18:01:00
2021-04-28 07:20:00


### Download dataset

In [3]:
# Path template for the per-event price CSVs; "{}" is filled with the event
# index. The containing directory is created up front if it is missing.
file_format = os.path.join("price_dataset", "price_{}.csv")
os.makedirs(os.path.dirname(file_format), exist_ok=True)

In [None]:
# Request price data (with volume) for a window reaching 10 hours before and
# 10 hours after each tweet. Every event gets its own file path, keyed by its
# position in MUSK_TWEET_TIMES.
data_timedelta = timedelta(hours=10)

for idx, event_time in enumerate(tqdm(MUSK_TWEET_TIMES)):
    window_start = event_time - data_timedelta
    window_end = event_time + data_timedelta
    get_price_data(
        start_time=window_start,
        end_time=window_end,
        file_path=file_format.format(idx),
        include_volume=True,
    )

### Calculate log diff

In [None]:
import pandas as pd 
from data.sentiment import add_log_diff

# Columns handed to add_log_diff: the DOGEUSDT price and its volume.
column_names = ["DOGEUSDT", "DOGEUSDT_volume"]


def convert(path: str) -> None:
    """Rewrite the CSV at *path* after running add_log_diff on its contents.

    The file is loaded, passed to ``add_log_diff`` together with
    ``column_names``, and saved back to the same path without the index.

    NOTE(review): the return value of ``add_log_diff`` is ignored, so it is
    presumably expected to modify the frame in place -- confirm.
    """
    frame = pd.read_csv(path)
    add_log_diff(frame, column_names=column_names)
    frame.to_csv(path, index=False)

# Run convert() on every per-event dataset file. The file count and paths are
# derived from MUSK_TWEET_TIMES and file_format (instead of a literal 18 and a
# re-typed path) so this loop cannot drift from the download loop that uses
# the same two names.
for i in range(len(MUSK_TWEET_TIMES)):
    convert(file_format.format(i))

### Create events with dataset files

In [4]:
# One dataset path per tweet, in the same order as MUSK_TWEET_TIMES.
data_files = list(map(file_format.format, range(len(MUSK_TWEET_TIMES))))
os.makedirs("results_price_single", exist_ok=True)

# Reset the aggregator, select the log-diff column, then register one event
# per tweet from the dataset files.
# NOTE(review): the window bounds are presumably offsets relative to the
# tweet time (event window 0..60, estimation window -300..-60) -- confirm the
# unit against PriceAggregator.
aggregator.reset()
aggregator.use_column(column_name="log_diff_DOGEUSDT")
aggregator.create_multiple_events(
    event_times=MUSK_TWEET_TIMES,
    data_files=data_files,
    is_price=False,  # use calculated log diff
    event_window=(0, 60),
    estimation_window=(-300, -60),
    # result_file_format="results_price_single/price_{}.xlsx",
)

Using saved dataset files


100%|██████████| 18/18 [00:00<00:00, 83.75it/s]


### Aggregate events

In [5]:
import os

# Base name shared by the spreadsheet report and the CSV export.
file_name = "price_aggr_m300_m60_0060"

# Make sure both output directories exist before anything is written.
for out_dir in ("results_price", "ar_data"):
    os.makedirs(out_dir, exist_ok=True)

# Aggregate the registered events and additionally store the returned table
# as CSV (without the index).
result = aggregator.aggregate(
    result_file=f"results_price/{file_name}.xlsx",
    asterisks=False,
    rounding=8,
)
result.to_csv(f"ar_data/{file_name}.csv", index=False)

## Visualize price data

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from event.musk import MUSK_TWEET_TIMES

# Plot the DOGEUSDT series of every event and mark the row of the tweet.

# Row index that used to be hard-coded as the marker position. It is kept only
# as a fallback for files in which the tweet time cannot be located.
fallback_marker = 601

for i, event_time in enumerate(MUSK_TWEET_TIMES):
    df = pd.read_csv(f"price_dataset/price_{i}.csv")
    prices = df["DOGEUSDT"]

    # read_csv leaves the "date" column unparsed, so it has to be converted
    # before it can equal a datetime; unparsable values become NaT and simply
    # never match.
    # NOTE(review): assumes "date" and MUSK_TWEET_TIMES use the same (naive)
    # time reference -- confirm against get_price_data.
    dates = pd.to_datetime(df["date"], errors="coerce")
    is_event_row = (dates == pd.Timestamp(event_time)).to_numpy()
    markers_on = np.flatnonzero(is_event_row).tolist()

    # Fall back to the former fixed position, but only if that row exists.
    if not markers_on and fallback_marker < len(df):
        markers_on = [fallback_marker]

    x = np.arange(len(prices))
    if markers_on:
        # markevery only selects WHICH points get a marker; without an
        # explicit marker style nothing would be drawn.
        plt.plot(x, prices, marker="o", markevery=markers_on)
    else:
        plt.plot(x, prices)
    plt.show()