In [1]:
import pandas as pd
import numpy as np
import datetime
import matplotlib.pyplot as plt
import shutil
from pathlib import Path
from typing import List, Union, Sequence, Iterable
import requests
import io
from IPython.display import HTML, display, display_markdown

# Q1. intraday seasonality
---

Let's look into the FX price data in this question.  
The local variable `ohlc` below holds 5 years of 1-minute OHLC price data.  
FX prices exhibit some intraday seasonality. Let's check it using the `ohlc` data.   

1. Calculate the average of $HLRange$ (*) for each hour (hour = 0, 1, 2, ..., 23), then plot it.    
2. You will see 3 peaks in that plot. Infer the reason why they are formed and explain it in your own words.
  
  
Hints:  
- `groupby()` is a useful method for calculating statistics by category.
- `timestamp` is in the UTC timezone.  

(*)
$$
HLRange(t) := \frac{High(t) - Low(t)}{Open(t)}
$$

In [2]:
# FX-OHLC data: USDJPY bars read from the gzip-compressed CSV
ohlc = pd.read_csv("USDJPY.csv.gz", compression="gzip", encoding="utf-8")

# Show the first three rows, then the earliest/latest timestamp and the row count
display(ohlc.head(n=3), ohlc["timestamp"].agg(["min", "max", "count"]))

Unnamed: 0,timestamp,ticker,open,high,low,close
0,2018-01-01 22:00:00+00:00,USDJPY,112.645,112.645,112.645,112.645
1,2018-01-01 22:01:00+00:00,USDJPY,112.616,112.616,112.616,112.616
2,2018-01-01 22:02:00+00:00,USDJPY,112.634,112.634,112.634,112.634


min      2018-01-01 22:00:00+00:00
max      2022-12-30 21:58:00+00:00
count                      1858574
Name: timestamp, dtype: object

In [3]:
# Write your code and comments from here

# Q2. Applications of LLMs
---

Recently, ChatGPT and other large-scale language models have attracted people's attention.  
Please think and write about your ideas for using LLMs in financial market analysis, taking into account the content of this lecture.

[Write down your thoughts here]

# Q3. Free analysis (*Optional)
---

Formulate your own hypothesis/question/experiment on FX OHLC data and test it with the data.  Any idea is welcome.  
Of course, you can add your own data if necessary. 

Examples:
- What is the relationship between the price directions of EURUSD and USDJPY? How are they correlated, and why?  

Hints:
- You can use other currency pairs' data by changing the `ticker` argument of `load_data()`. For available tickers, refer to https://www.histdata.com
- This function is NOT supported in Google Colab runtime. Use your local environment. 

In [None]:
!pip install histdata >/dev/null

In [4]:
from histdata import download_hist_data as dl
from histdata.api import Platform as P, TimeFrame as TF

def download_data(
    ticker: str,
    year: int
) -> str:
    """Download 1min OHLC data from histdata.com to `./data`

    The yearly archive is written to `data/zip` and then unpacked into `data/`.

    Parameters
    ----------
    ticker : str
        Currency pair passed to the histdata downloader as `pair`.
    year : int
        Year to download; converted to `str` before being handed to the downloader.

    Returns
    -------
    str
        The value returned by `download_hist_data`, i.e. the archive path that is
        subsequently unpacked.
    """
    # Make sure the target folder exists before the downloader writes into it;
    # on a fresh checkout neither `data/` nor `data/zip` is present.
    Path("data/zip").mkdir(parents=True, exist_ok=True)

    path = dl(
        year=str(year),
        month=None,  # month=None requests the whole year
        pair=ticker,
        platform=P.META_TRADER,
        time_frame=TF.ONE_MINUTE,
        output_directory="data/zip",
    )
    shutil.unpack_archive(path, extract_dir="data/")
    return path

def load_data(
    ticker: str = "USDJPY",
    year: Union[int, Iterable[int]] = 2015
) -> pd.DataFrame:
    """Load 1min-OHLC data. (Download from histdata if not exists.)

    Parameters
    ----------
    ticker : str
        Currency pair; upper-cased for both the file name and the `ticker` column.
    year : int or iterable of int
        A single year, or several years whose frames are concatenated in the
        given order. A string such as ``"2015"`` is treated as one year, not as
        an iterable of characters.

    Returns
    -------
    pd.DataFrame
        Columns `timestamp` (converted to UTC), `ticker`, `open`, `high`, `low`,
        `close`. For multiple years the rows are re-indexed 0..n-1.
    """
    # str/bytes are Iterable as well; without this guard a year given as "2015"
    # would be split into characters and recurse until RecursionError.
    if isinstance(year, Iterable) and not isinstance(year, (str, bytes)):
        frames = [load_data(ticker=ticker, year=_year) for _year in year]
        # ignore_index keeps row labels unique across the per-year frames
        return pd.concat(frames, ignore_index=True)

    path = f"data/DAT_MT_{ticker.upper()}_M1_{year}.csv"
    if not Path(path).exists():
        download_data(ticker=ticker, year=year)

    # The file has no header row; the last column is not used and is named "_"
    data = pd.read_csv(path, names=["date", "time", "open", "high", "low", "close", "_"])
    # Source timestamps are localized as "EST" and then converted to UTC
    data["timestamp"] = (
        pd.to_datetime(data["date"] + " " + data["time"])
        .dt.tz_localize("EST")
        .dt.tz_convert("UTC")
    )
    data["ticker"] = ticker.upper()
    return data[["timestamp", "ticker", "open", "high", "low", "close"]]

In [5]:
# [write your code and comments from here]


---
---

# *About Submission