### Setup

In [None]:
%pip install finta

Collecting finta
  Downloading finta-1.3-py3-none-any.whl.metadata (6.4 kB)
Downloading finta-1.3-py3-none-any.whl (29 kB)
Installing collected packages: finta
Successfully installed finta-1.3


In [None]:
import os
import sys
import hashlib
from pathlib import Path

import kagglehub
import numpy as np
import pandas as pd
import yaml
from google.colab import drive
from google.colab import files
from kagglehub import KaggleDatasetAdapter

In [None]:
# Mount Google Drive; every project path below lives underneath it.
drive.mount('/content/drive')

# --- Directory layout -------------------------------------------------------
DRIVE_ROOT = Path('/content/drive/MyDrive')
PROJECT_ROOT = DRIVE_ROOT.joinpath('projects/btc')

PROJECT_DATA_DIR = PROJECT_ROOT.joinpath('data')
PROJECT_BTC_RAW_DIR = PROJECT_DATA_DIR.joinpath('raw')
PROJECT_BTC_CLEANED_DIR = PROJECT_DATA_DIR.joinpath('cleaned')
PROJECT_BTC_METADATA_DIR = PROJECT_DATA_DIR.joinpath('metadata')
PROJECT_EXTERNAL_DIR = PROJECT_DATA_DIR.joinpath('external')

# --- Dataset files (raw download and its cleaned counterpart) ---------------
BTC_RAW_CSV_FILENAME = 'ohlcv_399_raw.csv'
BTC_RAW_FILE_PATH = PROJECT_BTC_RAW_DIR.joinpath(BTC_RAW_CSV_FILENAME)

BTC_CLEANED_CSV_FILENAME = 'ohlcv_399_cleaned.csv'
BTC_CLEANED_FILE_PATH = PROJECT_BTC_CLEANED_DIR.joinpath(BTC_CLEANED_CSV_FILENAME)

# --- Metadata files kept next to the data -----------------------------------
BTC_CHECKSUM_FILE_PATH = PROJECT_BTC_METADATA_DIR.joinpath('checksums.txt')
BTC_VERSIONS_FILE_PATH = PROJECT_BTC_METADATA_DIR.joinpath('versions.yaml')

# --- Repository housekeeping ------------------------------------------------
GITIGNORE_FILE_PATH = PROJECT_ROOT.joinpath('.gitignore')

Mounted at /content/drive


In [None]:
# Put the project root on sys.path so the project's `src` package can be
# imported by the following cell.
project_root = os.path.abspath(PROJECT_ROOT)

if project_root not in sys.path:
  sys.path.append(project_root)
  print(f'Project root \'{project_root}\' added to sys.path.')
else:
  # Re-running this cell must not claim the path was added a second time.
  print(f'Project root \'{project_root}\' already present in sys.path.')

Project root '/content/drive/MyDrive/projects/btc' added to sys.path.


In [None]:
from src.utils import *
from src.data_loader import *
from src.preprocessing import *
from src.data_splitter import *
from src.data_cleaner import *

In [None]:
# Record the interpreter and library versions used for this run so the
# results can be reproduced later.
print('--- Python Interpreter and Standard Library Versions ---')
print(f'- python: {sys.version}')
print(f'- python info: {sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro}')

# Standard-library modules carry no version of their own.
print('- os, hashlib, pathlib module: (version tied to Python interpreter)')

print('\n--- Third-Party Library Versions ---')
print(f'- numpy: {np.__version__}')
print(f'- pandas: {pd.__version__}')
print(f'- yaml: {yaml.__version__}')
# kagglehub fetches the dataset, so its version is part of the provenance too.
print(f'- kagglehub: {kagglehub.__version__}')

print('\n--- Google Colab Specific Module Versions ---')
print('- google.colab.drive: (version tied to Colab environment)')
print('- google.colab.files: (version tied to Colab environment)')

--- Python Interpreter and Standard Library Versions ---
- python: 3.12.12 (main, Oct 10 2025, 08:52:57) [GCC 11.4.0]
- python info: 3.12.12
- os, hashlib, pathlib module: (version tied to Python interpreter)

--- Third-Party Library Versions ---
- numpy: 2.0.2
- pandas: 2.2.2
- yaml: 6.0.3

--- Google Colab Specific Module Versions ---
- google.colab.drive: (version tied to Colab environment)
- google.colab.fils: (version tied to Colab environment)


In [None]:
# Show the installed finta distribution's metadata (version, location,
# dependencies) as a record of what this run actually used.
%pip show finta

Name: finta
Version: 1.3
Summary: Common financial technical indicators implemented in Pandas.
Home-page: https://github.com/peerchemist/finta
Author: Peerchemist
Author-email: peerchemist@protonmail.ch
License: LGPLv3+
Location: /usr/local/lib/python3.12/dist-packages
Requires: numpy, pandas
Required-by: 


### Fetch the Bitcoin historical dataset from Kaggle.

In [None]:
# Name of the CSV inside the Kaggle dataset to load.
file_path = 'btcusd_1-min_data.csv'

# `kagglehub.load_dataset` is deprecated and emits a warning on every call;
# `kagglehub.dataset_load` is its replacement and takes the same arguments.
ds = kagglehub.dataset_load(
  KaggleDatasetAdapter.PANDAS,
  'mczielinski/bitcoin-historical-data',
  file_path,
)

# Print the frame on its own line so the column header stays aligned with
# the rows instead of being shifted by the label.
print('First 5 records:', ds.head(), sep='\n')

  ds = kagglehub.load_dataset(


Using Colab cache for faster access to the 'bitcoin-historical-data' dataset.
First 5 records:       Timestamp  Open  High   Low  Close  Volume
0  1.325412e+09  4.58  4.58  4.58   4.58     0.0
1  1.325412e+09  4.58  4.58  4.58   4.58     0.0
2  1.325412e+09  4.58  4.58  4.58   4.58     0.0
3  1.325412e+09  4.58  4.58  4.58   4.58     0.0
4  1.325412e+09  4.58  4.58  4.58   4.58     0.0


### Save the raw dataset to Google Drive for persistent storage.

In [None]:
# Write the freshly loaded frame `ds` to the raw-data path on Drive.
# `save_ds` is a project helper brought in by one of the `src.*` star imports.
save_ds(BTC_RAW_FILE_PATH, ds)

Successfully saved the dataset to '/content/drive/MyDrive/projects/btc/data/raw/ohlcv_399_raw.csv'.


### Generate, verify, and log a SHA-256 checksum for the raw dataset file.

In [None]:
# First call: obtain the checksum of the raw CSV; `checksum` is reused when
# the metadata YAML is written.
# NOTE(review): `result` is not used anywhere in the visible notebook.
result, checksum = manage_checksum(BTC_RAW_FILE_PATH, PROJECT_BTC_METADATA_DIR, BTC_CHECKSUM_FILE_PATH)
# Second call passes an extra positional `True`; judging by the recorded
# output this appears to switch the helper to verification — TODO confirm
# against the helper's signature in `src`. As the cell's last expression, its
# return value is what the cell displays.
manage_checksum(BTC_RAW_FILE_PATH, PROJECT_BTC_METADATA_DIR, BTC_CHECKSUM_FILE_PATH, True)

Checksum for 'ohlcv_399_raw.csv' saved to: '/content/drive/MyDrive/projects/btc/data/metadata/ohlcv_399_raw.txt'
Appended checksum to main log: '/content/drive/MyDrive/projects/btc/data/metadata/checksums.txt'
Verification successful for 'ohlcv_399_raw.csv': OK


(True, '2ba0dfcc71ce616d46b74a4c27a73061757fecaa0dcd3a82540095401b30b451')

### Create a .gitignore file to exclude large datasets and local configuration files.

In [None]:
# Create the project's .gitignore at the project root via the project helper.
# NOTE(review): the ignore rules themselves live in the helper, not in this
# notebook — check `src` for what is excluded.
create_gitignore(GITIGNORE_FILE_PATH)

Success: .gitignore file created at '/content/drive/MyDrive/projects/btc/.gitignore'


### Create a metadata file for the dataset.

In [None]:
# Register this dataset version in versions.yaml. The helper receives the
# loaded frame, the raw and cleaned file paths, the versions-file path and
# the `checksum` returned by the earlier `manage_checksum` call.
# NOTE(review): which fields the helper derives from `ds` is not visible
# here — confirm in `src`.
create_metadata_yaml(ds, BTC_RAW_FILE_PATH, BTC_CLEANED_FILE_PATH,
                     BTC_VERSIONS_FILE_PATH, checksum)

Successfully added new version for 'ohlcv_399_raw' to '/content/drive/MyDrive/projects/btc/data/metadata/versions.yaml'
