# BTC Data Pull

In [1]:
from datetime import datetime, timedelta, timezone

import pandas as pd
import pytz
import yfinance as yf


  _empty_series = pd.Series()


In [2]:
# Download window and bar size for the price request.
frequency = "1m"

# Read the clock once so both bounds come from the same instant; two separate
# now() calls could resolve to different dates if the cell runs across midnight UTC.
now_utc = datetime.now(timezone.utc)
start_time = (now_utc - timedelta(days=7)).strftime('%Y-%m-%d')  # 7 days before the current UTC date
end_time = now_utc.strftime('%Y-%m-%d')  # Current date in UTC

In [3]:
# Tickers to pull and the price fields kept for each of them.
tickers = ["BTC-USD"]
price_fields = ['High', 'Low', 'Open', 'Close']

# One frame per ticker is collected here and joined in a single concat below.
# Assigning columns one by one into an empty DataFrame would pin the index to
# the first ticker's timestamps and silently drop rows from later tickers.
ticker_frames = []

for ticker in tickers:
    # Download historical bars for the ticker, keeping only the price fields
    data = yf.download(ticker, start=start_time, end=end_time, interval=frequency)[price_fields]

    # Express the timestamps in Pacific Time; a naive index is localized to UTC first
    if data.index.tz is None:
        data.index = data.index.tz_localize('UTC').tz_convert('US/Pacific')
    else:
        data.index = data.index.tz_convert('US/Pacific')

    # Prefix every column with the ticker so names stay unique across tickers
    data.columns = [f"{ticker}_{col}" for col in data.columns]
    ticker_frames.append(data)

# Join all tickers on their timestamps (pd.concat defaults to an outer join);
# fall back to an empty DataFrame when the ticker list is empty.
df = pd.concat(ticker_frames, axis=1) if ticker_frames else pd.DataFrame()

[*********************100%%**********************]  1 of 1 completed


### Load existing history and merge

In [4]:
# Read the previously exported minute bars from disk and display them.
hist_df = pd.read_parquet(path='btc-min.parquet')
hist_df

Unnamed: 0_level_0,BTC-USD_High,BTC-USD_Low,BTC-USD_Open,BTC-USD_Close
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2024-02-09 16:01:00-08:00,47163.984375,47163.984375,47163.984375,47163.984375
2024-02-09 16:02:00-08:00,47195.679688,47195.679688,47195.679688,47195.679688
2024-02-09 16:03:00-08:00,47216.957031,47216.957031,47216.957031,47216.957031
2024-02-09 16:04:00-08:00,47228.175781,47228.175781,47228.175781,47228.175781
2024-02-09 16:05:00-08:00,47222.082031,47222.082031,47222.082031,47222.082031
...,...,...,...,...
2024-02-16 15:54:00-08:00,52178.539062,52178.539062,52178.539062,52178.539062
2024-02-16 15:55:00-08:00,52161.660156,52161.660156,52161.660156,52161.660156
2024-02-16 15:56:00-08:00,52167.261719,52167.261719,52167.261719,52167.261719
2024-02-16 15:57:00-08:00,52177.156250,52177.156250,52177.156250,52177.156250


In [5]:
# Merge the fresh pull into the stored history, keyed on the timestamp index.
# `drop_duplicates()` compares row *values* and ignores the index, so it would
# discard distinct minutes that happen to share the same prices while keeping
# repeated timestamps. De-duplicate on the index instead; `keep='last'` lets a
# freshly downloaded bar replace the stored one for the same timestamp.
merged = pd.concat([hist_df, df])
hist_df = merged[~merged.index.duplicated(keep='last')].sort_index()
hist_df

Unnamed: 0_level_0,BTC-USD_High,BTC-USD_Low,BTC-USD_Open,BTC-USD_Close
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2024-02-09 16:01:00-08:00,47163.984375,47163.984375,47163.984375,47163.984375
2024-02-09 16:02:00-08:00,47195.679688,47195.679688,47195.679688,47195.679688
2024-02-09 16:03:00-08:00,47216.957031,47216.957031,47216.957031,47216.957031
2024-02-09 16:04:00-08:00,47228.175781,47228.175781,47228.175781,47228.175781
2024-02-09 16:05:00-08:00,47222.082031,47222.082031,47222.082031,47222.082031
...,...,...,...,...
2024-02-16 15:54:00-08:00,52178.539062,52178.539062,52178.539062,52178.539062
2024-02-16 15:55:00-08:00,52161.660156,52161.660156,52161.660156,52161.660156
2024-02-16 15:56:00-08:00,52167.261719,52167.261719,52167.261719,52167.261719
2024-02-16 15:57:00-08:00,52177.156250,52177.156250,52177.156250,52177.156250


### Export Data

In [6]:
# Sanity checks before export: every cell must hold a value ...
has_nulls = hist_df.isnull().any().any()
assert not has_nulls, "DataFrame contains null values"

# ... and the history must still begin at this fixed timestamp.
expected_start_datetime = pd.Timestamp('2024-02-09 16:01:00-08:00', tz='US/Pacific')
first_timestamp = hist_df.index[0]
assert first_timestamp == expected_start_datetime, "Index does not start with the expected Datetime value."


In [7]:
# Write the merged history back to the parquet file it was loaded from.
hist_df.to_parquet(path='btc-min.parquet')