# Update the CSV with the latest data from yfinance

In [3]:
import os
import pandas as pd
import yfinance as yf

def update_data_and_append(ticker, csv_file_path, start_date, end_date):
    """Append newly available price rows for ``ticker`` to an existing CSV.

    The CSV must already exist and contain a ``Date`` column. Rows are
    requested from yfinance starting the day after the latest date found in
    the file, and the merged result overwrites ``csv_file_path``. Dates
    already in the file are re-parsed with ``pd.to_datetime`` so that old and
    new rows are written in one consistent format.

    Parameters
    ----------
    ticker : str
        Symbol passed to ``yf.download``.
    csv_file_path : str
        Path of the CSV to extend. Nothing is created when it is missing.
    start_date : str or datetime-like
        Download start used only when the CSV holds no dated rows; otherwise
        the download resumes the day after the last date in the file.
    end_date : str or datetime-like
        Passed to ``yf.download`` as ``end``. The yfinance documentation
        describes this bound as exclusive, so a window whose start is not
        before ``end_date`` is treated as empty and no request is made.

    Returns
    -------
    None
        Progress is reported with ``print``.
    """
    if not os.path.exists(csv_file_path):
        print(f"CSV file '{csv_file_path}' does not exist.")
        return

    existing_data = pd.read_csv(csv_file_path)
    # Parse once: the stored dates then share a dtype with the downloaded ones.
    existing_data['Date'] = pd.to_datetime(existing_data['Date'])
    last_date = existing_data['Date'].max()
    end_date_timestamp = pd.to_datetime(end_date)  # Convert end_date to Timestamp

    if pd.isna(last_date):
        # Header-only CSV: there is no last date to resume from.
        fetch_start = pd.to_datetime(start_date)
    else:
        if last_date >= end_date_timestamp:
            print(f"Data for {ticker} is up to date.")
            return
        fetch_start = last_date + pd.DateOffset(days=1)

    # With an exclusive `end`, start >= end is an empty window; skipping the
    # request avoids a download that can only fail or come back empty.
    if fetch_start >= end_date_timestamp:
        print(f"No new data available for {ticker} in the specified date range.")
        return

    new_data = yf.download(ticker, start=fetch_start, end=end_date)

    if new_data is None or new_data.empty:
        print(f"No new data available for {ticker} in the specified date range.")
        return

    if isinstance(new_data.columns, pd.MultiIndex):
        # NOTE(review): some yfinance releases return two-level columns even
        # for one ticker; this assumes level 0 holds the field names (the
        # default column grouping) -- confirm against the installed version.
        new_data = new_data.set_axis(new_data.columns.get_level_values(0), axis=1)

    # The downloaded dates live in the index. Move them into a 'Date' column,
    # otherwise to_csv(index=False) below would write the new rows undated.
    new_rows = new_data.rename_axis('Date').reset_index()

    combined_data = pd.concat([existing_data, new_rows], ignore_index=True)
    combined_data.to_csv(csv_file_path, index=False)
    print(f"Data for {ticker} updated and appended.")

In [4]:
# Bring the local BTC-USD daily history up to today's date.
ticker_to_update, csv_file_path = 'BTC-USD', 'BTC-USD_1d.csv'
start_date = '2023-01-01'
end_date = pd.Timestamp.today().date().isoformat()  # 'YYYY-MM-DD'

update_data_and_append(
    ticker=ticker_to_update,
    csv_file_path=csv_file_path,
    start_date=start_date,
    end_date=end_date,
)

[*********************100%***********************]  1 of 1 completed


1 Failed download:
['BTC-USD']: IndexError('index 0 is out of bounds for axis 0 with size 0')



No new data available for BTC-USD in the specified date range.


# Preprocess Data

In [12]:
import os
import pandas as pd

def preprocess_and_save(data, ticker, output_folder="Regression_data"):
    """Add calendar features to a price table and save it as a CSV.

    The ``Date`` column is parsed with ``pd.to_datetime`` and ``Year``,
    ``Month`` and ``Day`` columns are derived from it. The result is written
    to ``<output_folder>/prep_<ticker>_data.csv`` without the index.

    The input frame is left untouched: the features are built on a new frame,
    so re-running the cell or reusing ``data`` afterwards sees the original.

    Parameters
    ----------
    data : pandas.DataFrame
        Table with a ``Date`` column that ``pd.to_datetime`` can parse.
    ticker : str
        Label used in the output file name and in the printed message.
    output_folder : str, default "Regression_data"
        Destination directory; created (with parents) when missing.

    Returns
    -------
    None
        The output path is reported with ``print``.
    """
    dates = pd.to_datetime(data['Date'])
    # assign() returns a new frame, so the caller's DataFrame is not mutated.
    prepared = data.assign(
        Date=dates,
        Year=dates.dt.year,
        Month=dates.dt.month,
        Day=dates.dt.day,
    )

    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(output_folder, exist_ok=True)

    # Save the preprocessed data in the output folder
    output_file_path = os.path.join(output_folder, f"prep_{ticker}_data.csv")
    prepared.to_csv(output_file_path, index=False)
    print(f"Preprocessed data for {ticker} saved in {output_file_path}")




In [15]:

# Load the raw daily history that belongs to `ticker`, so the saved file is
# labelled with the asset it actually contains.
ticker = 'BTC-USD'
data = pd.read_csv(f'{ticker}_1d.csv')

# Use the function's default folder ("Regression_data"): a differently cased
# name is a different directory on case-sensitive filesystems.
preprocess_and_save(data, ticker)

Preprocessed data for BTC-USD saved in Regression_Data\prep_BTC-USD_data.csv


# Print features

In [16]:
import pandas as pd

def print_features(ticker, input_folder="Regression_data"):
    """Print the column names of a ticker's preprocessed CSV.

    Looks for ``<input_folder>/prep_<ticker>_data.csv``. When the file is
    present it is read with pandas and its column names are printed as a
    list; otherwise a "not found" message is printed.

    Parameters
    ----------
    ticker : str
        Label used to build the file name.
    input_folder : str, default "Regression_data"
        Directory that is searched for the file.

    Returns
    -------
    None
    """
    csv_name = f"prep_{ticker}_data.csv"
    csv_path = os.path.join(input_folder, csv_name)

    # Guard clause: nothing to list when the file is absent.
    if not os.path.exists(csv_path):
        print(f"Preprocessed data file for {ticker} not found.")
        return

    frame = pd.read_csv(csv_path)
    print(f"Features for {ticker} data:")
    print(frame.columns.tolist())

# Demo: list the feature columns stored for one ticker
ticker_to_print = 'BTC-USD'
print_features(ticker=ticker_to_print)


Features for BTC-USD data:
['Date', 'Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume', 'Year', 'Month', 'Day']
