# In [7]:
import os
import pandas as pd

def aggregate_stock_data(data_dir, output_file):
    """Combine every per-ticker CSV in ``data_dir`` into a single CSV file.

    Each ``*.csv`` file directly inside ``data_dir`` is read, tagged with a
    ``Ticker`` column derived from its file name (the name minus the final
    ``.csv`` extension, so ``BRK.B.csv`` yields ``BRK.B``), reduced to a fixed
    column order, and stacked into one table that is written to
    ``output_file`` without the index.

    Args:
        data_dir: Directory containing one CSV file per ticker.
        output_file: Path of the combined CSV to write.

    Raises:
        KeyError: If an input file lacks one of the expected columns
            ('Date', 'Low', 'Open', 'Volume', 'High', 'Close',
            'Adjusted Close').
        OSError: If ``data_dir`` cannot be listed or a file cannot be
            read or written.
    """
    columns_order = ['Ticker', 'Date', 'Low', 'Open', 'Volume', 'High', 'Close', 'Adjusted Close']
    output_path = os.path.abspath(output_file)
    frames = []

    # Sort the listing so the row order of the output does not depend on the
    # arbitrary order in which os.listdir() returns directory entries.
    for file_name in sorted(os.listdir(data_dir)):
        if not file_name.endswith('.csv'):
            continue

        file_path = os.path.join(data_dir, file_name)

        # If the output of a previous run lives inside data_dir, do not feed
        # it back in as if it were a ticker file (that would duplicate rows).
        if os.path.abspath(file_path) == output_path:
            continue

        # Strip only the final extension so dotted symbols stay intact.
        ticker = os.path.splitext(file_name)[0]

        df = pd.read_csv(file_path)
        df['Ticker'] = ticker

        # Place 'Ticker' before 'Date' and drop any columns not listed.
        frames.append(df[columns_order])

    if frames:
        # Concatenate once; re-concatenating inside the loop copies all
        # previously accumulated rows on every iteration.
        aggregated_data = pd.concat(frames, ignore_index=True)
    else:
        # No input files: still emit a header row so the result can be read
        # back with pd.read_csv instead of producing an unparseable file.
        aggregated_data = pd.DataFrame(columns=columns_order)

    aggregated_data.to_csv(output_file, index=False)

# Specify the directory containing the per-ticker CSV files and the path of
# the combined CSV file to write.
data_directory = 'data'
output_csv = 'aggregated_data.csv'

# Run the aggregation function; it writes `output_csv` to disk.
aggregate_stock_data(data_directory, output_csv)

# Read the written file back and display the first few rows to confirm it
# worked (the trailing `head()` expression is what the notebook cell shows).
aggregated_data = pd.read_csv(output_csv)
aggregated_data.head()


# In [8]:
import yfinance as yf
import pandas as pd
from datetime import datetime

def fetch_recent_data(tickers, start_date, end_date, output_file):
    """Download daily price history for several tickers and save it as one CSV.

    Each ticker is fetched separately with ``yf.download``, tagged with a
    ``Ticker`` column, reduced to a fixed column order, and has 'Adj Close'
    renamed to 'Adjusted Close'. The per-ticker tables are stacked and
    written to ``output_file`` without the index.

    Args:
        tickers: Iterable of ticker symbols to download.
        start_date: Start of the date range, passed to ``yf.download`` as
            ``start``.
        end_date: End of the date range, passed to ``yf.download`` as
            ``end`` (yfinance documents this bound as exclusive).
        output_file: Path of the CSV file to write.

    Raises:
        KeyError: If a non-empty download lacks one of the expected columns.
    """
    columns_order = ['Ticker', 'Date', 'Low', 'Open', 'Volume', 'High', 'Close', 'Adj Close']
    frames = []

    for ticker in tickers:
        # Request unadjusted prices explicitly: the 'Adj Close' column only
        # exists when auto_adjust is off, and the library default for this
        # flag differs between yfinance releases.
        data = yf.download(ticker, start=start_date, end=end_date, auto_adjust=False)

        # Nothing came back for this symbol (e.g. unknown ticker or no
        # trading days in range): skip it rather than fail on missing columns.
        if data is None or data.empty:
            continue

        # Some yfinance releases return (field, ticker) MultiIndex columns
        # even for a single symbol; keep only the field level so the plain
        # column names below resolve. Must happen before reset_index so the
        # 'Date' column gets a flat name too.
        if isinstance(data.columns, pd.MultiIndex):
            data.columns = data.columns.get_level_values(0)

        # Move Date from the index to a column and add the ticker column.
        data = data.reset_index()
        data['Ticker'] = ticker

        # Reorder the columns, then rename to match the required format.
        data = data[columns_order].rename(columns={'Adj Close': 'Adjusted Close'})

        frames.append(data)

    if frames:
        all_data_df = pd.concat(frames, ignore_index=True)
    else:
        # No data for any ticker: pd.concat([]) would raise, so write a
        # header-only CSV that can still be read back with pd.read_csv.
        all_data_df = pd.DataFrame(
            columns=['Adjusted Close' if name == 'Adj Close' else name for name in columns_order]
        )

    # Save to CSV
    all_data_df.to_csv(output_file, index=False)

# Define the tickers to download, the date range (as 'YYYY-MM-DD' strings),
# and the path of the CSV file to write.
tickers = ['AAPL', 'MSFT', 'GOOG', 'AMZN', 'V']
start_date = '2023-01-01'
end_date = '2024-05-24'
output_csv_recent = 'recent_data.csv'

# Fetch recent data for every ticker and save it to `output_csv_recent`.
fetch_recent_data(tickers, start_date, end_date, output_csv_recent)

# Read the written file back and display the first few rows to confirm it
# worked (the trailing `head()` expression is what the notebook cell shows).
recent_data = pd.read_csv(output_csv_recent)
recent_data.head()
