# Import Dependencies

In [1]:
import pandas as pd
import os

# Join files

In [10]:
# Directory containing the yfinance CSV exports
file_path = '../data/Data'

# Read every CSV in the directory into its own DataFrame, tagging each with
# the ticker taken from the filename.
# NOTE(review): the recorded head() output shows an 'Unnamed: 0' column, which
# suggests the CSVs were written with their index -- confirm whether
# read_csv should be given index_col=0.
all_data = []
for filename in os.listdir(file_path):
    if not filename.endswith('.csv'):
        continue
    # splitext removes only the final extension, so a ticker that itself
    # contains a dot (e.g. 'BRK.B.csv') keeps its full symbol instead of
    # being cut at the first '.'.
    ticker = os.path.splitext(filename)[0]
    df = pd.read_csv(os.path.join(file_path, filename))
    df['Ticker'] = ticker  # Record which file each row came from
    all_data.append(df)

# pd.concat raises an unhelpful "No objects to concatenate" on an empty list;
# fail with the same exception type but a message that names the directory.
if not all_data:
    raise ValueError(f"No CSV files found in {file_path!r}")

# Concatenate all per-ticker frames into one DataFrame with a fresh index
stock_data = pd.concat(all_data, ignore_index=True)
stock_data.head()


Unnamed: 0,Date,Close,High,Low,Open,Volume,Ticker
0,2009-01-02,2.718,2.7265,2.5535,2.5675,145928000,AMZN
1,2009-01-05,2.703,2.787,2.6515,2.7865,190196000,AMZN
2,2009-01-06,2.868,2.911,2.6875,2.7275,221602000,AMZN
3,2009-01-07,2.81,2.8475,2.7675,2.8145,158854000,AMZN
4,2009-01-08,2.858,2.866,2.729,2.7495,131558000,AMZN


# Data Cleaning

In [11]:
# Make the cell safe to re-run: if a previous run already moved 'Date' into
# the index, restore it as a column so the steps below find it again.
if stock_data.index.name == 'Date':
    stock_data = stock_data.reset_index()

# Convert 'Date' column to datetime objects (missing dates become NaT)
stock_data['Date'] = pd.to_datetime(stock_data['Date'])

# Ensure data types are correct (Open, High, Low, Close, Volume should be numeric);
# values that cannot be parsed become NaN and are dropped below
for col in ['Open', "High", 'Low', 'Close', 'Volume']:
    stock_data[col] = pd.to_numeric(stock_data[col], errors='coerce')

stock_data = (
    stock_data
    # Drop any rows with missing values. This must happen BEFORE 'Date'
    # becomes the index: dropna() only inspects columns, so rows with a
    # NaT date would otherwise survive as NaT index entries.
    .dropna()
    # Sort the data (crucial for time-series calculations)
    .sort_values(by=['Ticker', 'Date'])
    # Set the Date as the index (useful for many financial analyses)
    .set_index('Date')
)

print("Data loaded and prepared successfully!")
print(f"Loaded data for {stock_data['Ticker'].nunique()} stocks.")
print(stock_data.head())

Data loaded and prepared successfully!
Loaded data for 6 stocks.
               Close      High       Low      Open      Volume Ticker
Date                                                                 
2009-01-02  2.721686  2.730385  2.554037  2.575630   746015200   AAPL
2009-01-05  2.836553  2.884539  2.780469  2.794266  1181608400   AAPL
2009-01-06  2.789767  2.914229  2.770872  2.877641  1289310400   AAPL
2009-01-07  2.729484  2.774170  2.706990  2.753477   753048800   AAPL
2009-01-08  2.780169  2.793666  2.700393  2.712090   673500800   AAPL
