# In [None]:
# Import necessary libraries
import yfinance as yf
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Fetch historical daily price data from Yahoo Finance
stock_symbol = 'AAPL'  # Example: Apple Inc. (AAPL)
start_date = '2020-01-01'
end_date = '2025-01-01'  # NOTE: per the yfinance docs the end date is exclusive

data = yf.download(stock_symbol, start=start_date, end=end_date)

# A failed download (bad symbol, network problem, rate limit) can come back
# as an empty frame instead of an exception; stop here rather than letting
# the rest of the script produce empty plots and an empty CSV.
if data.empty:
    raise ValueError(
        f"No data returned for {stock_symbol!r} "
        f"between {start_date} and {end_date}"
    )

# Newer yfinance releases may return two-level (Price, Ticker) columns even
# for a single symbol. Keep only the price-field level so that data['Close']
# is a Series and the saved CSV has a single header row.
if isinstance(data.columns, pd.MultiIndex):
    data.columns = data.columns.get_level_values(0)

# Display the first few rows of the dataset
print(data.head())

# Basic Data Exploration
print("Dataset Information:")
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() would emit a stray "None" line.
data.info()

print("\nSummary Statistics:")
print(data.describe())

# Check for missing values (count of NaN entries per column)
print("\nMissing Values:")
print(data.isnull().sum())

# Plot the closing price against the date index
close_fig, close_ax = plt.subplots(figsize=(12, 6))
close_ax.plot(data.index, data['Close'], label='Close Price')
close_ax.set_xlabel('Date')
close_ax.set_ylabel('Close Price')
close_ax.set_title('Stock Close Price Over Time')
close_ax.legend()
plt.show()

# Fill missing values (if any) by carrying the last valid observation forward.
# DataFrame.ffill() replaces fillna(method='ffill'), whose `method` argument
# is deprecated in recent pandas releases.
data = data.ffill()

# Feature Engineering: rolling mean of the closing price
window_size = 5
close_prices = data['Close']
rolling_mean = close_prices.rolling(window=window_size).mean()
data['Moving_Avg'] = rolling_mean

# The first (window_size - 1) rows have no complete window and are NaN;
# remove every row that still contains a NaN in any column.
data = data.dropna(axis=0, how='any')

# Write the processed frame (index included) to a CSV file
processed_data_path = 'processed_data.csv'
data.to_csv(path_or_buf=processed_data_path)

# Overlay the moving average (dashed) on the closing price
ma_fig, ma_ax = plt.subplots(figsize=(12, 6))
ma_ax.plot(data.index, data['Close'], label='Close Price')
ma_ax.plot(
    data.index,
    data['Moving_Avg'],
    label=f'{window_size}-Day Moving Average',
    linestyle='--',
)
ma_ax.set_xlabel('Date')
ma_ax.set_ylabel('Price')
ma_ax.set_title('Stock Price with Moving Average')
ma_ax.legend()
plt.show()

# Report where the processed CSV was written
print("\nProcessed data saved at:", processed_data_path)
