In [1]:
# Import necessary libraries
import yfinance as yf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.ensemble import IsolationForest
from sklearn.preprocessing import StandardScaler
from statsmodels.tsa.arima.model import ARIMA
import ipywidgets as widgets
from IPython.display import display

# Set up the visualization aesthetics
sns.set(style="whitegrid")
%matplotlib inline

# Define the list of companies to analyze (by stock symbol)
companies = ['AAPL', 'MSFT', 'GOOGL', 'AMZN', 'TSLA']
bright_colors = ['#ff9999', '#66b3ff', '#99ff99', '#ffcc99', '#c2c2f0']

# Function to fetch live stock data from Yahoo Finance
def fetch_stock_data(symbol, start_date='2020-01-01'):
    stock_data = yf.download(symbol, start=start_date)
    stock_data.reset_index(inplace=True)
    stock_data['Symbol'] = symbol
    return stock_data

# Function to display stock anomalies for a specific company
def display_anomalies_for_company(company, all_data, anomalies):
    company_data = all_data[all_data['Symbol'] == company]
    company_anomalies = anomalies[anomalies['Symbol'] == company]

    # Create new figure for the company-specific anomalies
    plt.figure(figsize=(14, 7))
    plt.plot(company_data['Date'], company_data['Close'], label=f'{company} Close Price', color='blue')
    plt.scatter(company_anomalies['Date'], company_anomalies['Close'], color='red', label='Anomalies', marker='x')
    plt.title(f'{company} Stock Price with Anomalies', fontsize=16)
    plt.xlabel('Date')
    plt.ylabel('Closing Price')
    plt.legend()
    plt.show()

# Fetch data for all companies dynamically
all_data = pd.DataFrame()

for company in companies:
    data = fetch_stock_data(company)  # This will fetch fresh, live data each time
    all_data = pd.concat([all_data, data], axis=0)

# Sort by date for time series analysis
all_data.sort_values('Date', inplace=True)

# Feature engineering: Create new features like returns, moving averages, etc.
all_data['returns'] = all_data['Close'].pct_change()
all_data['volume_change'] = all_data['Volume'].pct_change()

# Drop missing values that arise from feature creation
all_data.dropna(subset=['returns', 'volume_change'], inplace=True)

# Standardize the features for anomaly detection
scaler = StandardScaler()
df_scaled = scaler.fit_transform(all_data[['returns', 'volume_change']])

# Initialize the Isolation Forest model
model = IsolationForest(n_estimators=100, contamination=0.01, random_state=42)

# Fit the model and predict anomalies (-1 indicates anomaly)
all_data['anomaly'] = model.fit_predict(df_scaled)

# Filter anomalies
anomalies = all_data[all_data['anomaly'] == -1]

### Part 1: Anomalies Detection - Separate Graphs for all companies

fig, axes = plt.subplots(nrows=2, ncols=3, figsize=(20, 10))
fig.suptitle('Anomalies in Stock Prices for Multiple Companies', fontsize=18)

for i, company in enumerate(companies):
    row, col = divmod(i, 3)  # Arrange in 2 rows, 3 columns
    company_data = all_data[all_data['Symbol'] == company]
    company_anomalies = anomalies[anomalies['Symbol'] == company]

    ax = axes[row, col]
    ax.plot(company_data['Date'], company_data['Close'], label=f'{company} Close Price', color=bright_colors[i])
    ax.scatter(company_anomalies['Date'], company_anomalies['Close'], color='red', label='Anomalies', marker='x')
    ax.set_title(f'{company} Stock Price', fontsize=14)
    ax.set_xlabel('Date')
    ax.set_ylabel('Closing Price')

    # Annotate anomalies explanation
    anomalies_count = len(company_anomalies)
    anomaly_text = f"{company} had {anomalies_count} unexpected price patterns."
    ax.text(0.5, -0.2, anomaly_text, transform=ax.transAxes, fontsize=10, va='center', ha='center')

# Remove the empty subplot (if there are 5 graphs, one slot is empty)
if len(companies) < 6:
    fig.delaxes(axes[1, 2])

plt.tight_layout(rect=[0, 0, 1, 0.95])
plt.show()

### Part 2: Investment Suggestion with Forecast and Suggested Amount

fig, axes = plt.subplots(nrows=2, ncols=3, figsize=(20, 10))
fig.suptitle('Investment Suggestions Based on Stock Price Forecast', fontsize=18)

forecast_days = 30
portfolio_value = 10000  # Hypothetical portfolio value

investment_suggestions = []

for i, company in enumerate(companies):
    row, col = divmod(i, 3)  # Arrange in 2 rows, 3 columns
    company_data = all_data[all_data['Symbol'] == company]

    # ARIMA model for forecasting
    model_arima = ARIMA(company_data['Close'], order=(5, 1, 0))  # (p,d,q) order of the ARIMA model
    model_fit = model_arima.fit()

    # Predicting the next 30 days
    forecast = model_fit.forecast(steps=forecast_days)
    forecast_dates = pd.date_range(company_data['Date'].iloc[-1], periods=forecast_days, freq='D')

    # Plot forecast
    ax_forecast = axes[row, col]
    ax_forecast.plot(company_data['Date'], company_data['Close'], label=f'{company} Historical Price', color=bright_colors[i])
    ax_forecast.plot(forecast_dates, forecast, label=f'{company} Forecast', linestyle='--', color='orange')
    ax_forecast.set_title(f'{company} Stock Price Forecast', fontsize=14)
    ax_forecast.set_xlabel('Date')
    ax_forecast.set_ylabel('Closing Price')

    # Simple Investment Strategy
    last_closing_price = company_data['Close'].iloc[-1]
    future_price = forecast.iloc[-1]

    if future_price > last_closing_price:  # Predicted price increase
        percentage_increase = ((future_price / last_closing_price) - 1) * 100
        investment_advice = f"Buy: The stock is expected to increase by {percentage_increase:.2f}%."
        suggested_investment = portfolio_value * (percentage_increase / 100)  # Dynamic investment based on forecasted gain
        advice_text = f"Suggestion: {investment_advice}\nInvest: ${suggested_investment:.2f}"
    else:  # Predicted price decrease
        percentage_decrease = ((last_closing_price / future_price) - 1) * 100
        investment_advice = f"Sell: The stock is expected to decrease by {percentage_decrease:.2f}%."
        suggested_sell = portfolio_value * (percentage_decrease / 100)
        advice_text = f"Suggestion: {investment_advice}\nSell: ${suggested_sell:.2f}"

    investment_suggestions.append(f"{company}: {investment_advice}")

    # Annotate investment suggestion explanation
    ax_forecast.text(0.5, -0.2, advice_text, transform=ax_forecast.transAxes, fontsize=10, va='center', ha='center')

# Remove the empty subplot (if there are 5 graphs, one slot is empty)
if len(companies) < 6:
    fig.delaxes(axes[1, 2])

plt.tight_layout(rect=[0, 0, 1, 0.95])
plt.show()

# Print all investment suggestions
print("\n--- Investment Suggestions ---")
for suggestion in investment_suggestions:
    print(suggestion)

### Part 3: Asking User Which Company to Focus on for Detailed Analysis

# Create dropdown for company selection in Jupyter
dropdown = widgets.Dropdown(
    options=companies,
    description='Company:',
)

def on_change(change):
    if change['type'] == 'change' and change['name'] == 'value':
        selected_company = change['new']
        display_anomalies_for_company(selected_company, all_data, anomalies)

# Observe the dropdown selection
dropdown.observe(on_change)

# Display dropdown
display(dropdown)


[*********************100%***********************]  1 of 1 completed

1 Failed download:
['AAPL']: ConnectionError(MaxRetryError('HTTPSConnectionPool(host=\'query1.finance.yahoo.com\', port=443): Max retries exceeded with url: /v1/test/getcrumb (Caused by NameResolutionError("<urllib3.connection.HTTPSConnection object at 0x000002632C006090>: Failed to resolve \'query1.finance.yahoo.com\' ([Errno 11001] getaddrinfo failed)"))'))
[*********************100%***********************]  1 of 1 completed

1 Failed download:
['MSFT']: ConnectionError(MaxRetryError('HTTPSConnectionPool(host=\'query1.finance.yahoo.com\', port=443): Max retries exceeded with url: /v1/test/getcrumb (Caused by NameResolutionError("<urllib3.connection.HTTPSConnection object at 0x000002632C098500>: Failed to resolve \'query1.finance.yahoo.com\' ([Errno 11001] getaddrinfo failed)"))'))
[*********************100%***********************]  1 of 1 completed

1 Failed download:
['GOOGL']: ConnectionError(MaxRetryError('HTTPS

ValueError: Found array with 0 sample(s) (shape=(0, 2)) while a minimum of 1 is required by StandardScaler.