# Stock Data Downloader 📊

This notebook automatically detects the environment (Local, Kaggle, or Google Colab) and downloads stock data to the appropriate location.

## Features ✨
- ✅ **Automatic Environment Detection**: Detects whether running locally, on Kaggle, or on Google Colab
- ✅ **Google Drive Integration**: Automatically mounts Google Drive on Colab and saves files there
- ✅ **Multi-Stock Download**: Download data for multiple stock tickers at once
- ✅ **Flexible Date Ranges**: Customize start/end dates and data intervals
- ✅ **CSV Export**: Saves each stock's data to a separate CSV file with timestamp

## Output Locations 📁
- **Google Colab**: `/content/drive/MyDrive/Stocks_Data/`
- **Kaggle**: `/kaggle/working/`
- **Local**: `./output/`

## How to Use 🚀
1. Run all cells in order (Cell → Run All)
2. If on Google Colab, you'll be prompted to authorize Google Drive access
3. Customize the `TICKERS` list and date range in the example usage section
4. The CSV files will be automatically saved to the appropriate location

## Requirements 📦
- yfinance
- pandas
- numpy

All dependencies will be installed automatically when you run the notebook.

In [1]:
# Environment Detection and Setup
import os
import sys
from pathlib import Path

def detect_environment():
    """
    Identify the platform this code is executing on.

    Google Colab is recognised by a successful ``import google.colab``;
    Kaggle is recognised by the ``KAGGLE_KERNEL_RUN_TYPE`` environment
    variable. The Colab probe takes precedence over the Kaggle one.

    Returns: 'colab', 'kaggle', or 'local' (the fallback)
    """
    try:
        import google.colab  # noqa: F401 -- imported only as a presence probe
    except ImportError:
        running_on_colab = False
    else:
        running_on_colab = True

    if running_on_colab:
        return 'colab'

    # Without the Kaggle marker variable, treat the session as local.
    return 'kaggle' if 'KAGGLE_KERNEL_RUN_TYPE' in os.environ else 'local'

def setup_environment():
    """
    Prepare the output location for the detected runtime environment.

    On Colab, Google Drive is mounted at /content/drive and output goes to
    MyDrive/Stocks_Data; on Kaggle output goes to /kaggle/working; anywhere
    else it goes to an 'output' folder under the current working directory.
    The chosen directory is created when it does not exist yet.

    Returns a tuple: (environment name, output directory as a Path).
    """
    platform_name = detect_environment()
    print(f"üîç Detected environment: {platform_name.upper()}")

    if platform_name == 'kaggle':
        target = Path('/kaggle/working')
        print("‚úÖ Using Kaggle working directory")
    elif platform_name == 'colab':
        print("üìÇ Mounting Google Drive...")
        # Imported here because the module only exists on Colab.
        from google.colab import drive
        drive.mount('/content/drive')
        target = Path('/content/drive/MyDrive/Stocks_Data')
        print("‚úÖ Google Drive mounted successfully")
    else:
        # Local run: relative to the current working directory.
        target = Path.cwd() / 'output'
        print("‚úÖ Using local directory")

    target.mkdir(parents=True, exist_ok=True)
    print(f"üìÅ Output directory: {target}")

    return platform_name, target

# Run setup
# Executes when the cell runs: detects the environment, creates the output
# directory, and stores both results in notebook-level names for later cells.
ENVIRONMENT, OUTPUT_DIR = setup_environment()

üîç Detected environment: LOCAL
‚úÖ Using local directory
üìÅ Output directory: c:\Users\Ferhat\Documents\GitHub\Stocks\output


In [2]:
# Install required packages (if not already installed)
import subprocess
import sys

def install_package(package):
    """
    Install a package with pip unless it can already be imported.

    Parameters:
    -----------
    package : str
        A pip requirement string such as 'pandas', 'pandas>=2.0' or
        'requests[socks]'. Extras, version specifiers and environment
        markers are stripped before the import check; the full, unmodified
        string is what gets passed to pip.

    Raises:
    -------
    subprocess.CalledProcessError
        If the pip command exits with a non-zero status.

    Note: the import check assumes the importable module is named like the
    distribution (with '-' replaced by '_'). A distribution whose module
    name differs is handed to pip again on every call.
    """
    import importlib
    import re

    # Keep only the distribution name: cut at the first extras bracket,
    # version operator, marker separator, URL separator or whitespace.
    module_name = re.split(r'[\[<>=!~;@\s]', package.strip(), maxsplit=1)[0]
    # Hyphens are legal in distribution names but never in module names.
    module_name = module_name.replace('-', '_')

    try:
        __import__(module_name)
        print(f"‚úì {package} already installed")
    except ImportError:
        print(f"üì¶ Installing {package}...")
        subprocess.check_call([sys.executable, "-m", "pip", "install", "-q", package])
        # Let the running interpreter notice the freshly installed files so
        # a later `import` in the same session succeeds.
        importlib.invalidate_caches()
        print(f"‚úì {package} installed successfully")

# Install required packages
# Each entry is handed to install_package, which calls pip only when its
# import check for that package fails.
packages = ['yfinance', 'pandas', 'numpy']

for package in packages:
    install_package(package)

‚úì yfinance already installed
‚úì pandas already installed
‚úì numpy already installed


In [None]:
# Import required libraries
import yfinance as yf
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import warnings
# Suppress every warning for the rest of the session: no category or module
# filter is given, so this also hides warnings raised by the libraries above.
warnings.filterwarnings('ignore')

print("‚úÖ All libraries imported successfully")

In [None]:
# Stock Data Downloader Functions
def download_stock_data(tickers, start_date=None, end_date=None, interval='1d'):
    """
    Fetch price history for one or more tickers, one request per ticker.

    Parameters:
    -----------
    tickers : list or str
        A list of ticker symbols, or a single symbol given as a string
    start_date : str, optional
        Start date in 'YYYY-MM-DD' format. When None, 365 days before now.
    end_date : str, optional
        End date in 'YYYY-MM-DD' format. When None, today's date.
    interval : str, optional
        Data interval passed through to yfinance (1d, 1wk, 1mo, etc.).
        Default is '1d'.

    Returns:
    --------
    dict : Maps each ticker that returned a non-empty DataFrame to that
        DataFrame. Tickers that returned no rows or raised an exception
        are reported on stdout and left out of the result.
    """
    # Normalise a lone symbol into a one-element list
    tickers = [tickers] if isinstance(tickers, str) else tickers

    # Fill in whichever date bounds the caller omitted
    end_date = (
        datetime.now().strftime('%Y-%m-%d') if end_date is None else end_date
    )
    start_date = (
        (datetime.now() - timedelta(days=365)).strftime('%Y-%m-%d')
        if start_date is None
        else start_date
    )

    separator = "-" * 50
    print(f"üìä Downloading data for {len(tickers)} ticker(s)")
    print(f"üìÖ Date range: {start_date} to {end_date}")
    print(f"‚è±Ô∏è  Interval: {interval}")
    print(separator)

    downloaded = {}
    failures = []

    for symbol in tickers:
        try:
            print(f"Downloading {symbol}...", end=" ")
            frame = yf.download(
                symbol,
                start=start_date,
                end=end_date,
                interval=interval,
                progress=False,
            )

            if frame.empty:
                print("‚úó No data available")
                failures.append(symbol)
            else:
                downloaded[symbol] = frame
                print(f"‚úì ({len(frame)} rows)")

        except Exception as exc:
            # One bad ticker must not abort the remaining downloads.
            print(f"‚úó Error: {exc!s}")
            failures.append(symbol)

    print(separator)
    print(f"‚úÖ Successfully downloaded: {len(downloaded)}/{len(tickers)} tickers")

    if failures:
        print(f"‚ùå Failed tickers: {', '.join(failures)}")

    return downloaded

def save_stock_data(stock_data, output_dir, prefix='stock_data'):
    """
    Save stock data to CSV files, one file per ticker.

    Files are named '<prefix>_<ticker>_<YYYYmmdd_HHMMSS>.csv'; every file
    written by one call shares the same timestamp.

    Parameters:
    -----------
    stock_data : dict
        Dictionary with ticker symbols as keys and DataFrames as values
    output_dir : Path or str
        Directory to save the files. It is created (including parents)
        when it does not exist yet.
    prefix : str, optional
        Prefix for the output filenames

    Returns:
    --------
    list : List of saved file paths (Path objects), in dict order
    """
    # Accept plain strings as well as Path objects; `str / str` would raise.
    output_dir = Path(output_dir)
    # Do not rely on an earlier setup step having created the directory.
    output_dir.mkdir(parents=True, exist_ok=True)

    saved_files = []
    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')

    print(f"\nüíæ Saving data to: {output_dir}")
    print("-" * 50)

    for ticker, data in stock_data.items():
        # Create filename
        filename = f"{prefix}_{ticker}_{timestamp}.csv"
        filepath = output_dir / filename

        # Save to CSV
        data.to_csv(filepath)
        saved_files.append(filepath)
        print(f"‚úì Saved {ticker}: {filename}")

    print("-" * 50)
    print(f"‚úÖ Total files saved: {len(saved_files)}")

    return saved_files

# Confirmation that this cell ran and the functions above are now defined.
print("‚úÖ Stock downloader functions defined")

In [None]:
# Example Usage: Download and Save Stock Data
# Customize this section with your desired tickers and date range

# Define the stocks you want to download
TICKERS = ['AAPL', 'GOOGL', 'MSFT', 'TSLA', 'AMZN']

# Define date range. Either value may be None: download_stock_data then
# substitutes 365 days ago for the start and today's date for the end.
START_DATE = '2024-01-01'  # 'YYYY-MM-DD'; change as needed
END_DATE = None  # None = today

# Download the data; the result maps each successfully downloaded ticker
# to its DataFrame (failed tickers are omitted).
stock_data = download_stock_data(
    tickers=TICKERS,
    start_date=START_DATE,
    end_date=END_DATE,
    interval='1d'  # Options: 1d, 1wk, 1mo, etc.
)

# Save the data to CSV files (one timestamped file per ticker).
# OUTPUT_DIR comes from the environment setup cell, so the files land in:
# - Google Colab: /content/drive/MyDrive/Stocks_Data/
# - Kaggle: /kaggle/working/
# - Local: ./output/
saved_files = save_stock_data(stock_data, OUTPUT_DIR, prefix='stock_data')

# NOTE: this message is printed even when stock_data was empty and no file was written.
print(f"\nüéâ Process complete! Files saved to {OUTPUT_DIR}")

In [None]:
# Optional: preview one of the downloaded datasets
if 'stock_data' not in globals() or not stock_data:
    # Either the download cell never ran or it produced nothing.
    print("‚ö†Ô∏è No data available to display. Please run the download cell first.")
else:
    print("\nüìà Sample Data Preview:")
    print("=" * 80)

    # The first ticker in the dict serves as the sample
    sample_ticker = next(iter(stock_data))
    sample_data = stock_data[sample_ticker]

    print(f"\nSample: {sample_ticker}")
    print(f"Shape: {sample_data.shape[0]} rows √ó {sample_data.shape[1]} columns")
    print(f"Date range: {sample_data.index[0]} to {sample_data.index[-1]}")

    # Heading and table are emitted together, separated by a newline
    print("\nFirst 5 rows:", sample_data.head(), sep="\n")
    print("\nLast 5 rows:", sample_data.tail(), sep="\n")

    # Descriptive statistics for the sample
    print("\nüìä Summary Statistics:", sample_data.describe(), sep="\n")