# **1. Mount Google Drive**

In [1]:
# Mount Google Drive at /content/drive so later cells can use paths under that folder.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# **2. Access the folder to store data**

In [2]:
# Change the working directory to the folder where the data files should be stored.
# General form: %cd folder_name_want_to_store_data_in
%cd /content/drive/MyDrive/Swinburne/COS30018_IntelligentSystem/COS30018_GroupAssignment/SharedNotebooks/group2_code_submission/data/raw

/content/drive/MyDrive/Swinburne/COS30018_IntelligentSystem/COS30018_GroupAssignment/SharedNotebooks/group2_code_submission/data/raw


# **3. Download stock data up to a specific date and save it into the data folder**


In [None]:
import yfinance as yf
import pandas as pd
from datetime import datetime

# Ticker symbol -> company name (the company name is used in the output filenames)
stocks = dict(
    AAPL="Apple",
    AMZN="Amazon",
    MSFT="Microsoft",
    GOOGL="Google",
)

# Date range requested from Yahoo Finance (YYYY-MM-DD strings)
start_date, end_date = "2012-01-01", "2024-12-12"

# Function to download and save stock data
def download_and_save_stock_data(stock_code, company_name, start, end):
    """Download daily price data for one ticker and save it as a headerless CSV.

    The file is written to the current working directory and is named
    ``{company_name}_{start}_to_{end}.csv`` with every dash removed from the
    name (e.g. ``Apple_20120101_to_20241212.csv``).

    Args:
        stock_code: Ticker symbol passed to ``yf.download`` (e.g. ``"AAPL"``).
        company_name: Name used in the output filename and in log messages.
        start: Start date string passed to ``yf.download``.
        end: End date string passed to ``yf.download``.
            NOTE(review): yfinance appears to treat ``end`` as exclusive - confirm.

    Returns:
        None. The outcome is reported through printed messages. Any exception
        raised while downloading or saving is caught and printed, not re-raised.
    """
    try:
        # Download the data from Yahoo Finance exactly once; a second identical
        # request only doubles the network traffic.
        data = yf.download(stock_code, start=start, end=end, interval="1d")
        # data = yf.download(stock_code) #if take full data

        # Nothing to save when the download came back empty
        if data.empty:
            print(f"[WARNING] No data found for {company_name} ({stock_code})")
            return

        # Generate filename with date information
        filename = f"{company_name}_{start}_to_{end}.csv"
        filename = filename.replace("-", "")  # Remove dashes for clean filenames

        # Save data to CSV without column headers
        data.to_csv(filename, header=False)
        # data.to_csv(filename)

        print(f"\n[INFO] Data for {company_name} ({stock_code}) saved to: {filename}")
    except Exception as e:
        print(f"[ERROR] Error retrieving data for {company_name} ({stock_code}): {e}")

# Download and save every configured stock, one ticker at a time
print("[INFO] Starting stock data download...\n")
for stock_code in stocks:
    company_name = stocks[stock_code]
    download_and_save_stock_data(
        stock_code,
        company_name,
        start_date,
        end_date,
    )

print("\n[INFO] Stock data download and save completed.")

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed

[INFO] Starting stock data download...


[INFO] Data for Apple (AAPL):
[INFO] Saved to: Apple_20120101_to_20241212.csv



[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


[INFO] Data for Amazon (AMZN):
[INFO] Saved to: Amazon_20120101_to_20241212.csv

[INFO] Data for Microsoft (MSFT):
[INFO] Saved to: Microsoft_20120101_to_20241212.csv

[INFO] Data for Google (GOOGL):
[INFO] Saved to: Google_20120101_to_20241212.csv

[INFO] Stock data download and save completed.





# **4. Add the column headings to the CSV files**

In [3]:
### PARAMETERS
# Company name -> CSV file name; every file shares the same date-range suffix
companies = {
    name: f"{name}_20120101_to_20241212.csv"
    for name in ("Apple", "Amazon", "Microsoft", "Google")
}

### OUTPUT
# company_data dictionary --> for all companies
# apple_data --> for Apple
# 6 columns = ["Date", "Close", "High", "Low", "Open", "Volume"]
############################

import pandas as pd
import matplotlib.pyplot as plt
import os

# Loaded DataFrames, keyed by company name
company_data = dict()

# Function to read CSV data
def read_csv_data(file_name):
    """Load a stock CSV and make sure the file on disk has column headings.

    A headerless file is read with the fixed column names
    ``Date, Close, High, Low, Open, Volume`` and then rewritten in place with
    those names as its first line. A file whose first line already is that
    heading is read as-is and left untouched, so calling this function again
    on the same file does not turn the heading into a data row.

    Args:
        file_name: Path of the CSV file to read (and, if needed, rewrite).

    Returns:
        The loaded ``pandas.DataFrame``, or ``None`` if reading or writing
        raised an exception (the error is printed, not re-raised).
    """
    column_names = ["Date", "Close", "High", "Low", "Open", "Volume"]
    try:
        # Peek at the first line to find out whether the heading was already added
        with open(file_name, "r", encoding="utf-8") as handle:
            first_line = handle.readline().strip()
        has_header = [cell.strip() for cell in first_line.split(",")] == column_names

        if has_header:
            data = pd.read_csv(file_name)
        else:
            # Read the CSV file without headers and add column names
            data = pd.read_csv(file_name, header=None, names=column_names)
        # data["Date"] = pd.to_datetime(data["Date"])  # Convert Date column to datetime
        print(f"--------Data in {file_name}--------")
        print(data.head(5))

        if has_header:
            print(f"File already has correct headings: {file_name}")
        else:
            # Overwrite the original file with correct column names
            data.to_csv(file_name, index=False)
            print(f"Updated file with correct headings: {file_name}")

        return data

    except Exception as e:
        print(f"Error reading file {file_name}: {e}")
        return None

# Populate company_data, reporting files that are missing or fail to load
for company, file_name in companies.items():
    if not os.path.exists(file_name):
        print(f"File not found: {file_name}")
        continue

    data = read_csv_data(file_name)
    if data is None:
        print(f"Failed to load data for {company}")
        continue

    company_data[company] = data

--------Data in Apple_20120101_to_20241212.csv--------
         Date      Close       High        Low       Open     Volume
0  2012-01-03  12.388996  12.427257  12.321814  12.333865  302220800
1  2012-01-04  12.455577  12.492935  12.330250  12.351941  260022000
2  2012-01-05  12.593857  12.609522  12.432377  12.501067  271269600
3  2012-01-06  12.725514  12.736059  12.629712  12.646281  318292800
4  2012-01-09  12.705328  12.886691  12.693880  12.818906  394024400
Updated file with correct headings: Apple_20120101_to_20241212.csv
--------Data in Amazon_20120101_to_20241212.csv--------
         Date   Close    High     Low    Open     Volume
0  2012-01-03  8.9515  8.9740  8.7775  8.7945  102216000
1  2012-01-04  8.8755  9.0250  8.8035  8.9605   84104000
2  2012-01-05  8.8805  8.9125  8.7025  8.7970   76182000
3  2012-01-06  9.1305  9.2325  8.8750  8.9035  140168000
4  2012-01-09  8.9280  9.2185  8.8500  9.1380  101138000
Updated file with correct headings: Amazon_20120101_to_20241212.cs