**Load Stocks Data**

Source: datasets are extracted from https://www.nasdaq.com/ and
https://www.macrotrends.net/

In [6]:
import pandas as pd

# List of file paths with their corresponding asset names
file_paths_and_assets = [
    ("../datasets/HistoricalData_COST.csv", "COST"),
    ("../datasets/HistoricalData_ORCL.csv", "ORCL"),
    ("../datasets/HistoricalData_UNH.csv", "UNH"),
    ("../datasets/HistoricalData_WMT.csv", "WMT"),
    ("../datasets/HistoricalData_MA.csv", "MA"),
    ("../datasets/HistoricalData_V.csv", "V"),
    ("../datasets/HistoricalData_BA.csv", "BA"),
    ("../datasets/HistoricalData_ENB.csv", "ENB"),
    ("../datasets/HistoricalData_RY.csv", "RY"),
    ("../datasets/HistoricalData_TD.csv", "TD"),
    ("../datasets/HistoricalData_BMO.csv", "BMO"),
    ("../datasets/HistoricalData_CM.csv", "CM"),
    ("../datasets/HistoricalData_SHOP.csv", "SHOP"),
    ("../datasets/HistoricalData_MMM.csv", "MMM"),
    ("../datasets/HistoricalData_CAT.csv", "CAT"),
    ("../datasets/HistoricalData_XOM.csv", "XOM"),
    ("../datasets/HistoricalData_CVX.csv", "CVX"),
    ("../datasets/HistoricalData_ABT.csv", "ABT"),
    ("../datasets/HistoricalData_TMO.csv", "TMO"),
    ("../datasets/HistoricalData_SNOW.csv", "SNOW"),
    ("../datasets/HistoricalData_RIVN.csv", "RIVN"),
    ("../datasets/HistoricalData_WEAT.csv", "WEAT"),
    ("../datasets/HistoricalData_SLV.csv", "SLV"),
    ("../datasets/HistoricalData_USO.csv", "USO"),
    ("../datasets/HistoricalData_DBA.csv", "DBA"),
    ("../datasets/HistoricalData_LQD.csv", "LQD"),
    ("../datasets/HistoricalData_BND.csv", "BND"),
    ("../datasets/HistoricalData_XBB.csv", "XBB"),
    ("../datasets/HistoricalData_ICLN.csv", "ICLN"),
    ("../datasets/HistoricalData_TAN.csv", "TAN"),
    ("../datasets/HistoricalData_ARKK.csv", "ARKK"),
    ("../datasets/HistoricalData_GNOM.csv", "GNOM"),
]

# Initialize an empty list to store processed DataFrames
dataframes = []

# Process each file
for file_path, asset_name in file_paths_and_assets:
    try:
        # Load the CSV into a DataFrame
        df = pd.read_csv(file_path)
        
        # Add the 'Asset' column
        df['Asset'] = asset_name
        
        # Filter and rename columns
        df = df[['Date', 'Asset', 'Close']]
        
        # Append the processed DataFrame to the list
        dataframes.append(df)
    except Exception as e:
        print(f"Error processing file {file_path}: {e}")

# Combine all processed DataFrames into one
final_df = pd.concat(dataframes, ignore_index=True)

# Display the combined DataFrame
print(final_df)

              Date Asset    Close
0       1986-07-09  COST   6.5786
1       1986-07-10  COST   6.5412
2       1986-07-11  COST   6.3170
3       1986-07-14  COST   6.1301
4       1986-07-15  COST   5.9432
...            ...   ...      ...
209702  04/15/2019  GNOM  14.9100
209703  04/12/2019  GNOM  15.0197
209704  04/11/2019  GNOM  15.0600
209705  04/10/2019  GNOM  15.3700
209706  04/09/2019  GNOM  15.0400

[209707 rows x 3 columns]


In [7]:
print(sorted(final_df['Asset'].unique()))

['ABT', 'ARKK', 'BA', 'BMO', 'BND', 'CAT', 'CM', 'COST', 'CVX', 'DBA', 'ENB', 'GNOM', 'ICLN', 'LQD', 'MA', 'MMM', 'ORCL', 'RIVN', 'RY', 'SHOP', 'SLV', 'SNOW', 'TAN', 'TD', 'TMO', 'UNH', 'USO', 'V', 'WEAT', 'WMT', 'XBB', 'XOM']


In [8]:
# print sum of missing values in each column
print(final_df.isna().sum())

Date     0
Asset    0
Close    0
dtype: int64


**Inject data to MongoDB**

In [9]:
import os
from dotenv import load_dotenv

# Load environment variables
load_dotenv()
MONGO_URI = os.getenv("MONGO_URI")

In [10]:
from pymongo import MongoClient

# Connect to MongoDB
client = MongoClient(MONGO_URI)
db = client["robo_advisor"]  # Use the "robo_advisor" database
collection = db["historical_prices"]  # Use the "market_data" collection

In [11]:
# Insert data into MongoDB
data_dict = final_df.to_dict("records")  # Convert DataFrame to list of dictionaries
collection.insert_many(data_dict)  # Insert into the "market_data" collection

print("data successfully inserted into the 'historical_prices' collection.")

data successfully inserted into the 'historical_prices' collection.
