**Load Stock Data**

Source: datasets are extracted from https://www.nasdaq.com/ and
https://www.macrotrends.net/

In [9]:
import pandas as pd

# Tickers to load. Each one is expected to have a CSV export named
# HistoricalData_<TICKER>.csv inside DATASETS_DIR.
DATASETS_DIR = "../datasets"
TICKERS = [
    "INTC", "CRM", "SU", "CNQ", "JNJ", "INVA", "AMGN", "TSM", "AAPL",
    "NFLX", "CLS", "AVGO", "MRVL", "PLTR", "CRWD", "ANET", "ATI", "VRT",
    "FICO", "UBER", "AMD", "PYPL", "EA", "ZG", "TTD", "DDOG",
]

# List of (file path, asset name) pairs, derived from the ticker list so the
# path and the asset label can never drift apart.
file_paths_and_assets = [
    (f"{DATASETS_DIR}/HistoricalData_{ticker}.csv", ticker) for ticker in TICKERS
]

# Columns kept for every asset, in output order
PRICE_COLUMNS = ['Date', 'Asset', 'Close']


def load_close_prices(file_path, asset_name):
    """Read one historical-price CSV and return its Date/Asset/Close columns.

    Args:
        file_path: Path of the CSV file to read.
        asset_name: Value written to the 'Asset' column of every row.

    Returns:
        A DataFrame with exactly the columns 'Date', 'Asset', 'Close'.

    Raises:
        OSError: If the file cannot be opened (e.g. it does not exist).
        ValueError: If pandas cannot parse the file (parser and empty-data
            errors are ValueError subclasses).
        KeyError: If the file has no 'Date' or 'Close' column.
    """
    prices = pd.read_csv(file_path)
    return prices.assign(Asset=asset_name)[PRICE_COLUMNS]


# Load every file on a best-effort basis: a file that cannot be read or parsed
# is reported and skipped so the remaining assets are still loaded.
dataframes = []
failed_assets = []
for file_path, asset_name in file_paths_and_assets:
    try:
        dataframes.append(load_close_prices(file_path, asset_name))
    except (OSError, ValueError, KeyError) as e:
        print(f"Error processing file {file_path}: {e}")
        failed_assets.append(asset_name)

# Summarise skipped assets so a partial load is not mistaken for a full one
if failed_assets:
    print(
        f"Warning: {len(failed_assets)} of {len(file_paths_and_assets)} "
        f"assets could not be loaded: {failed_assets}"
    )

# Combine all processed DataFrames into one. pd.concat raises
# "No objects to concatenate" on an empty list, so fall back to an empty frame
# that still has the expected columns when nothing could be loaded.
if dataframes:
    final_df = pd.concat(dataframes, ignore_index=True)
else:
    final_df = pd.DataFrame(columns=PRICE_COLUMNS)

# Display the combined DataFrame
print(final_df)

              Date Asset     Close
0       1980-03-17  INTC    0.1815
1       1980-03-18  INTC    0.1801
2       1980-03-19  INTC    0.1844
3       1980-03-20  INTC    0.1837
4       1980-03-21  INTC    0.1771
...            ...   ...       ...
147620  2024-11-15  DDOG  126.0900
147621  2024-11-18  DDOG  125.9700
147622  2024-11-19  DDOG  133.4100
147623  2024-11-20  DDOG  135.4500
147624  2024-11-21  DDOG  143.8600

[147625 rows x 3 columns]


In [10]:
# List the distinct asset tickers present in the combined frame, alphabetically
unique_assets = sorted(final_df['Asset'].unique())
print(unique_assets)

['AAPL', 'AMD', 'AMGN', 'ANET', 'ATI', 'AVGO', 'CLS', 'CNQ', 'CRM', 'CRWD', 'DDOG', 'EA', 'FICO', 'INTC', 'INVA', 'JNJ', 'MRVL', 'NFLX', 'PLTR', 'PYPL', 'SU', 'TSM', 'TTD', 'UBER', 'VRT', 'ZG']


In [11]:
# Count the missing values per column of the combined frame and show the totals
missing_per_column = final_df.isna().sum()
print(missing_per_column)

Date     0
Asset    0
Close    0
dtype: int64


**Insert data into MongoDB**

In [12]:
import os
from dotenv import load_dotenv

# Load environment variables (including any defined in a local .env file) and
# read the MongoDB connection string from them.
load_dotenv()
MONGO_URI = os.getenv("MONGO_URI")

# Fail fast when the connection string is missing: os.getenv returns None in
# that case, and per the PyMongo documentation MongoClient(None) does not raise
# but falls back to its default host (localhost), so the data could be written
# to the wrong server without any error.
if not MONGO_URI:
    raise RuntimeError(
        "MONGO_URI is not set; define it in the environment or in a .env file."
    )

In [13]:
from pymongo import MongoClient

# Open a MongoDB client using the connection string read from the environment
client = MongoClient(MONGO_URI)

# Target the "historical_prices" collection of the "robo_advisor" database
db = client.get_database("robo_advisor")
collection = db.get_collection("historical_prices")

In [14]:
# Convert the combined frame into one dictionary per row (keys are the column
# names) and bulk-insert them into the 'historical_prices' collection.
# NOTE(review): every run of this cell inserts all rows again, so re-running it
# presumably duplicates documents unless the collection enforces a unique
# index — confirm before re-executing.
data_dict = final_df.to_dict(orient="records")
collection.insert_many(data_dict)

print("data successfully inserted into the 'historical_prices' collection.")

data successfully inserted into the 'historical_prices' collection.
