**Load S&P 500 ETF (SPY) Historical Data**

In [6]:
import pandas as pd

# Load the SPY historical price CSV into a DataFrame.
csv_file_path = "datasets/HistoricalData_SPY.csv"
df = pd.read_csv(csv_file_path)

# Tag every row with its ticker so it stays identifiable after it is stored
# alongside other tickers.
df['ticker'] = 'SPY'

# NOTE(review): 'Date' is read as plain strings (dtype object), not datetimes —
# confirm that is what downstream consumers expect.

# Review the data.
print(df.head())
# info() prints its own report and returns None, so wrapping it in print()
# would emit a stray "None" line after the report.
df.info()
print(df.describe())


         Date  Close/Last    Volume    Open    High     Low ticker
0  11/06/2024      591.04  68181970  589.20  591.93  585.39    SPY
1  11/05/2024      576.70  39478320  570.74  576.74  570.52    SPY
2  11/04/2024      569.81  38216980  571.18  572.50  567.89    SPY
3  11/01/2024      571.04  45667530  571.32  575.55  570.62    SPY
4  10/31/2024      568.64  60182450  575.56  575.63  568.44    SPY
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2516 entries, 0 to 2515
Data columns (total 7 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   Date        2516 non-null   object 
 1   Close/Last  2516 non-null   float64
 2   Volume      2516 non-null   int64  
 3   Open        2516 non-null   float64
 4   High        2516 non-null   float64
 5   Low         2516 non-null   float64
 6   ticker      2516 non-null   object 
dtypes: float64(4), int64(1), object(2)
memory usage: 137.7+ KB
None
        Close/Last        Volume         Open         

In [8]:
# Tally the missing (NaN/None) entries per column and show the totals.
missing_per_column = df.isnull().sum()
print(missing_per_column)

Date          0
Close/Last    0
Volume        0
Open          0
High          0
Low           0
ticker        0
dtype: int64


**Insert data into MongoDB**

In [12]:
import os
from dotenv import load_dotenv

# Load variables from a .env file (if present) into the process environment,
# then read the MongoDB connection string from it.
load_dotenv()
MONGO_URI = os.getenv("MONGO_URI")

# Fail fast when the variable is missing or empty: os.getenv() returns None in
# that case, and MongoClient(None) falls back to its default host (localhost),
# so the data would silently be sent to the wrong server.
if not MONGO_URI:
    raise RuntimeError(
        "MONGO_URI is not set. Define it in the environment or in a .env file "
        "before running this notebook."
    )

In [13]:
from pymongo import MongoClient

# Open the MongoDB connection, then resolve the database and collection
# handles that the insert cells write to.
client = MongoClient(MONGO_URI)
db = client.get_database("robo_advisor")
collection = db.get_collection("market_data")

In [14]:
from pymongo import ReplaceOne

# Convert the DataFrame to a list with one dictionary per row.
data_dict = df.to_dict("records")

# Upsert on (ticker, Date) instead of a blind insert_many(): re-running this
# cell replaces the matching documents rather than appending a second copy of
# every row to the "market_data" collection.
upserts = [
    ReplaceOne({"ticker": row["ticker"], "Date": row["Date"]}, row, upsert=True)
    for row in data_dict
]
collection.bulk_write(upserts, ordered=False)

print("SPY data successfully inserted into the 'market_data' collection.")

SPY data successfully inserted into the 'market_data' collection.


**Load NASDAQ-100 ETF (QQQ) Historical Data**

In [None]:
# Load the QQQ historical price CSV into a DataFrame.
# Note: this rebinds `df`, replacing the previously loaded ticker's data.
csv_file_path = "datasets/HistoricalData_QQQ.csv"
df = pd.read_csv(csv_file_path)

# Tag every row with its ticker so it stays identifiable after it is stored
# alongside other tickers.
df['ticker'] = 'QQQ'

# Review the data.
print(df.head())
# info() prints its own report and returns None, so wrapping it in print()
# would emit a stray "None" line after the report.
df.info()
print(df.describe())

         Date  Close/Last    Volume    Open      High       Low ticker
0  11/06/2024      505.58  43082200  500.56  506.4100  499.6000    QQQ
1  11/05/2024      492.21  24353620  487.61  492.8800  487.5200    QQQ
2  11/04/2024      486.01  23291580  486.82  489.3800  484.2545    QQQ
3  11/01/2024      487.43  33655760  485.50  490.7507  485.2000    QQQ
4  10/31/2024      483.85  41245250  492.38  492.4300  483.7500    QQQ
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2516 entries, 0 to 2515
Data columns (total 7 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   Date        2516 non-null   object 
 1   Close/Last  2516 non-null   float64
 2   Volume      2516 non-null   int64  
 3   Open        2516 non-null   float64
 4   High        2516 non-null   float64
 5   Low         2516 non-null   float64
 6   ticker      2516 non-null   object 
dtypes: float64(4), int64(1), object(2)
memory usage: 137.7+ KB
None
        Close/Last        Volu

In [17]:
from pymongo import ReplaceOne

# Convert the DataFrame to a list with one dictionary per row.
data_dict = df.to_dict("records")

# Upsert on (ticker, Date) instead of a blind insert_many(): re-running this
# cell replaces the matching documents rather than appending a second copy of
# every row to the "market_data" collection.
upserts = [
    ReplaceOne({"ticker": row["ticker"], "Date": row["Date"]}, row, upsert=True)
    for row in data_dict
]
collection.bulk_write(upserts, ordered=False)

print("QQQ data successfully inserted into the 'market_data' collection.")

QQQ data successfully inserted into the 'market_data' collection.


**Load Canadian Bond ETF (XBB) Historical Data**

In [18]:
# Load the XBB historical price CSV into a DataFrame.
# Note: this rebinds `df`, replacing the previously loaded ticker's data.
csv_file_path = "datasets/HistoricalData_XBB.csv"
df = pd.read_csv(csv_file_path)

# Tag every row with its ticker so it stays identifiable after it is stored
# alongside other tickers.
df['ticker'] = 'XBB'

# Review the data.
print(df.head())
# info() prints its own report and returns None, so wrapping it in print()
# would emit a stray "None" line after the report.
df.info()
print(df.describe())

# Report missing values per column, as is done for SPY; for this file the
# info() report shows fewer non-null 'Volume' entries than rows.
print(df.isna().sum())

         Date  Close/Last  Volume   Open   High      Low ticker
0  11/06/2024     40.4349  3250.0  40.28  40.52  40.2800    XBB
1  11/05/2024     40.4900  1844.0  40.34  40.49  40.3400    XBB
2  11/04/2024     40.2134  1071.0  40.31  40.31  40.2134    XBB
3  11/01/2024     40.2079   586.0  40.22  40.26  40.2079    XBB
4  10/31/2024     40.5300  7105.0  40.33  40.58  40.3300    XBB
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 616 entries, 0 to 615
Data columns (total 7 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   Date        616 non-null    object 
 1   Close/Last  616 non-null    float64
 2   Volume      607 non-null    float64
 3   Open        616 non-null    float64
 4   High        616 non-null    float64
 5   Low         616 non-null    float64
 6   ticker      616 non-null    object 
dtypes: float64(5), object(2)
memory usage: 33.8+ KB
None
       Close/Last         Volume        Open        High         Low
count  616.00000

In [19]:
from pymongo import ReplaceOne

# Convert the DataFrame to a list with one dictionary per row. Missing values
# (NaN) become None so MongoDB stores them as null rather than as NaN doubles;
# the XBB 'Volume' column contains missing entries.
data_dict = [
    {column: (None if pd.isna(value) else value) for column, value in row.items()}
    for row in df.to_dict("records")
]

# Upsert on (ticker, Date) instead of a blind insert_many(): re-running this
# cell replaces the matching documents rather than appending a second copy of
# every row to the "market_data" collection.
upserts = [
    ReplaceOne({"ticker": row["ticker"], "Date": row["Date"]}, row, upsert=True)
    for row in data_dict
]
collection.bulk_write(upserts, ordered=False)

print("XBB data successfully inserted into the 'market_data' collection.")

XBB data successfully inserted into the 'market_data' collection.
