## Dividend Data from Yahoo Finance 

In [15]:
import pandas as pd
import yfinance as yf
from pymongo import MongoClient

import os

# Connect to MongoDB.
# The connection string embeds a username and password, so it is read from the
# MONGODB_URI environment variable rather than being stored in the notebook.
# A missing variable raises KeyError immediately instead of failing later.
# NOTE(review): a password that was previously committed in this notebook
# should be rotated.
client = MongoClient(os.environ["MONGODB_URI"])
db = client["robo_advisor"]
collection = db["asset_metadata"]

# Fetch every document, projecting only the Ticker field (plus the default _id).
tickers_cursor = collection.find({}, {"Ticker": 1})

# Keep only documents that carry a non-empty Ticker value.
tickers = []
for doc in tickers_cursor:
    ticker_symbol = doc.get("Ticker")
    if ticker_symbol:
        tickers.append(ticker_symbol)

print(tickers)
print(len(tickers))

['AMD', 'COKE', 'EA', 'IEF', 'JPM', 'KO', 'LLY', 'BAC', 'CELH', 'DIA', 'IWM', 'JNJ', 'MSTR', 'PEP', 'AGG', 'C', 'FIX', 'HYG', 'IBM', 'SU', 'TLT', 'TSM', 'VEA', 'VKTX', 'VRT', 'BND', 'EEM', 'GLD', 'META', 'MRK', 'MRVL', 'PLTR', 'SPY', 'TSLA', 'ANET', 'CNQ', 'CRWD', 'CVNA', 'EFA', 'INTC', 'NVDA', 'SHY', 'TTD', 'AAPL', 'CRM', 'FICO', 'LQD', 'PFE', 'QQQ', 'VST', 'XLK', 'AMGN', 'AMZN', 'AVGO', 'CLS', 'DDOG', 'INVA', 'NFLX', 'PG', 'PYPL', 'TCEHY', 'UBER', 'VWO', 'XLE', 'XLF', 'XLV', 'XLY', 'ZG', 'ATI', 'GOOGL', 'MSFT', 'ABT', 'ARKK', 'BA', 'BMO', 'CAT', 'CM', 'COST', 'CVX', 'DBA', 'ENB', 'GNOM', 'ICLN', 'MA', 'MMM', 'ORCL', 'RIVN', 'RY', 'SHOP', 'SLV', 'SNOW', 'TAN', 'TD', 'TMO', 'UNH', 'USO', 'V', 'WEAT', 'WMT', 'XBB', 'XOM', 'GS', 'MS', 'UNG', 'PDBC', 'AMSC', 'BMA', 'FNGU', 'INDU', 'INOD', 'LBTYB', 'NDX', 'NQX', 'USD', 'USLM', 'WLFC']
116


In [16]:
# Hardcoded list of tickers to summarise.
# NOTE(review): this rebinds `tickers`, replacing whatever an earlier cell
# assigned to that name; it appears to be a pasted snapshot of the list printed
# by the MongoDB query cell -- confirm it is still current before re-running.
tickers = ['AMD', 'COKE', 'EA', 'IEF', 'JPM', 'KO', 'LLY', 'BAC', 'CELH', 'DIA', 'IWM', 'JNJ', 'MSTR', 'PEP', 'AGG', 'C', 'FIX', 'HYG', 'IBM', 'SU', 'TLT', 'TSM', 'VEA', 'VKTX', 'VRT', 'BND', 'EEM', 'GLD', 'META', 'MRK', 'MRVL', 'PLTR', 'SPY', 'TSLA', 'ANET', 'CNQ', 'CRWD', 'CVNA', 'EFA', 'INTC', 'NVDA', 'SHY', 'TTD', 'AAPL', 'CRM', 'FICO', 'LQD', 'PFE', 'QQQ', 'VST', 'XLK', 'AMGN', 'AMZN', 'AVGO', 'CLS', 'DDOG', 'INVA', 'NFLX', 'PG', 'PYPL', 'TCEHY', 'UBER', 'VWO', 'XLE', 'XLF', 'XLV', 'XLY', 'ZG', 'ATI', 'GOOGL', 'MSFT', 'ABT', 'ARKK', 'BA', 'BMO', 'CAT', 'CM', 'COST', 'CVX', 'DBA', 'ENB', 'GNOM', 'ICLN', 'MA', 'MMM', 'ORCL', 'RIVN', 'RY', 'SHOP', 'SLV', 'SNOW', 'TAN', 'TD', 'TMO', 'UNH', 'USO', 'V', 'WEAT', 'WMT', 'XBB', 'XOM', 'GS', 'MS', 'UNG', 'PDBC', 'AMSC', 'BMA', 'FNGU', 'INDU', 'INOD', 'LBTYB', 'NDX', 'NQX', 'USD', 'USLM', 'WLFC']

# Upper bounds (in days) on the mean gap between consecutive payments for each
# frequency label, checked in ascending order. A mean gap above the last bound
# is labelled "Annual".
FREQUENCY_THRESHOLDS = ((45, "Monthly"), (135, "Quarterly"), (225, "Semi-Annual"))


def classify_dividend_frequency(avg_interval_days):
    """Translate the mean number of days between payments into a frequency label.

    Args:
        avg_interval_days: Mean gap in days between consecutive dividend
            payments; NaN/None when fewer than two payments are available.

    Returns:
        "Monthly", "Quarterly", "Semi-Annual" or "Annual", or "N/A" when no
        interval could be computed.
    """
    # A single payment yields no intervals, so the mean is NaN. Every `<=`
    # comparison against NaN is False, which previously fell through to
    # "Annual"; report the frequency as unknown instead.
    if pd.isna(avg_interval_days):
        return "N/A"
    for max_days, label in FREQUENCY_THRESHOLDS:
        if avg_interval_days <= max_days:
            return label
    return "Annual"


def summarize_dividends(dividends):
    """Summarise a dividend history.

    Args:
        dividends: pandas Series of dividend amounts indexed by payment date.

    Returns:
        Tuple ``(frequency, avg_dividend)``. Both are "N/A" when the history
        is empty; otherwise ``avg_dividend`` is the mean payment amount.
    """
    if dividends.empty:
        return "N/A", "N/A"

    # Sort by date so that consecutive differences are real payment gaps.
    dividends = dividends.sort_index()
    intervals = dividends.index.to_series().diff().dt.days.dropna()
    return classify_dividend_frequency(intervals.mean()), dividends.mean()


# Collect one summary record per ticker.
dividend_data = []

for ticker in tickers:
    try:
        frequency, avg_dividend = summarize_dividends(yf.Ticker(ticker).dividends)
    except Exception as e:
        # Best effort: report the failure and record N/A so one bad ticker
        # does not abort the whole run.
        print(f"Error fetching data for {ticker}: {e}")
        frequency, avg_dividend = "N/A", "N/A"

    dividend_data.append({
        "Ticker": ticker,
        "Dividend Frequency": frequency,
        "Average Dividend per Share": avg_dividend
    })

# Convert the summary data to a DataFrame
dividend_df = pd.DataFrame(dividend_data)

# Save the DataFrame to a CSV file
dividend_df.to_csv("dividend_summary_all_tickers.csv", index=False)

# Display the summary
print(dividend_df)


    Ticker Dividend Frequency Average Dividend per Share
0      AMD                N/A                        N/A
1     COKE          Quarterly                   0.395789
2       EA          Quarterly                     0.1825
3      IEF            Monthly                   0.224545
4      JPM          Quarterly                   0.370825
..     ...                ...                        ...
111    NDX                N/A                        N/A
112    NQX                N/A                        N/A
113    USD          Quarterly                   0.008073
114   USLM        Semi-Annual                   0.034156
115   WLFC          Quarterly                        0.5

[116 rows x 3 columns]


## Insert the dividend info into the MongoDB collection

In [17]:
import pandas as pd
from pymongo import MongoClient

import os

# Connect to MongoDB.
# The connection string embeds a username and password, so it is read from the
# MONGODB_URI environment variable rather than being stored in the notebook.
client = MongoClient(os.environ["MONGODB_URI"])
db = client["robo_advisor"]
collection = db["asset_metadata"]

# Load the CSV file into a DataFrame
csv_file_path = "dividend_summary_all_tickers.csv"
dividend_data = pd.read_csv(csv_file_path)


def _restore_na_sentinel(value):
    """Return "N/A" for missing values and pass everything else through.

    pd.read_csv treats the literal text "N/A" as a missing value by default,
    so the sentinel written to the CSV comes back as NaN. Converting it here
    keeps NaN out of the MongoDB documents.
    """
    return "N/A" if pd.isna(value) else value


# Loop through each row in the CSV file
for index, row in dividend_data.iterrows():
    ticker = row["Ticker"]
    dividend_frequency = _restore_na_sentinel(row["Dividend Frequency"])
    avg_dividend_per_share = _restore_na_sentinel(row["Average Dividend per Share"])

    # Look up the asset document for this ticker.
    existing_record = collection.find_one({"Ticker": ticker})

    if not existing_record:
        # Only existing documents are enriched; unknown tickers are not inserted.
        print(f"Ticker {ticker} not found in the collection. Skipping.")
        continue

    # Documents that already carry both fields are left untouched.
    if "Dividend Frequency" in existing_record and "Average Dividend per Share" in existing_record:
        print(f"Ticker {ticker} already has the fields. Skipping update.")
        continue

    # Add the two dividend fields to the existing document.
    collection.update_one(
        {"Ticker": ticker},
        {"$set": {
            "Dividend Frequency": dividend_frequency,
            "Average Dividend per Share": avg_dividend_per_share
        }}
    )
    print(f"Updated Ticker {ticker} with new fields.")

print("Process completed.")


Ticker AMD already has the fields. Skipping update.
Ticker COKE already has the fields. Skipping update.
Ticker EA already has the fields. Skipping update.
Ticker IEF already has the fields. Skipping update.
Ticker JPM already has the fields. Skipping update.
Ticker KO already has the fields. Skipping update.
Ticker LLY already has the fields. Skipping update.
Ticker BAC already has the fields. Skipping update.
Ticker CELH already has the fields. Skipping update.
Ticker DIA already has the fields. Skipping update.
Ticker IWM already has the fields. Skipping update.
Ticker JNJ already has the fields. Skipping update.
Ticker MSTR already has the fields. Skipping update.
Ticker PEP already has the fields. Skipping update.
Ticker AGG already has the fields. Skipping update.
Ticker C already has the fields. Skipping update.
Ticker FIX already has the fields. Skipping update.
Ticker HYG already has the fields. Skipping update.
Ticker IBM already has the fields. Skipping update.
Ticker SU alr

In [18]:
from pymongo import MongoClient
import numpy as np

import os

# Connect to MongoDB.
# The connection string embeds a username and password, so it is read from the
# MONGODB_URI environment variable rather than being stored in the notebook.
client = MongoClient(os.environ["MONGODB_URI"])
db = client["robo_advisor"]
collection = db["asset_metadata"]

# Fields whose None/NaN values are rewritten to the "N/A" sentinel.
DIVIDEND_FIELDS = ("Dividend Frequency", "Average Dividend per Share")


def _is_missing(value):
    """Return True when ``value`` is None or a float NaN."""
    # Parenthesised explicitly: the isinstance guard must run before np.isnan,
    # which would raise TypeError on a string such as "Quarterly".
    return value is None or (isinstance(value, float) and np.isnan(value))


# Fetch all documents
documents = collection.find()

for doc in documents:
    # Collect only the fields that are present on the document and missing a
    # usable value; absent fields are deliberately left absent.
    update_fields = {
        field: "N/A"
        for field in DIVIDEND_FIELDS
        if field in doc and _is_missing(doc[field])
    }

    # Perform the update only when something actually needs to change.
    if update_fields:
        collection.update_one({"_id": doc["_id"]}, {"$set": update_fields})
        print(f"Updated document with _id: {doc['_id']}")

print("All NaN and None values have been replaced with 'N/A'.")


All NaN and None values have been replaced with 'N/A'.
