## Dividend Data from Yahoo Finance 

In [None]:
import pandas as pd
import yfinance as yf

# Symbols to summarise. They are written as one whitespace-separated block and
# split into a list of strings, preserving the order in which they appear.
tickers = """
    AAPL ABT AGG AMD AMGN AMZN ANET ARKK ATI AVGO BA
    BAC BMO BND C CAT CELH CLS CM CNQ COKE COST CRM
    CRWD CVNA CVX DBA DDOG DIA EA EEM EFA ENB FICO
    FIX GLD GNOM GOOGL GS HYG IBM ICLN IEF INTC INVA
    IWM JNJ JPM KO LLY LQD MA META MMM MRK MRVL MS
    MSFT MSTR NFLX NVDA ORCL PDBC PEP PFE PG PLTR PYPL
    QQQ RIVN RY SHOP SHY SLV SNOW SPY SU TAN TCEHY
    TD TLT TMO TSLA TSM TTD UBER UNG UNH USO V VEA
    VKTX VRT VST VWO WEAT WMT XBB XLE XLF XLK XLV XLY
    XOM ZG
""".split()

# Initialize a list to store one summary record (dict) per ticker
dividend_data = []

# Loop through each ticker. A failure for one ticker is reported and recorded
# as "N/A" so the remaining tickers are still processed.
for ticker in tickers:
    try:
        # Fetch the dividend history for this ticker
        stock = yf.Ticker(ticker)
        dividends = stock.dividends

        if not dividends.empty:
            # Sort by date so consecutive differences are gaps between payments
            dividends = dividends.sort_index()

            # Average dividend per share over the whole returned history
            avg_dividend = dividends.mean()

            # Days between consecutive payments; the first diff has no
            # predecessor and is removed by dropna()
            intervals = dividends.index.to_series().diff().dt.days.dropna()

            if intervals.empty:
                # Only one payment on record, so there is no gap to measure.
                # Without this guard the mean would be NaN, every comparison
                # below would be False, and the ticker would fall through to
                # "Annual" even though its cadence is unknown.
                frequency = "N/A"
            else:
                # Classify by the mean gap (in days) between payments
                avg_interval = intervals.mean()
                if avg_interval <= 45:
                    frequency = "Monthly"
                elif avg_interval <= 135:
                    frequency = "Quarterly"
                elif avg_interval <= 225:
                    frequency = "Semi-Annual"
                else:
                    frequency = "Annual"
        else:
            # If no dividend history exists
            avg_dividend = "N/A"
            frequency = "N/A"
    except Exception as e:
        # Handle errors and log N/A for problematic tickers
        print(f"Error fetching data for {ticker}: {e}")
        avg_dividend = "N/A"
        frequency = "N/A"

    # Append the result to the list
    dividend_data.append({
        "Ticker": ticker,
        "Dividend Frequency": frequency,
        "Average Dividend per Share": avg_dividend
    })

# Build the summary table: one row per record collected above
dividend_df = pd.DataFrame(data=dividend_data)

# Persist the table as CSV, leaving out the positional row index
summary_csv_path = "dividend_summary_all_tickers.csv"
dividend_df.to_csv(summary_csv_path, index=False)

# Show the resulting table
print(dividend_df)


## Insert the dividend data into the MongoDB collection

In [16]:
import pandas as pd
from pymongo import MongoClient

# Connect to MongoDB.
# The connection string carries a username and password, so it must not be
# stored in the notebook. It is read from the MONGODB_URI environment variable;
# when that is unset, the user is prompted for it with input hidden. Set the
# environment variable for unattended runs where no prompt is possible.
# NOTE(review): a password was previously committed in this cell and should be
# rotated.
import os
from getpass import getpass

mongo_uri = os.environ.get("MONGODB_URI") or getpass("MongoDB connection URI: ")
client = MongoClient(mongo_uri)
db = client["robo_advisor"]
collection = db["asset_metadata"]

# Load the CSV file into a DataFrame
csv_file_path = "dividend_summary_all_tickers.csv"
dividend_data = pd.read_csv(csv_file_path)

# Loop through each row in the CSV file and add the two dividend fields to the
# matching document, but only when that document does not already have both.
for _, row in dividend_data.iterrows():
    ticker = row["Ticker"]
    dividend_frequency = row["Dividend Frequency"]
    avg_dividend_per_share = row["Average Dividend per Share"]

    # read_csv treats the literal "N/A" in the file as a missing value and
    # returns NaN for it. Map missing values back to the "N/A" sentinel so that
    # NaN is never written to the database.
    if pd.isna(dividend_frequency):
        dividend_frequency = "N/A"
    if pd.isna(avg_dividend_per_share):
        avg_dividend_per_share = "N/A"

    # Check if the document exists and if the fields are already present
    existing_record = collection.find_one({"Ticker": ticker})

    if existing_record:
        # Check if "Dividend Frequency" and "Average Dividend per Share" exist
        if "Dividend Frequency" in existing_record and "Average Dividend per Share" in existing_record:
            print(f"Ticker {ticker} already has the fields. Skipping update.")
        else:
            # Update the existing document with the new fields
            collection.update_one(
                {"Ticker": ticker},
                {"$set": {
                    "Dividend Frequency": dividend_frequency,
                    "Average Dividend per Share": avg_dividend_per_share
                }}
            )
            print(f"Updated Ticker {ticker} with new fields.")
    else:
        # If the ticker doesn't exist in the collection
        print(f"Ticker {ticker} not found in the collection. Skipping.")

print("Process completed.")


Ticker AAPL already has the fields. Skipping update.
Updated Ticker ABT with new fields.
Ticker AGG already has the fields. Skipping update.
Updated Ticker AMD with new fields.
Updated Ticker AMGN with new fields.
Updated Ticker AMZN with new fields.
Updated Ticker ANET with new fields.
Updated Ticker ARKK with new fields.
Updated Ticker ATI with new fields.
Updated Ticker AVGO with new fields.
Updated Ticker BA with new fields.
Ticker BAC already has the fields. Skipping update.
Updated Ticker BMO with new fields.
Ticker BND already has the fields. Skipping update.
Ticker C already has the fields. Skipping update.
Updated Ticker CAT with new fields.
Updated Ticker CELH with new fields.
Updated Ticker CLS with new fields.
Updated Ticker CM with new fields.
Updated Ticker CNQ with new fields.
Updated Ticker COKE with new fields.
Ticker COST already has the fields. Skipping update.
Updated Ticker CRM with new fields.
Updated Ticker CRWD with new fields.
Updated Ticker CVNA with new field

In [19]:
from pymongo import MongoClient
import numpy as np

# Connect to MongoDB.
# The connection string carries a username and password, so it must not be
# stored in the notebook. It is read from the MONGODB_URI environment variable;
# when that is unset, the user is prompted for it with input hidden. Set the
# environment variable for unattended runs where no prompt is possible.
# NOTE(review): a password was previously committed in this cell and should be
# rotated.
import os
from getpass import getpass

mongo_uri = os.environ.get("MONGODB_URI") or getpass("MongoDB connection URI: ")
client = MongoClient(mongo_uri)
db = client["robo_advisor"]
collection = db["asset_metadata"]

# Cursor over every document in the collection
documents = collection.find()

# For each document, replace a None or NaN value in either dividend field with
# the "N/A" sentinel. Fields that are absent from a document are left absent.
for record in documents:
    replacements = {}

    for field in ("Dividend Frequency", "Average Dividend per Share"):
        if field not in record:
            continue
        value = record[field]
        is_nan = isinstance(value, float) and np.isnan(value)
        if value is None or is_nan:
            replacements[field] = "N/A"

    # Write back only when at least one field had to be replaced
    if replacements:
        collection.update_one({"_id": record["_id"]}, {"$set": replacements})
        print(f"Updated document with _id: {record['_id']}")

print("All NaN and None values have been replaced with 'N/A'.")


Updated document with _id: 67426fce83f818ba3593f4d1
Updated document with _id: 67426fce83f818ba3593f4da
Updated document with _id: 67426fce83f818ba3593f4f9
Updated document with _id: 67426fce83f818ba3593f50c
Updated document with _id: 67426fce83f818ba3593f4e8
Updated document with _id: 67426fce83f818ba3593f4ff
Updated document with _id: 67426fce83f818ba3593f507
Updated document with _id: 67426fce83f818ba3593f4d4
Updated document with _id: 67426fce83f818ba3593f4df
Updated document with _id: 67426fce83f818ba3593f4e0
Updated document with _id: 67426fce83f818ba3593f509
Updated document with _id: 67426fce83f818ba3593f4d3
Updated document with _id: 67426fce83f818ba3593f4db
Updated document with _id: 67426fce83f818ba3593f4e1
Updated document with _id: 67426fce83f818ba3593f4fa
Updated document with _id: 67426fce83f818ba3593f500
Updated document with _id: 67426fce83f818ba3593f50a
Updated document with _id: 67426fce83f818ba3593f515
Updated document with _id: 6743bb052a0c1abf3501541b
Updated docu