# **Load Batch Data**

In [8]:
# importing the module
import os
import pandas as pd
import time
import finnhub
import wikipedia
from pymongo import MongoClient
import keys
import datetime

# Create the Finnhub and MongoDB handles shared by the cells below.
finnhub_client = finnhub.Client(api_key=keys.finnhub_api_key)
client = MongoClient(
    host=f"mongodb+srv://{keys.mongodb_username}:{keys.mongodb_password}@clusternex.c1ok7xn.mongodb.net/Stocks_Data"
)

### **1.1 Load Historical Data of All Stocks**

In [10]:
# Read every CSV in ./historical-data/ and load it into the database.
# os.listdir also returns non-CSV entries (checkpoint folders, OS metadata
# files, ...), so anything that is not a *.csv file is skipped. Sorting makes
# the load order deterministic.
for dr in sorted(os.listdir("./historical-data/")):
    if not dr.endswith(".csv"):
        continue

    # read data
    data = pd.read_csv("./historical-data/" + dr)

    # apply some simple transformations
    data.columns = ["date", "close", "volume", "open", "high", "low"]
    for price_column in ("close", "open", "high", "low"):
        # prices may be written as "$123.45"; drop the sign and convert to float
        data[price_column] = data[price_column].apply(
            lambda x: float(str(x).removeprefix("$"))
        )
    data["date"] = pd.to_datetime(data["date"])
    data = data.sort_values(by="date")

    # store data in mongodb having collection name same as file name
    collection = client["Stocks_Data"][dr.removesuffix(".csv")]
    records = data.to_dict("records")
    if records:
        # insert_many rejects an empty list, so a header-only file is skipped
        collection.insert_many(records)

    print("Done Loading " + dr + " data...")

Done Loading AAPL.csv data...
Done Loading ABNB.csv data...
Done Loading ADBE.csv data...
Done Loading AEP.csv data...
Done Loading AMD.csv data...
Done Loading AMZN.csv data...
Done Loading ATVI.csv data...
Done Loading COKE.csv data...
Done Loading CSCO.csv data...
Done Loading EA.csv data...
Done Loading EBAY.csv data...
Done Loading F.csv data...
Done Loading GOOG.csv data...
Done Loading HON.csv data...
Done Loading INTC.csv data...
Done Loading META.csv data...
Done Loading MSFT.csv data...
Done Loading NFLX.csv data...
Done Loading NVDA.csv data...
Done Loading PYPL.csv data...
Done Loading QCOM.csv data...
Done Loading SBUX.csv data...
Done Loading TEAM.csv data...
Done Loading TSLA.csv data...
Done Loading TTWO.csv data...
Done Loading TXN.csv data...
Done Loading WBD.csv data...
Done Loading XPEV.csv data...


In [11]:
# Ticker symbols whose data is fetched from Finnhub by the cells below.
shares = [
    "AAPL", "GOOG", "MSFT", "CSCO", "META", "AMZN", "EBAY",
    "TSLA", "ADBE", "NFLX", "SBUX", "AMD", "NVDA", "QCOM",
    "INTC", "EA", "ATVI", "TTWO", "PYPL", "COKE", "TXN",
    "XPEV", "F", "ABNB", "AEP", "TEAM", "HON", "WBD",
]

def DateTimeToTimeStamp(dateTime):
    """Convert a date-time to an integer Unix timestamp (whole seconds).

    Args:
        dateTime: Either a ``datetime.datetime`` or any value whose ``str()``
            has the form ``"YYYY-MM-DD HH:MM:SS"``. Naive values are
            interpreted as local time; the sub-second part of a datetime
            object is discarded.

    Returns:
        int: Seconds since the Unix epoch.

    Raises:
        ValueError: If a non-datetime value does not match the expected format.
    """
    if isinstance(dateTime, datetime.datetime):
        if dateTime.tzinfo is not None:
            # Aware values already identify an absolute instant.
            return int(dateTime.timestamp())
        # Rebuild from the whole-second fields. str() of a datetime that
        # carries microseconds does not match the format below and would
        # raise, and this keeps the naive-local-time interpretation.
        parsed = datetime.datetime(
            dateTime.year,
            dateTime.month,
            dateTime.day,
            dateTime.hour,
            dateTime.minute,
            dateTime.second,
        )
    else:
        parsed = datetime.datetime.strptime(str(dateTime), "%Y-%m-%d %H:%M:%S")

    return int(parsed.timestamp())


def TimeStampToDateTime(timeStamp):
    """Return the local-time ``datetime`` for a Unix timestamp in seconds."""
    local_moment = datetime.datetime.fromtimestamp(timeStamp)
    return local_moment


def TimeStampToDate(timeStamp):
    """Return the local calendar date for a Unix timestamp in seconds."""
    local_moment = datetime.datetime.fromtimestamp(timeStamp)
    return local_moment.date()


def Extract_Data(time=None):
    """Fetch one daily candle per tracked symbol from Finnhub.

    Args:
        time: Unix timestamp (seconds) passed as both the start and the end
            of the requested range. Defaults to the current time. The name
            shadows the ``time`` module inside this function; it is kept so
            existing keyword callers continue to work.

    Returns:
        pandas.DataFrame: Columns ``close, high, low, open, status, date,
        volume, symbol``. Symbols for which the response carries no candles
        are left out, so the frame may be empty.
    """
    raw_columns = ["c", "h", "l", "o", "s", "t", "v", "symbol"]

    # Setup Finnhub client
    client = finnhub.Client(api_key=keys.finnhub_api_key)

    # current time
    if time is None:
        time = DateTimeToTimeStamp(datetime.datetime.now().isoformat(" ", "seconds"))

    # Extract data of each share, collecting the frames and concatenating
    # once instead of growing a DataFrame inside the loop.
    frames = []
    for share in shares:
        dataDictionary = client.stock_candles(share, "D", time, time)
        if not dataDictionary.get("t"):
            # No candle timestamps (e.g. a "no_data" status on a market
            # holiday): the response holds only scalars, from which
            # pd.DataFrame cannot build rows, so skip this symbol.
            continue
        dataDictionary["symbol"] = share
        frames.append(pd.DataFrame(dataDictionary))

    if frames:
        data = pd.concat(frames, ignore_index=True).reindex(columns=raw_columns)
    else:
        data = pd.DataFrame(columns=raw_columns)

    # change column names (by key, so the mapping does not depend on order)
    return data.rename(
        columns={
            "c": "close",
            "h": "high",
            "l": "low",
            "o": "open",
            "s": "status",
            "t": "date",
            "v": "volume",
        }
    )

def Transform_Data(data):
    """Apply the simple transformations needed before loading.

    The ``date`` column (Unix timestamps) is converted with
    ``TimeStampToDateTime``, ``volume`` is cast to ``int``, the ``status``
    column is dropped and the columns are put in a fixed order.

    Args:
        data: DataFrame with at least the columns ``date``, ``volume`` and
            ``status``. It is not modified; the transformation is applied
            to a copy.

    Returns:
        pandas.DataFrame: New frame with columns
        ``date, close, volume, open, high, low, symbol``.
    """
    # assign() works on a copy, so the caller's frame keeps its raw timestamps.
    transformed = (
        data.assign(date=data["date"].apply(TimeStampToDateTime))
        .astype({"volume": int})
        .drop("status", axis=1)
        .reindex(columns=["date", "close", "volume", "open", "high", "low", "symbol"])
    )

    return transformed

def Load_Day_End_Data(data):
    """Insert each row of ``data`` into the MongoDB collection named after its symbol.

    Args:
        data: DataFrame with a ``symbol`` column. The remaining columns of
            each row form the document inserted into
            ``Stocks_Data.<symbol>``.
    """
    # Setup Mongodb Connection
    url = f"mongodb+srv://{keys.mongodb_username}:{keys.mongodb_password}@clusternex.c1ok7xn.mongodb.net/Stocks_Data"

    symbols = data["symbol"]
    records = data.drop("symbol", axis=1).to_dict("records")

    # The context manager closes the connection even when an insert raises.
    with MongoClient(url) as client:
        database = client["Stocks_Data"]

        # insert day end data in the collection of the matching symbol
        for s, d in zip(symbols, records):
            database[s].insert_one(d)

### **1.2 Load Any Previous Data That Is Not Available up to Yesterday's Date from the Finnhub API**

In [13]:

# Find the most recent day already stored; AAPL is used as the reference symbol.
collection = client["Stocks_Data"]["AAPL"]
latest = collection.find_one(sort=[("date", -1)])
if latest is None:
    raise RuntimeError("Stocks_Data.AAPL is empty - load the historical data first.")

# First missing day. The 18-hour offset is applied only once; afterwards the
# loop advances by exactly one day. Adding 18 hours on every step would make
# the time of day drift and skip calendar days.
date = latest["date"] + datetime.timedelta(days=1, hours=18)
today = datetime.datetime.now().date()

# Insert the data of every day before today, skipping Saturday and Sunday.
# The weekday test lives inside the loop so a weekend is stepped over
# instead of ending the backfill.
while date.date() < today:
    if date.weekday() < 5:
        day_data = Extract_Data(DateTimeToTimeStamp(date.isoformat(" ", "seconds")))
        day_data = Transform_Data(day_data)
        Load_Day_End_Data(day_data)
    date = date + datetime.timedelta(days=1)


### **2.1 Fetch Company Stock Data from the Finnhub API, such as Company Profile Data, Basic Financial Metrics, Annual Financials Data, Sell/Buy Recommendations, etc.**

In [35]:
# Fields copied from each Finnhub response. "ticker" is not copied because
# the symbol is added manually at the top level of every document.
profile_type = ["country", "currency", "exchange", "finnhubIndustry", "ipo", "logo", "name", "weburl"]
metric_types = ["13WeekPriceReturnDaily", "26WeekPriceReturnDaily", "52WeekHigh", "52WeekLow", "currentRatioAnnual", "quickRatioAnnual", "epsAnnual", "roiAnnual", "bookValuePerShareAnnual", "cashFlowPerShareAnnual", "dividendPerShareAnnual", "marketCapitalization", "netProfitMarginAnnual"]
# a list (not a set) keeps the key order of the stored documents stable
series_types = ["eps", "longtermDebtTotalEquity", "totalDebtToEquity", "cashRatio"]

final_data = []

# fetch data from finnhub api
for s in shares:

    # company profile data; fields missing from the response are left out.
    # Membership tests are used here because a `continue` inside `finally`
    # would silently discard every exception, including KeyboardInterrupt.
    profile = finnhub_client.company_profile2(symbol=s)
    company_profile = {x: profile[x] for x in profile_type if x in profile}

    # company short description (only possible when the profile has a name)
    company_name = company_profile.get("name")
    if company_name:
        company_profile["description"] = wikipedia.summary(company_name, sentences=5)

    # basic financial metrics
    financials = finnhub_client.company_basic_financials(s, 'all')
    available_metrics = financials.get("metric") or {}
    metric = {x: available_metrics[x] for x in metric_types if x in available_metrics}

    # annual financial data
    annual_series = (financials.get("series") or {}).get("annual") or {}
    series = {x: annual_series[x] for x in series_types if x in annual_series}

    # recommendations for the company stock; the first entry of the
    # response is used, or an empty dict when the list is empty
    trends = finnhub_client.recommendation_trends(s)
    recommendations = dict(trends[0]) if trends else {}
    recommendations.pop("symbol", None)

    # add data to list
    final_data.append({"symbol": s, "basics": company_profile, "metric": metric, "series": series, "recommendations": recommendations})

    # 2 seconds sleep so that, we won't exceed api call limit
    time.sleep(2)

### **2.2 Load Data to Database**

In [36]:
# store data in mongodb
# The collection gets its own name so that `client` keeps referring to the
# MongoClient. Rebinding `client` to a collection would make a re-run of the
# earlier cells index into that collection instead of the database.
stocks_information = client["Stocks_Data"]["Stocks_Information"]
stocks_information.insert_many(final_data)

<pymongo.results.InsertManyResult at 0x255f90cb0a0>