In [8]:
import pandas as pd
import numpy as np
import tidyxbrl
import plotly.express as px
import plotly.graph_objects as go

from src.tidyxbrl.xbrl_apikey import *
from src.tidyxbrl.xbrl_query import *
from src.tidyxbrl.xbrl_parse import *
from src.tidyxbrl.edgar_query import *
from src.tidyxbrl.edgar_cik import *
from src.tidyxbrl.edgar_frames import *

pd.set_option("display.max_colwidth", None)

In [9]:
companylist = [
    "avison young",
    "brookfield asset management",
    "jones lang lasalle inc",
    "cbre group, inc.",
    "cushman & wakefield plc",
    "newmark group, inc",
    "colliers international group inc.",
    "brookfield asset management",
    "american tower",
    "prologis",
    "crown castle international",
    "weyerhaeuser",
    "equinix",
    "equity residential",
    "public storage",
    "alexandria real estate",
    "avalonbay communities",
    "digital realty trust",
    "boston properties",
    "segro",
    "vici properties",
    "ventas",
    "welltower",
    "realty income corporation",
    "annaly capital management",
    "essex property trust"
]

company_cik_list = pd.DataFrame()
for company in companylist:
    company_cik_list = pd.concat([
        company_cik_list,
        edgar_cik(company, comprehensive=False)
        ]
    )

company_cik_list =company_cik_list.reset_index(drop=True).drop_duplicates()

print(company_cik_list.to_string())

Start Row: 0 - Avison Young (Canada) Inc.
Start Row: 0 - BROOKFIELD Corp /ON/SIC: 6512 - OPERATORS OF NONRESIDENTIAL BUILDINGS
Start Row: 0 - JONES LANG LASALLE INCSIC: 6531 - REAL ESTATE AGENTS & MANAGERS (FOR OTHERS)
Start Row: 0 - BROOKFIELD Corp /ON/SIC: 6512 - OPERATORS OF NONRESIDENTIAL BUILDINGS
Start Row: 0 - AMERICAN TOWER CORPSIC: 3669 - COMMUNICATIONS EQUIPMENT, NEC
Start Row: 0 - PROLOGISSIC: 6798 - REAL ESTATE INVESTMENT TRUSTS
Start Row: 0 - WEYERHAEUSER COSIC: 6798 - REAL ESTATE INVESTMENT TRUSTS
Start Row: 0 - Equinix Europe 2 Financing Corp LLC
Start Row: 0 - EQUITY RESIDENTIALSIC: 6798 - REAL ESTATE INVESTMENT TRUSTS
Start Row: 0 - Public StorageSIC: 6798 - REAL ESTATE INVESTMENT TRUSTS
Start Row: 0 - ALEXANDRIA REAL ESTATE EQUITIES, INC.SIC: 6798 - REAL ESTATE INVESTMENT TRUSTS
Start Row: 0 - DIGITAL REALTY TRUST, INC.SIC: 6798 - REAL ESTATE INVESTMENT TRUSTS
Start Row: 0 - BOSTON PROPERTIES INCSIC: 6798 - REAL ESTATE INVESTMENT TRUSTS
Final Row Reached At 0
Start Ro

In [10]:
%%capture

### Series

submissions_raw = pd.DataFrame()
company_facts_raw = pd.DataFrame()

for index, desiredcorp in company_cik_list.iterrows():
    print(desiredcorp)
    try:
        submissions = edgar_query(desiredcorp.cik_str, query_type="submissions", parse_pandas=True)
    except:
        submissions = pd.DataFrame()
    try:
        companyfacts = edgar_query(desiredcorp.cik_str, query_type="companyfacts", parse_pandas=True)
    except:
        companyfacts = pd.DataFrame()
    submissions_raw = pd.concat([submissions_raw, submissions])
    company_facts_raw = pd.concat([company_facts_raw, companyfacts])

submissions = pd.merge(
    company_cik_list, submissions_raw, how="left", left_on=["cik"], right_on=["cik"]
)
company_facts = pd.merge(
    company_cik_list,
    company_facts_raw,
    how="left",
    left_on=["cik"],
    right_on=["cik"],
)



### Parallel (Not Reccomended)

# import concurrent.futures

# # Define a function to process each desiredcorp
# def process_desiredcorp(desiredcorp):
#     try:
#         submissions = edgar_query(desiredcorp.cik_str, query_type="submissions", parse_pandas=True)
#     except:
#         submissions = pd.DataFrame()
#     try:
#         companyfacts = edgar_query(desiredcorp.cik_str, query_type="companyfacts", parse_pandas=True)
#     except:
#         companyfacts = pd.DataFrame()
#     return submissions, companyfacts

# # Create an empty list to store the results
# results = []

# # Create a ProcessPoolExecutor
# with concurrent.futures.ProcessPoolExecutor() as executor:
#     # Submit each desiredcorp to the executor
#     futures = [executor.submit(process_desiredcorp, desiredcorp) for index, desiredcorp in company_cik_list.iterrows()]
    
#     # Retrieve the results as they become available
#     for future in concurrent.futures.as_completed(futures):
#         results.append(future.result())

# # Concatenate the results
# submissions_raw = pd.concat([result[0] for result in results])
# company_facts_raw = pd.concat([result[1] for result in results])

# # Merge the dataframes
# submissions = pd.merge(company_cik_list, submissions_raw, how="left", left_on=["cik"], right_on=["cik"])
# company_facts = pd.merge(company_cik_list, company_facts_raw, how="left", left_on=["cik"], right_on=["cik"])


In [41]:
print(
    company_facts
    .query("variable.notna()")
    .groupby(["variable"])
    .count()
    .sort_values(by=["cik"], ascending=False)
    .query("cik >= cik.max()/2 & variable.str.contains('units.USD')")
    .to_string()
);

                                                                                                                                        cik  cik_str  company  state  value
variable                                                                                                                                                                   
facts.us-gaap.CashAndCashEquivalentsAtCarryingValue.units.USD                                                                            27       27       27     27     27
facts.us-gaap.StockholdersEquityIncludingPortionAttributableToNoncontrollingInterest.units.USD                                           27       27       27     27     27
facts.us-gaap.NetCashProvidedByUsedInOperatingActivities.units.USD                                                                       27       27       27     27     27
facts.us-gaap.NetCashProvidedByUsedInInvestingActivities.units.USD                                                                       27 

In [45]:
plotting_values = [
    "facts.us-gaap.InterestExpense.units.USD",
    "facts.us-gaap.NetIncomeLoss.units.USD",
]  #

raw_plotting_data = (
    pd.concat(
        [
            row.value.reset_index(drop=True).assign(
                company=row.company, variable=row.variable
            )
            for _, row in company_facts[
                company_facts.variable.isin(plotting_values)
            ].iterrows()
        ],
        ignore_index=True,
    )
    .reset_index(drop=True)
    .assign(filed=lambda x: pd.to_datetime(x.filed))
    .query("form == '10-Q'")
    .assign(
        # fp = lambda x: np.where(x.fp == "FY", "Q4", x.fp),
        date=lambda x: pd.to_datetime(x["fy"].astype(str) + "-" + x["fp"].astype(str)),
        start=lambda x: pd.to_datetime(x.start),
        end=lambda x: pd.to_datetime(x.end),
        frame=lambda x: x.frame.fillna("months_ended"),
    )
    .assign(
        frame_date=lambda x: pd.to_datetime(
            x.frame.str.replace("CY", ""), errors="coerce"
        )
    )
    .query("frame_date.isna() | frame_date ==date")
)

plotting_data = (
    raw_plotting_data.groupby(["date", "company", "variable", "frame"])
    .apply(lambda x: x.loc[x.end.idxmax()])
    .reset_index(drop=True)
)

plotting_data = (
    plotting_data.pivot(
        index=["date", "accn", "fy", "fp", "form", "filed", "frame", "company"],
        columns="variable",
        values="val",
    )
    .reset_index()
    .sort_values(by=["date", "company"])
    .reset_index(drop=True)
)

plotting_data = (
    plotting_data.sort_values(["form", "company", "date", "frame", "fy"])
    .groupby(["date", "fy", "form", "company", "frame"])
    .agg({value: lambda x: x.sum(min_count=1) for value in plotting_values})
    .reset_index(drop=False)
    .sort_values(["form", "company", "date", "frame", "fy"])
    .reset_index(drop=False)
    .assign(quarter=lambda x: x.date.dt.quarter)
)

for columnholder in plotting_values:
    plotting_data[columnholder] = plotting_data.groupby(["form", "company", "date"])[
        columnholder
    ].ffill()
    plotting_data.loc[
        ((plotting_data.frame == "months_ended") & (plotting_data.quarter != 1)),
        columnholder,
    ] = plotting_data.groupby(["form", "company", "fy"])[columnholder].apply(
        lambda x: (x - x.shift(2))
    )

plotting_data = plotting_data.assign(
    interest_percent=lambda x: np.where(
        (
            100
            * (x["facts.us-gaap.InterestExpense.units.USD"])
            / (
                x["facts.us-gaap.InterestExpense.units.USD"]
                + x["facts.us-gaap.NetIncomeLoss.units.USD"]
            )
            > -500
        )
        & (
            100
            * (x["facts.us-gaap.InterestExpense.units.USD"])
            / (
                x["facts.us-gaap.InterestExpense.units.USD"]
                + x["facts.us-gaap.NetIncomeLoss.units.USD"]
            )
            < 500
        )
        & (
            x["facts.us-gaap.InterestExpense.units.USD"].notna()
            & x["facts.us-gaap.NetIncomeLoss.units.USD"].notna()
        ),
        100
        * (x["facts.us-gaap.InterestExpense.units.USD"])
        / (
            x["facts.us-gaap.InterestExpense.units.USD"]
            + x["facts.us-gaap.NetIncomeLoss.units.USD"]
        ),
        np.nan,
    )
).sort_values(["form", "company", "date", "fy"])

plotting_values = np.unique(plotting_values + ["interest_percent"])


Not prepending group keys to the result index of transform-like apply. In the future, the group keys will be included in the index, regardless of whether the applied function returns a like-indexed object.
To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)


Not prepending group keys to the result index of transform-like apply. In the future, the group keys will be included in the index, regardless of whether the applied function returns a like-indexed object.
To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)



In [46]:
# Define the y-values
y_values = plotting_values  # replace with your y-value column names
title_list = plotting_data["company"].unique().tolist()

# Define the colormap
color_sequence = px.colors.qualitative.Plotly

# Create the base figure
fig = go.Figure()

# Add traces for each y-value and company
for y_value in y_values:
    for i, company in enumerate(title_list):
        fig.add_trace(
            go.Scatter(
                x=plotting_data[
                    (plotting_data["company"] == company) & (plotting_data[y_value].notna())
                ]["date"],
                y=plotting_data[
                    (plotting_data["company"] == company) & (plotting_data[y_value].notna())
                ][y_value],
                mode="lines+markers",
                name=company,
                legendgroup=company,
                line=dict(color=color_sequence[i % len(color_sequence)]),
                visible=(
                    y_value == y_values[0]
                ),  # only the traces for the first y-value are visible initially
            )
        )

# Define the dropdown menu
dropdown_menu = [
    dict(
        args=[
            {"visible": [y_value == y for y in y_values for _ in title_list]}
        ],  # update the 'visible' attribute of the traces
        label=y_value,
        method="restyle",
    )
    for y_value in y_values
]

# Update the layout
fig.update_layout(
    updatemenus=[
        dict(
            buttons=dropdown_menu,
            direction="down",
            pad={"r": 10, "t": 10},
            showactive=True,
            x=0,
            xanchor="left",
            y=1.1,
            yanchor="top",
        ),
    ],
    autosize=True,
)

fig.show()