# Australia data
Data source: Compustat Global - Fundamentals Annual

Date range: 1987-06 - 2023-08   

Identifiers:
- datadate
- Global Company Key (gvkey)
- International Security ID (ISIN)
- FYEAR -- Data Year - Fiscal (FYEAR)
- SEDOL (SEDOL)
- CONML -- Company Legal Name (CONML)
- FIC -- ISO Country Code - Incorporation (FIC)
- CITY -- City (CITY)
- SIC -- Standard Industry Classification Code (SIC)



In [1]:
import pandas as pd
import numpy as np
import json

In [2]:
def winsorize_series(s: pd.Series, limits=[0.05, 0.95], msk=None) -> pd.Series:
    s = s.copy()
    q = s.quantile(limits)
    if isinstance(q, pd.Series) and len(q) == 2:
        if msk is None:
            s[s < q.iloc[0]] = q.iloc[0]
            s[s > q.iloc[1]] = q.iloc[1]
        else:
            s[s < q.iloc[0]] = msk
            s[s > q.iloc[1]] = msk
    return s

In [3]:
# load data
df = pd.read_csv(
    "./data/wovz34pxq1fe4z1r.csv").dropna(subset=["datadate", "fyear", "isin"])
df.rename(columns={"fyear": "year"}, inplace=True)
df

Unnamed: 0,gvkey,year,datadate,at,capx,ceq,che,dlc,dltt,ebitda,...,isin,sedol,cshoi,epsexcon,epsincon,nicon,tstkni,fic,city,sic
0,5302,1996.0,1996-06-30,4.724,0.862,4.595,1.117,0.064,0.000,-1.341,...,AU0000185597,BL55PB2,73.158,-0.018,-0.018,-0.979,,AUS,Subiaco,1040.0
1,5302,1997.0,1997-06-30,8.626,1.596,7.523,1.412,0.033,0.000,-1.508,...,AU0000185597,BL55PB2,80.157,-0.013,-0.013,-1.076,,AUS,Subiaco,1040.0
2,5302,1998.0,1998-06-30,8.234,0.773,6.391,0.530,0.000,0.000,-2.149,...,AU0000185597,BL55PB2,105.196,-0.046,-0.046,-4.082,0.0,AUS,Subiaco,1040.0
3,5302,1999.0,1999-06-30,9.182,0.272,9.093,2.182,0.000,0.000,-2.422,...,AU0000185597,BL55PB2,107.377,-0.144,-0.144,,,AUS,Subiaco,1040.0
4,5302,2000.0,2000-06-30,7.837,0.347,5.529,3.007,0.000,0.000,-2.092,...,AU0000185597,BL55PB2,151.405,-0.108,-0.108,-11.693,,AUS,Subiaco,1040.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
47542,356597,2022.0,2022-12-31,0.144,0.015,0.115,0.122,0.000,0.000,-0.045,...,AU0000274284,BMBR1L1,,,,,,AUS,Perth,1000.0
47543,357325,2019.0,2019-06-30,301.823,1.916,141.464,22.701,98.527,0.000,49.001,...,AU0000284895,BQ81P89,,,,,,AUS,Minto,5160.0
47544,357325,2020.0,2020-06-30,394.393,2.138,142.849,19.171,128.876,40.300,66.095,...,AU0000284895,BQ81P89,,,,,,AUS,Minto,5160.0
47545,357325,2021.0,2021-06-30,428.724,2.360,182.128,23.244,42.663,111.616,73.885,...,AU0000284895,BQ81P89,,,,,,AUS,Minto,5160.0


In [4]:
market_data = pd.read_csv("./data//ifl6dljmnzoyi6g6.csv")
# keep year and month
df["datadate"] = pd.to_datetime(df["datadate"]).dt.strftime("%Y-%m")
market_data["datadate"] = pd.to_datetime(
    market_data["datadate"]).dt.strftime("%Y-%m")
market_data

Unnamed: 0,gvkey,datadate,prccm,isin,fic
0,5302,2004-01,0.2000,AU0000185597,AUS
1,5302,2004-02,0.1750,AU0000185597,AUS
2,5302,2004-03,0.1400,AU0000185597,AUS
3,5302,2004-04,0.1500,AU0000185597,AUS
4,5302,2004-05,0.1200,AU0000185597,AUS
...,...,...,...,...,...
464073,356294,2023-07,0.2250,AU0000273088,AUS
464074,356294,2023-06,0.1501,,AUS
464075,356294,2023-07,0.1340,,AUS
464076,356597,2023-06,0.2000,AU0000274284,AUS


In [5]:
# merge data
df = pd.merge(df, market_data, on=["isin", "datadate"], how="left")
df

Unnamed: 0,gvkey_x,year,datadate,at,capx,ceq,che,dlc,dltt,ebitda,...,epsexcon,epsincon,nicon,tstkni,fic_x,city,sic,gvkey_y,prccm,fic_y
0,5302,1996.0,1996-06,4.724,0.862,4.595,1.117,0.064,0.000,-1.341,...,-0.018,-0.018,-0.979,,AUS,Subiaco,1040.0,,,
1,5302,1997.0,1997-06,8.626,1.596,7.523,1.412,0.033,0.000,-1.508,...,-0.013,-0.013,-1.076,,AUS,Subiaco,1040.0,,,
2,5302,1998.0,1998-06,8.234,0.773,6.391,0.530,0.000,0.000,-2.149,...,-0.046,-0.046,-4.082,0.0,AUS,Subiaco,1040.0,,,
3,5302,1999.0,1999-06,9.182,0.272,9.093,2.182,0.000,0.000,-2.422,...,-0.144,-0.144,,,AUS,Subiaco,1040.0,,,
4,5302,2000.0,2000-06,7.837,0.347,5.529,3.007,0.000,0.000,-2.092,...,-0.108,-0.108,-11.693,,AUS,Subiaco,1040.0,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
46005,356597,2022.0,2022-12,0.144,0.015,0.115,0.122,0.000,0.000,-0.045,...,,,,,AUS,Perth,1000.0,,,
46006,357325,2019.0,2019-06,301.823,1.916,141.464,22.701,98.527,0.000,49.001,...,,,,,AUS,Minto,5160.0,,,
46007,357325,2020.0,2020-06,394.393,2.138,142.849,19.171,128.876,40.300,66.095,...,,,,,AUS,Minto,5160.0,,,
46008,357325,2021.0,2021-06,428.724,2.360,182.128,23.244,42.663,111.616,73.885,...,,,,,AUS,Minto,5160.0,,,


In [6]:
# drop duplicates
df = df.sort_values(by=["isin", "datadate"], ascending=[False, True])
del df["datadate"]
df = df.drop_duplicates(subset=["isin", "year"], keep="first")
df

Unnamed: 0,gvkey_x,year,at,capx,ceq,che,dlc,dltt,ebitda,ppent,...,epsexcon,epsincon,nicon,tstkni,fic_x,city,sic,gvkey_y,prccm,fic_y
45735,351061,2020.0,113.177,,71.514,43.529,21.077,0.000,84.181,0.052,...,,,,,AUS,Austin,7372.0,,,
45736,351061,2021.0,279.136,,245.543,86.239,,,61.268,0.225,...,,,,,AUS,Austin,7372.0,351061.0,2.040,AUS
45737,351061,2022.0,194.821,0.066,172.375,79.493,,,-8.901,0.174,...,-0.2064,-0.2064,-91.474,,AUS,Austin,7372.0,351061.0,0.635,AUS
2581,142795,1997.0,5.750,0.305,2.013,0.002,1.991,0.000,-2.396,5.664,...,-0.3407,-0.3407,-5.400,,AUS,Coomera,4950.0,,,
2582,142795,1998.0,7.118,0.660,3.645,0.811,2.286,0.500,-0.851,6.170,...,-0.1498,-0.0531,-0.493,,AUS,Coomera,4950.0,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
45001,341392,2018.0,3.154,,2.854,,,,0.721,0.330,...,0.1453,0.1453,0.441,,AUS,Canterbury,6020.0,341392.0,0.800,AUS
45002,341392,2019.0,3.280,,2.880,,,,0.667,0.277,...,0.1348,0.1348,0.397,,AUS,Canterbury,6020.0,341392.0,0.850,AUS
45003,341392,2020.0,6.599,,3.325,,0.371,2.273,1.454,2.791,...,0.2568,0.2568,0.717,,AUS,Canterbury,6020.0,341392.0,0.860,AUS
45004,341392,2021.0,6.229,,3.432,,0.300,1.973,0.876,3.045,...,0.1116,0.1116,0.304,,AUS,Canterbury,6020.0,341392.0,0.950,AUS


In [7]:
import os
if not os.path.exists('output'):
    os.makedirs('output')

1. Capex $_{t}$ / AT $_{t-1}$

In [8]:
df["CapitalExpenditures/TotalAsset"] = df["capx"] / df["at"] * 100
df["CapitalExpenditures/TotalAsset"] = winsorize_series(
    df["CapitalExpenditures/TotalAsset"], limits=[0.01, 0.99], msk=None)
df[["isin", "year", "CapitalExpenditures/TotalAsset"]].describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
year,45762.0,2010.424413,7.60638,1987.0,2005.0,2011.0,2017.0,2023.0
CapitalExpenditures/TotalAsset,34122.0,10.85083,19.106846,0.012321,1.098059,4.048133,12.678921,140.361832


In [9]:
result_json = df[["isin", "year", "CapitalExpenditures/TotalAsset"
                 ]].to_json(orient='split', index=False)
result_dict = json.loads(result_json)

In [10]:
output_dict = {
    "varibale_name": "CapitalExpenditures/TotalAsset",
    "variable_description": "Capital Expenditures (CAPX) / Total Assets (AT). Winsorized at 1\% and 99\% level.",
    "data_source": "Compustat Global - Fundamentals Annual",
    "reference": "",
    "identifier_entity": "isin",
    "identifier_time": "year",
}
output_dict["data"] = result_dict["data"]

In [11]:
with open("./output/aus_capexatratio.json", "w") as file:
     json.dump(output_dict, file, indent=4)

2. Capex $_{t}$ / PPENT $_{t-1}$

In [12]:
df["CapitalExpenditures/PropertyPlant&Equipment"] = df["capx"] / df["ppent"] * 100
# inf -> nan
df["CapitalExpenditures/PropertyPlant&Equipment"] = df["CapitalExpenditures/PropertyPlant&Equipment"].replace(np.inf, np.nan)
df["CapitalExpenditures/PropertyPlant&Equipment"] = winsorize_series(df["CapitalExpenditures/PropertyPlant&Equipment"],
                                     limits=[0.01, 0.99], msk=None)
df[["isin", "year", "CapitalExpenditures/PropertyPlant&Equipment"]].describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
year,45762.0,2010.424413,7.60638,1987.0,2005.0,2011.0,2017.0,2023.0
CapitalExpenditures/PropertyPlant&Equipment,33463.0,60.509796,202.832513,0.051632,8.996501,22.177858,47.038519,1771.807719


In [13]:
result_json = df[["isin", "year", "CapitalExpenditures/PropertyPlant&Equipment"
                  ]].to_json(orient="split", index=False)
result_dict = json.loads(result_json)

In [14]:
output_dict = {
    "varibale_name": "CapitalExpenditures/PropertyPlant&Equipment",
    "variable_description": "Capital Expenditures (CAPX) / Property, Plant and Equipment - Total (Net) (PPENT). Winsorized at 1\% and 99\% level.",
    "data_source": "Compustat Global - Fundamentals Annual",
    "reference": "",
    "identifier_entity": "isin",
    "identifier_time": "year",
}
output_dict["data"] = result_dict["data"]

In [15]:
with open("./output/aus_capexppentratio.json", "w") as file:
     json.dump(output_dict, file, indent=4)

3. PPENT $_{t}$ / AT $_{t}$

In [16]:
df["PropertyPlant&Equipment/TotalAssets"] = df["ppent"] / df["at"] * 100
df["PropertyPlant&Equipment/TotalAssets"] = winsorize_series(df["PropertyPlant&Equipment/TotalAssets"],
                                                            limits=[0.01, 0.99], msk=None)
df[["isin", "year", "PropertyPlant&Equipment/TotalAssets"]].describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
year,45762.0,2010.424413,7.60638,1987.0,2005.0,2011.0,2017.0,2023.0
PropertyPlant&Equipment/TotalAssets,41862.0,31.87973,31.223828,0.0,2.944137,21.25745,58.140005,97.575954


In [17]:
result_json = df[["isin", "year", "PropertyPlant&Equipment/TotalAssets"
                  ]].to_json(orient="split", index=False)
result_dict = json.loads(result_json)

In [18]:
output_dict = {
    "varibale_name": "PropertyPlant&Equipment/TotalAssets",
    "variable_description": "Capital Expenditures (CAPX) / Property, Plant and Equipment - Total (Net) (PPENT). Winsorized at 1\% and 99\% level.",
    "data_source": "Compustat Global - Fundamentals Annual",
    "reference": "",
    "identifier_entity": "isin",
    "identifier_time": "year",
}
output_dict["data"] = result_dict["data"]

In [19]:
with open("./output/aus_ppeatratio.json", "w") as file:
     json.dump(output_dict, file, indent=4)

4. CASH $_{t}$ / AT $_{t - 1}$

In [20]:
df["Chsh/TotalAssets"] = df["che"] / df["at"] * 100
df["Chsh/TotalAssets"] = winsorize_series(df["Chsh/TotalAssets"],
                                          limits=[0.01, 0.99], msk=None)
df[["isin", "year", "Chsh/TotalAssets"]].describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
year,45762.0,2010.424413,7.60638,1987.0,2005.0,2011.0,2017.0,2023.0
Chsh/TotalAssets,38263.0,27.044258,28.333914,0.008998,4.882468,15.306915,41.868012,98.826267


In [21]:
result_json = df[["isin", "year", "Chsh/TotalAssets"
                  ]].to_json(orient="split", index=False)
result_dict = json.loads(result_json)

In [22]:
output_dict = {
    "varibale_name": "Chsh/TotalAssets",
    "variable_description": "Cash and Short-Term Investments (CHE) / Total Assets (AT). Winsorized at 1\% and 99\% level.",
    "data_source": "Compustat Global - Fundamentals Annual",
    "reference": "",
    "identifier_entity": "isin",
    "identifier_time": "year",
}
output_dict["data"] = result_dict["data"]

In [23]:
with open("./output/aus_cashatratio.json", "w") as file:
     json.dump(output_dict, file, indent=4)

5. Cost of Debt: XINT $_{t}$ / (DLTT $_{t}$ + DLC $_{t}$ )

In [24]:
df["CostofDebt"] = df["xint"] / (df["dltt"] + df["dlc"]) * 100
# inf -> nan
df["CostofDebt"] = df["CostofDebt"].replace(np.inf, np.nan)
df["CostofDebt"] = winsorize_series(df["CostofDebt"],
                                    limits=[0.01, 0.99], msk=None)
df[["isin", "year", "CostofDebt"]].describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
year,45762.0,2010.424413,7.60638,1987.0,2005.0,2011.0,2017.0,2023.0
CostofDebt,24824.0,19.002865,54.996382,0.0,4.678667,7.317073,11.490067,449.146137


In [25]:
result_json = df[["isin", "year", "CostofDebt"
                  ]].to_json(orient="split", index=False)
result_dict = json.loads(result_json)

In [26]:
output_dict = {
    "varibale_name": "CostofDebt",
    "variable_description": "Interest and Related Expense - Total (XINT) / (Long-Term Debt - Total (DLTT) + Debt in Current Liabilities - Total (DLC)). Winsorized at 1\% and 99\% level.",
    "data_source": "Compustat Global - Fundamentals Annual",
    "reference": "",
    "identifier_entity": "isin",
    "identifier_time": "year",
}
output_dict["data"] = result_dict["data"]

In [27]:
with open("./output/aus_costofdebt.json", "w") as file:
     json.dump(output_dict, file, indent=4)

6. RD $_{t}$ / AT $_{t-1}$

In [28]:
df["R&D/TotalAssets"] = df["xrd"] / df["at"] * 100
# inf -> nan
df["R&D/TotalAssets"] = df["R&D/TotalAssets"].replace(np.inf, np.nan)
df["R&D/TotalAssets"] = winsorize_series(df["R&D/TotalAssets"],
                                    limits=[0.01, 0.99], msk=None)
df[["isin", "year", "R&D/TotalAssets"]].describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
year,45762.0,2010.424413,7.60638,1987.0,2005.0,2011.0,2017.0,2023.0
R&D/TotalAssets,7272.0,14.240151,26.031564,0.004702,0.624482,3.930721,15.509569,161.778432


In [29]:
result_json = df[["isin", "year", "R&D/TotalAssets"
                  ]].to_json(orient="split", index=False)
result_dict = json.loads(result_json)

In [30]:
output_dict = {
    "varibale_name": "R&D/TotalAssets",
    "variable_description": "Research and Development Expense (XRD) / Assets - Total (AT). Winsorized at 1\% and 99\% level.",
    "data_source": "Compustat Global - Fundamentals Annual",
    "reference": "",
    "identifier_entity": "isin",
    "identifier_time": "year",
}
output_dict["data"] = result_dict["data"]

In [31]:
with open("./output/aus_rdatratio.json", "w") as file:
     json.dump(output_dict, file, indent=4)

7. ROA: NI $_{t}$ / AT $_{t-1}$

In [32]:
df["ROA"] = df["nicon"] / df["at"] * 100
df["ROA"] = winsorize_series(df["ROA"],
                                    limits=[0.01, 0.99], msk=None)
df[["isin", "year", "ROA"]].describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
year,45762.0,2010.424413,7.60638,1987.0,2005.0,2011.0,2017.0,2023.0
ROA,40577.0,-38.554448,123.040678,-938.920137,-30.704637,-5.766691,4.408872,46.454392


In [33]:
result_json = df[["isin", "year", "ROA"
                  ]].to_json(orient="split", index=False)
result_dict = json.loads(result_json)

In [34]:
output_dict = {
    "varibale_name": "ROA",
    "variable_description": "Net Income (Loss) - Consolidated (NICON) / Assets - Total (AT). Winsorized at 1\% and 99\% level.",
    "data_source": "Compustat Global - Fundamentals Annual",
    "reference": "",
    "identifier_entity": "isin",
    "identifier_time": "year",
}
output_dict["data"] = result_dict["data"]

In [35]:
with open("./output/aus_roa.json", "w") as file:
     json.dump(output_dict, file, indent=4)

8. TobinQ (AT $_{t}$ + MKTVAL $_{t}$ - CEQ $_{t}$ ) / AT $_{t}$ 

In [36]:
# 用股价计算MKTVAL，这里只能算出普通股总价值
df["mktval"] = df["prccm"] * (df["cshoi"] - df["tstkni"])
df[["isin", "year", "mktval"]].describe().T
# 待验证

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
year,45762.0,2010.424413,7.60638,1987.0,2005.0,2011.0,2017.0,2023.0
mktval,1810.0,7827.316288,21153.660372,0.172372,150.751669,951.250057,4032.560195,208734.11625


In [37]:
df["TobinQ"] = (df["at"] + df["mktval"] - df["ceq"]) / df["at"]
df["TobinQ"] = winsorize_series(df["TobinQ"],
                                limits=[0.01, 0.99], msk=None)
df[["isin", "year", "TobinQ"]].describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
year,45762.0,2010.424413,7.60638,1987.0,2005.0,2011.0,2017.0,2023.0
TobinQ,1810.0,1.705737,1.502403,0.470896,0.998051,1.172273,1.767994,9.611434


In [38]:
result_json = df[["isin", "year", "TobinQ"
                  ]].to_json(orient="split", index=False)
result_dict = json.loads(result_json)

In [39]:
output_dict = {
    "varibale_name": "TobinQ",
    "variable_description": "(Assets - Total (AT) + (Par Value - Issue (PV) * (Com Shares Outstanding - Issue (CSHOI) - Treasury Stock - Number of Common Shares - Issue (TSTKNI))) - Common/Ordinary Equity - Total (CEQ)) / Assets - Total (AT). Winsorized at 1\% and 99\% level.",
    "data_source": "Compustat Global - Fundamentals Annual",
    "reference": "",
    "identifier_entity": "isin",
    "identifier_time": "year",
}
output_dict["data"] = result_dict["data"]

In [40]:
with open("./output/aus_tobinq.json", "w") as file:
     json.dump(output_dict, file, indent=4)

9. Size: Log(AT $_{t}$ )

In [41]:
df["Size"] = np.log(df["at"])
# -inf -> nan
df["Size"] = df["Size"].replace(-np.inf, np.nan)
df["Size"] = winsorize_series(df["Size"],
                                limits=[0.01, 0.99], msk=None)
df[["isin", "year", "Size"]].describe().T

  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)


Unnamed: 0,count,mean,std,min,25%,50%,75%,max
year,45762.0,2010.424413,7.60638,1987.0,2005.0,2011.0,2017.0,2023.0
Size,45015.0,3.492874,2.473464,-2.588413,1.835058,3.20177,5.043022,10.063817


In [42]:
result_json = df[["isin", "year", "Size"
                  ]].to_json(orient="split", index=False)
result_dict = json.loads(result_json)

In [43]:
output_dict = {
    "varibale_name": "Size",
    "variable_description": "Log(Assets - Total (AT)). Winsorized at 1\% and 99\% level.",
    "data_source": "Compustat Global - Fundamentals Annual",
    "reference": "",
    "identifier_entity": "isin",
    "identifier_time": "year",
}
output_dict["data"] = result_dict["data"]

In [44]:
with open("./output/aus_size.json", "w") as file:
     json.dump(output_dict, file, indent=4)

10. Leverage: (DLTT $_{t}$ + DLC $_{t}$ ) / (AT $_{t-1}$ )

In [45]:
df["Leverage"] = (df["dltt"]+df["dlc"]) / df["at"]*100
df["Leverage"] = winsorize_series(df["Leverage"],
                              limits=[0.01, 0.99], msk=None)
df[["isin", "year", "Leverage"]].describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
year,45762.0,2010.424413,7.60638,1987.0,2005.0,2011.0,2017.0,2023.0
Leverage,41295.0,23.679818,63.498423,0.0,0.0,5.057325,26.102246,533.583333


In [46]:
result_json = df[["isin", "year", "Leverage"
                  ]].to_json(orient="split", index=False)
result_dict = json.loads(result_json)

In [47]:
output_dict = {
    "varibale_name": "Leverage",
    "variable_description": "(Long-Term Debt - Total (DLTT) + Debt in Current Liabilities - Total (DLC)) / Assets - Total (AT). Winsorized at 1\% and 99\% level.",
    "data_source": "Compustat Global - Fundamentals Annual",
    "reference": "",
    "identifier_entity": "isin",
    "identifier_time": "year",
}
output_dict["data"] = result_dict["data"]

In [48]:
with open("./output/aus_leverage.json", "w") as file:
     json.dump(output_dict, file, indent=4)

11. EBITDA: ETITDA $_{t}$ / AT $_{t}$

In [49]:
df["EBITDA"] = df["ebitda"] / df["at"]*100
df["EBITDA"] = winsorize_series(df["EBITDA"],
                              limits=[0.01, 0.99], msk=None)
df[["isin", "year", "EBITDA"]].describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
year,45762.0,2010.424413,7.60638,1987.0,2005.0,2011.0,2017.0,2023.0
EBITDA,45236.0,-37.951431,168.303534,-1414.029302,-24.732306,-3.413552,9.615175,96.784238


In [50]:
result_json = df[["isin", "year", "EBITDA"
                  ]].to_json(orient="split", index=False)
result_dict = json.loads(result_json)

In [51]:
output_dict = {
    "varibale_name": "EBITDA",
    "variable_description": "Earnings Before Interest (EBITDA) / Assets - Total (AT). Winsorized at 1\% and 99\% level.",
    "data_source": "Compustat Global - Fundamentals Annual",
    "reference": "",
    "identifier_entity": "isin",
    "identifier_time": "year",
}
output_dict["data"] = result_dict["data"]

In [52]:
with open("./output/aus_ebitda.json", "w") as file:
     json.dump(output_dict, file, indent=4)