In [37]:
import pandas as pd
import numpy as np
import polars as pl
from datetime import datetime

from factorlib.utils.system import get_data_dir

In [2]:
tickers = ["OPRA", "SMCI", "LMB", "MLTX", "YPF", "CABA", "WEAV", "ELF", "EDN", "ACLS", "INTT", "ETNB", "CIR", "RCL", "NVDA", "DAKT", "TCMD", "DMAC", "IMVT", "MMMB", "ENIC", "WFRD", "IPDN", "STRL", "RMBS", "MOD", "NGL", "TDW", "TAYD", "VIST", "EXTR", "SYM", "CCL", "CMT", "CBAY", "TGLS", "BELFB", "VECT", "AEHR", "CUK", "UFPT", "AUGX", "ISEE", "TAST", "COCO", "VRT", "BWMN", "ONCY", "BLDR", "ODC", "ATEC", "NVTS", "RMTI", "AVDL", "IRS", "DFH", "CVRX", "PEN", "TGS", "GRBK", "PLPC", "SKYW", "USAP", "ACVA", "RETA", "BTBT", "TROO", "POWL", "PPSI", "FTI", "DO", "SGML", "GGAL", "PCYG", "NETI", "TRHC", "ARDX", "STVN", "NFLX", "INTA", "MORF", "RXST", "HGBL", "GE", "BZH", "BBAR", "PESI", "RIG", "NU", "TK", "JBL", "ERO", "SMHI", "IRON", "EVLV", "GENI", "ELTK", "ENVX", "META", "NCLH"]

In [27]:
raw_data_dir = get_data_dir() / 'raw'
industry_codes = (
    pl.scan_csv(raw_data_dir / 'ohclv_daily.csv', try_parse_dates=True)
    .select(
        pl.col('datadate').alias('date_index'),
        pl.col('tic').alias('ticker'),
        pl.col('gind').alias('industry'),
        pl.col('gsubind').alias('subindustry')
    )
    .filter(pl.col('ticker').is_in(tickers))
    .collect(streaming=True)
)
industry_codes = industry_codes.to_pandas().set_index('date_index')
industry_codes.index = pd.to_datetime(industry_codes.index)
industry_codes = pl.from_pandas(industry_codes.reset_index())
industry_codes

date_index,ticker,industry,subindustry
datetime[ns],str,i64,i64
2023-06-06 00:00:00,"""BELFB""",452030,45203015
2023-06-02 00:00:00,"""BELFB""",452030,45203015
2023-06-01 00:00:00,"""BELFB""",452030,45203015
2023-05-31 00:00:00,"""BELFB""",452030,45203015
2023-05-26 00:00:00,"""BELFB""",452030,45203015
2023-05-25 00:00:00,"""BELFB""",452030,45203015
2023-05-24 00:00:00,"""BELFB""",452030,45203015
2023-05-22 00:00:00,"""BELFB""",452030,45203015
2023-05-19 00:00:00,"""BELFB""",452030,45203015
2023-05-18 00:00:00,"""BELFB""",452030,45203015


In [28]:
fundamentals = pl.scan_csv(raw_data_dir / 'ratios.csv').filter(pl.col('symbol').is_in(tickers)).collect(streaming=True)
fundamentals = fundamentals.rename(mapping={'symbol': 'ticker', 'date': 'date_index'})
fundamentals = fundamentals.to_pandas().set_index('date_index')
fundamentals.index = pd.to_datetime(fundamentals.index)
fundamentals = pl.from_pandas(fundamentals.reset_index())
fundamentals = fundamentals.drop(columns=['period', 'column_1'])
fundamentals

date_index,ticker,currentRatio,quickRatio,cashRatio,daysOfSalesOutstanding,daysOfInventoryOutstanding,operatingCycle,daysOfPayablesOutstanding,cashConversionCycle,grossProfitMargin,operatingProfitMargin,pretaxProfitMargin,netProfitMargin,effectiveTaxRate,returnOnAssets,returnOnEquity,returnOnCapitalEmployed,netIncomePerEBT,ebtPerEbit,ebitPerRevenue,debtRatio,debtEquityRatio,longTermDebtToCapitalization,totalDebtToCapitalization,interestCoverage,cashFlowToDebtRatio,companyEquityMultiplier,receivablesTurnover,payablesTurnover,inventoryTurnover,fixedAssetTurnover,assetTurnover,operatingCashFlowPerShare,freeCashFlowPerShare,cashPerShare,payoutRatio,operatingCashFlowSalesRatio,freeCashFlowOperatingCashFlowRatio,cashFlowCoverageRatios,shortTermCoverageRatios,capitalExpenditureCoverageRatio,dividendPaidAndCapexCoverageRatio,dividendPayoutRatio,priceBookValueRatio,priceToBookRatio,priceToSalesRatio,priceEarningsRatio,priceToFreeCashFlowsRatio,priceToOperatingCashFlowsRatio,priceCashFlowRatio,priceEarningsToGrowthRatio,priceSalesRatio,dividendYield,enterpriseValueMultiple,priceFairValue
datetime[ns],str,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64
2023-03-31 00:00:00,"""ACLS""",3.487479,2.367288,0.61386,272.430931,157.231352,224.406102,36.283558,188.122544,0.409491,0.202177,0.208259,0.187769,0.098389,0.044171,0.067962,0.063257,0.901611,1.030084,0.202177,0.042718,0.065726,0.060043,0.061673,37.957871,0.749198,1.538606,1.339789,2.480462,0.572405,3.668,0.235242,1.05543,0.987021,13.581084,0.0,0.136048,0.935183,0.749198,26.645335,-15.428125,-15.428125,0.0,6.216882,6.216882,17.176356,22.869038,135.002259,126.251859,126.251859,-1.421147,17.176356,0.0,78.237711,6.216882
2022-12-31 00:00:00,"""ACLS""",3.537009,2.425882,0.747969,232.914535,139.376094,196.807075,35.847058,160.960017,0.411654,0.21082,0.247306,0.214215,0.133808,0.056225,0.085412,0.07327,0.866192,1.173064,0.21082,0.045789,0.06956,0.063423,0.065036,38.026441,2.636791,1.519119,1.567098,2.510666,0.645735,3.814242,0.262471,3.728605,3.612619,13.16656,0.0,0.460002,0.968893,2.636791,99.580146,-32.147097,-32.147097,0.0,3.903799,3.903799,9.790729,11.426311,21.967441,21.2841,21.2841,0.268079,9.790729,0.0,36.652529,3.903799
2022-09-30 00:00:00,"""ACLS""",3.729226,2.482589,1.484451,276.954053,161.978404,230.268444,38.630477,191.637967,0.450839,0.232146,0.196396,0.175774,0.105001,0.04445,0.065513,0.076176,0.894999,0.846002,0.232146,0.0515,0.075903,0.06891,0.070548,39.911478,1.371293,1.473859,1.317908,2.329767,0.55563,3.32446,0.25288,1.938778,1.832147,10.36406,0.0,0.279267,0.945001,1.371293,54.983677,-18.182102,-18.182102,0.0,3.251226,3.251226,8.72323,12.406885,33.054119,31.236171,31.236171,-1.385435,8.72323,0.0,37.487941,3.251226
2022-06-30 00:00:00,"""ACLS""",4.443538,2.832497,1.877277,241.136285,157.182776,216.641038,36.451195,180.189843,0.448424,0.244804,0.217907,0.19979,0.08314,0.054438,0.075289,0.082192,0.91686,0.890128,0.244804,0.057806,0.079947,0.072417,0.074029,43.316,0.073567,1.383027,1.513667,2.469055,0.572582,3.400578,0.272474,0.104303,0.048314,8.676789,0.0,0.015607,0.46321,0.073567,3.135332,-1.862925,-1.862925,0.0,3.092352,3.092352,8.206028,10.268306,1135.074822,525.777706,525.777706,1.426154,8.206028,0.0,31.847685,3.092352
2022-03-31 00:00:00,"""ACLS""",4.398096,2.821264,2.014556,213.316904,161.166487,213.765175,40.133094,173.632081,0.440905,0.240296,0.225364,0.204396,0.093041,0.053201,0.074407,0.077081,0.906959,0.937862,0.240296,0.060301,0.084336,0.076194,0.077777,32.22859,0.546357,1.398594,1.711069,2.242538,0.558429,3.190294,0.260286,0.775154,0.729944,8.937915,0.0,0.126575,0.941676,0.546357,24.802695,-17.145709,-17.145709,0.0,4.489748,4.489748,12.333283,15.085037,103.473641,97.438683,97.438683,0.896722,12.333283,0.0,47.699855,4.489748
2021-12-31 00:00:00,"""ACLS""",4.124457,2.659738,1.96432,185.282521,151.02031,196.706411,29.451377,167.255034,0.435056,0.226216,0.220075,0.173805,0.210246,0.04746,0.06633,0.07715,0.789754,0.972856,0.226216,0.06292,0.087936,0.079291,0.080828,36.840063,0.804089,1.397583,1.969965,3.055884,0.595946,3.241568,0.273066,1.144586,1.053972,8.857877,0.0,0.185279,0.920832,0.804089,38.926456,-12.631422,-12.631422,0.0,4.606056,4.606056,12.069365,17.360452,70.741913,65.141442,65.141442,0.569423,12.069365,0.0,45.283993,4.606056
2021-09-30 00:00:00,"""ACLS""",4.15572,2.546317,1.97591,161.656904,176.809008,216.669614,31.891058,184.778556,0.43302,0.205972,0.193629,0.155721,0.195774,0.0387,0.053774,0.063428,0.804226,0.940073,0.205972,0.06694,0.093013,0.083589,0.085098,28.679275,1.39012,1.3895,2.257868,2.822108,0.509024,2.916224,0.248522,1.972747,1.874079,8.083162,0.0,0.374433,0.949985,1.39012,71.834962,-19.993956,-19.993956,0.0,3.082489,3.082489,8.926421,14.330775,25.094988,23.83986,23.83986,0.308663,8.926421,0.0,35.495244,3.082489
2021-06-30 00:00:00,"""ACLS""",4.8287,2.81078,2.063706,197.139753,207.889623,256.499425,43.989311,212.510114,0.434659,0.162941,0.15446,0.128373,0.168894,0.028618,0.038263,0.043303,0.831106,0.947952,0.162941,0.072334,0.096715,0.08673,0.088186,18.83595,0.645008,1.337052,1.851478,2.045952,0.432922,2.530568,0.222926,0.915254,0.884224,6.524661,0.0,0.20929,0.966097,0.645008,35.674769,-29.495694,-29.495694,0.0,2.754946,2.754946,9.242801,17.999899,45.712417,44.162617,44.162617,1.259993,9.242801,0.0,44.520658,2.754946
2021-03-31 00:00:00,"""ACLS""",5.717733,3.370347,2.46508,208.736368,205.581404,257.050646,47.718002,209.332644,0.425039,0.153266,0.13708,0.124119,0.094555,0.026188,0.033794,0.037311,0.905445,0.894398,0.153266,0.076238,0.09838,0.088191,0.089568,19.776482,0.314699,1.290432,1.748617,1.886081,0.437783,2.419035,0.210993,0.447813,0.40786,6.13276,0.0,0.11371,0.910783,0.314699,18.662546,-11.208612,-11.208612,0.0,2.84081,2.84081,10.433733,21.015615,100.745353,91.757143,91.757143,1.849374,10.433733,0.0,56.071285,2.84081
2020-12-31 00:00:00,"""ACLS""",5.576044,3.438832,2.410003,259.459775,209.419277,273.395659,31.219953,242.175707,0.433514,0.115181,0.118323,0.120083,-0.01487,0.023493,0.030469,0.026056,1.01487,1.027282,0.115181,0.077085,0.099977,0.089591,0.09089,10.719726,-0.123866,1.296971,1.406769,2.882772,0.42976,2.224793,0.195636,-0.177775,-0.233427,6.065309,0.0,-0.048806,1.313045,-0.123866,-7.888889,3.19443,3.19443,0.0,2.028475,2.028475,7.994482,16.643685,-124.750065,-163.802441,-163.802441,0.443832,7.994482,0.0,44.736605,2.028475


In [29]:
full_data = industry_codes.join(fundamentals, on=['ticker', 'date_index'], how='inner')
full_data

date_index,ticker,industry,subindustry,currentRatio,quickRatio,cashRatio,daysOfSalesOutstanding,daysOfInventoryOutstanding,operatingCycle,daysOfPayablesOutstanding,cashConversionCycle,grossProfitMargin,operatingProfitMargin,pretaxProfitMargin,netProfitMargin,effectiveTaxRate,returnOnAssets,returnOnEquity,returnOnCapitalEmployed,netIncomePerEBT,ebtPerEbit,ebitPerRevenue,debtRatio,debtEquityRatio,longTermDebtToCapitalization,totalDebtToCapitalization,interestCoverage,cashFlowToDebtRatio,companyEquityMultiplier,receivablesTurnover,payablesTurnover,inventoryTurnover,fixedAssetTurnover,assetTurnover,operatingCashFlowPerShare,freeCashFlowPerShare,cashPerShare,payoutRatio,operatingCashFlowSalesRatio,freeCashFlowOperatingCashFlowRatio,cashFlowCoverageRatios,shortTermCoverageRatios,capitalExpenditureCoverageRatio,dividendPaidAndCapexCoverageRatio,dividendPayoutRatio,priceBookValueRatio,priceToBookRatio,priceToSalesRatio,priceEarningsRatio,priceToFreeCashFlowsRatio,priceToOperatingCashFlowsRatio,priceCashFlowRatio,priceEarningsToGrowthRatio,priceSalesRatio,dividendYield,enterpriseValueMultiple,priceFairValue
datetime[ns],str,i64,i64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64
2023-03-31 00:00:00,"""BELFB""",452030,45203015,2.946326,1.438544,0.599429,230.772322,125.74368,182.646445,45.680233,136.966212,0.311377,0.113947,0.108713,0.084552,0.222246,0.025364,0.052405,0.044163,0.777754,0.954069,0.113947,0.209409,0.432675,0.292704,0.302005,19.97762,0.139929,2.066172,1.581645,1.970218,0.715742,2.930024,0.299976,7.859477,6.103641,36.339869,0.05689,0.097683,0.776596,0.139929,3.2134,-4.476203,-5.741814,0.05689,0.28949,0.28949,0.467068,1.381011,6.15698,4.781489,4.781489,-0.515577,0.467068,0.010299,4.646941,0.28949
2022-09-30 00:00:00,"""BELFB""",452030,45203015,2.752841,1.270159,0.517172,211.971852,117.224278,169.49131,47.202488,122.288822,0.289942,0.12412,0.116395,0.093103,0.200116,0.02988,0.070109,0.052938,0.799884,0.937763,0.12412,0.236353,0.554567,0.346214,0.356734,23.369703,0.098727,2.34635,1.721927,1.906679,0.767759,3.170797,0.320935,1.035331,0.869813,5.679779,0.049795,0.072708,0.84013,0.098727,2.188855,-6.255082,-10.404992,0.049795,1.335282,1.335282,1.773221,4.761459,29.029244,24.388339,24.388339,-0.65232,1.773221,0.002614,15.62889,1.335282
2022-06-30 00:00:00,"""BELFB""",452030,45203015,2.804676,1.278501,0.511301,211.368835,115.516065,167.634408,50.366049,117.268359,0.266468,0.064999,0.083789,0.099887,-0.192135,0.031415,0.075391,0.026805,1.192135,1.289077,0.064999,0.25226,0.605394,0.364397,0.3771,14.232349,0.138098,2.399876,1.726839,1.786918,0.779112,2.826895,0.3145,1.510795,1.390373,5.263873,0.048304,0.110768,0.920292,0.138098,2.60571,-12.545817,-27.66325,0.048304,0.861052,0.861052,1.140828,2.855285,11.191245,10.299215,10.299215,0.012194,1.140828,0.004229,17.585708,0.861052
2022-03-31 00:00:00,"""BELFB""",452030,45203015,3.052723,1.324178,0.468156,250.108215,136.272004,197.942523,56.683139,141.259384,0.249594,0.022053,0.048479,0.03704,0.235969,0.009733,0.023515,0.007339,0.764031,2.198342,0.022053,0.264959,0.640117,0.377846,0.390287,4.382267,-0.056205,2.415911,1.459368,1.587774,0.660444,2.180684,0.262777,-0.618899,-0.781852,4.092579,0.16252,-0.056671,1.263294,-0.056205,-1.096984,3.798039,6.366475,0.16252,1.037069,1.037069,1.633574,11.025818,-22.81763,-28.825369,-28.825369,-0.306805,1.633574,0.003685,42.366998,1.037069
2021-12-31 00:00:00,"""BELFB""",452030,45203015,2.937328,1.329242,0.551334,216.145348,116.284936,169.581049,55.029339,114.55171,0.266856,0.071013,0.067424,0.054423,0.192823,0.015645,0.038363,0.026133,0.807177,0.949469,0.071013,0.261891,0.642168,0.378576,0.391049,19.789773,0.047341,2.452039,1.688678,1.635491,0.773961,2.474572,0.287475,0.509024,0.088474,4.953557,0.116384,0.043128,0.17381,0.047341,0.922384,-1.210376,-1.472048,0.116384,0.772233,0.772233,1.095521,5.032415,146.14534,25.401562,25.401562,0.139131,1.095521,0.005782,15.780637,0.772233
2021-09-30 00:00:00,"""BELFB""",452030,45203015,2.970227,1.657489,0.583239,283.434672,103.93641,173.824411,46.714178,127.110234,0.244778,0.060375,0.048862,0.039016,0.201504,0.01176,0.028641,0.023268,0.798496,0.809309,0.060375,0.254453,0.619709,0.37171,0.382605,5.95104,0.001685,2.435455,1.287775,1.92661,0.865914,3.13581,0.301417,0.016836,-0.11922,4.99122,0.142483,0.001422,-7.08134,0.001685,0.037169,-0.123742,-0.239679,0.142483,0.770752,0.770752,1.049944,6.727678,-104.260824,738.306316,738.306316,-0.253277,1.049944,0.005295,17.029589,0.770752
2021-06-30 00:00:00,"""BELFB""",452030,45203015,2.849162,1.63302,0.632059,276.74451,100.015306,168.253678,45.646996,122.606682,0.246531,0.047823,0.043441,0.056796,-0.30745,0.016629,0.040082,0.017991,1.30745,0.908365,0.047823,0.262219,0.632051,0.358791,0.387274,9.202497,-0.029688,2.410396,1.318906,1.971652,0.899862,2.901742,0.292777,-0.297932,-0.399855,5.364642,0.103426,-0.026589,1.342098,-0.029688,-0.258823,2.923138,8.252796,0.103426,0.906931,0.906931,1.285134,5.656751,-36.013088,-48.333098,-48.333098,0.038704,1.285134,0.004571,21.309972,0.906931
2020-12-31 00:00:00,"""BELFB""",452030,45203015,3.201152,1.947153,0.970332,268.753326,103.807796,170.075739,41.233672,128.842067,0.252435,0.046819,0.037243,0.030578,0.17896,0.007824,0.019112,0.014842,0.82104,0.795475,0.046819,0.269179,0.657544,0.372498,0.396698,6.021041,0.092813,2.44278,1.358123,2.182682,0.866987,2.383698,0.255866,0.917543,0.838647,6.8732,0.264996,0.097641,0.914014,0.092813,0.954702,-11.629744,-333.5,0.264996,0.999686,0.999686,1.599435,13.07665,17.921723,16.380698,16.380698,-0.23461,1.599435,0.005066,23.901033,0.999686
2020-09-30 00:00:00,"""BELFB""",452030,45203015,3.172811,1.897252,0.904105,261.28936,102.555355,166.982869,39.707337,127.275531,0.269367,0.07084,0.051345,0.060044,-0.169431,0.016323,0.041813,0.02395,1.169431,0.724799,0.07084,0.288428,0.738856,0.404864,0.424909,7.100644,0.135715,2.561668,1.396919,2.266584,0.877575,2.462506,0.271845,1.449386,1.326407,6.559589,0.108227,0.143993,0.915151,0.135715,1.71213,-11.785667,-25.176966,0.108227,0.73888,0.73888,1.061034,4.417734,8.051828,7.36864,7.36864,0.135477,1.061034,0.006125,15.6157,0.73888
2020-06-30 00:00:00,"""BELFB""",452030,45203015,3.236281,1.916193,0.853809,282.187304,105.425321,175.005752,45.125331,129.880421,0.262181,0.062292,0.049483,0.045992,0.070547,0.012085,0.033189,0.020238,0.929453,0.794383,0.062292,0.307299,0.843918,0.441864,0.457676,6.0384,0.061993,2.746239,1.293467,1.994445,0.853685,2.245548,0.262764,0.712895,0.619735,6.109551,0.144805,0.0725,0.869323,0.061993,1.001482,-7.652439,-25.762463,0.144805,0.787443,0.787443,1.091224,5.931536,17.313839,15.051314,15.051314,0.02388,1.091224,0.006103,17.495754,0.787443


In [60]:
# the following list was determined from chatgpt as the best metrics to convert to categorical data
subindustry_metrics = [
    'grossProfitMargin',
    'operatingProfitMargin',
    'pretaxProfitMargin',
    'netProfitMargin',
    'returnOnAssets',
    'returnOnEquity',
    'returnOnCapitalEmployed',
    'debtRatio',
    'debtEquityRatio',
    'longTermDebtToCapitalization',
    'totalDebtToCapitalization',
    'currentRatio',
    'quickRatio',
    'cashRatio',
    'daysOfSalesOutstanding',
    'daysOfInventoryOutstanding',
    'operatingCycle',
    'daysOfPayablesOutstanding',
    'cashConversionCycle',
    'priceToBookRatio',
    'priceToSalesRatio',
    'priceEarningsRatio',
    'priceToFreeCashFlowsRatio',
    'priceToOperatingCashFlowsRatio',
    'priceCashFlowRatio'
]
# Then sort the dataframe by date
subindustry_fundamentals = full_data.to_pandas()
subindustry_fundamentals = subindustry_fundamentals.sort_values(by=['date_index'])

grouped_df = subindustry_fundamentals.groupby([pd.Grouper(key='date_index', freq='Q'), 'subindustry'])[subindustry_metrics].mean().reset_index()

# Merge this back to your original dataframe
merged_df = pd.merge(subindustry_fundamentals[['date_index', 'ticker', 'subindustry']], grouped_df,  how='inner', on=['date_index', 'subindustry'])
merged_df

Unnamed: 0,date_index,ticker,subindustry,grossProfitMargin,operatingProfitMargin,pretaxProfitMargin,netProfitMargin,returnOnAssets,returnOnEquity,returnOnCapitalEmployed,...,daysOfInventoryOutstanding,operatingCycle,daysOfPayablesOutstanding,cashConversionCycle,priceToBookRatio,priceToSalesRatio,priceEarningsRatio,priceToFreeCashFlowsRatio,priceToOperatingCashFlowsRatio,priceCashFlowRatio
0,1990-12-31,HGBL,40203010,1.000000,0.195122,0.195122,0.073171,0.014320,0.109091,0.067511,...,0.000000,,0.000000,,6.545455,4.390244,15.000000,32.727273,7.058824,7.058824
1,1990-12-31,MOD,25101010,0.264032,0.049012,0.184980,0.115415,0.036300,0.071115,0.019764,...,88.839957,143.480273,38.184748,105.295526,1.366186,2.217217,4.802705,11.309597,7.791056,7.791056
2,1990-12-31,SKYW,20302010,0.065934,-0.036630,-0.036630,-0.021978,-0.007926,-0.017493,-0.015649,...,21.529412,35.705236,16.588235,19.117001,0.874636,1.098901,-12.500000,-25.000000,16.666667,16.666667
3,1990-12-31,GE,20105010,0.471857,0.206695,0.099723,0.073024,0.000000,0.000000,0.000000,...,,,,,0.000000,2.170208,7.429764,38.841679,13.226261,13.226261
4,1991-09-30,SKYW,20302010,0.295732,0.042683,0.033537,0.021341,0.009537,0.020468,0.023102,...,25.714286,41.354530,19.480519,21.874010,0.582765,0.607639,7.118058,24.913204,6.429214,6.429214
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3201,2023-03-31,ACVA,20201070,0.500794,-0.174159,-0.149240,-0.152141,-0.018289,-0.037845,-0.035811,...,0.000000,220.792135,584.364011,-363.571875,4.260191,17.126305,-28.142190,54.241386,47.667552,47.667552
3202,2023-03-31,CMT,15101010,0.178309,0.081150,0.078095,0.058810,0.028300,0.047681,0.052591,...,27.529230,75.014232,36.592632,38.421599,1.233897,1.521901,6.469575,60.095167,32.588728,32.588728
3203,2023-03-31,HGBL,40203010,0.598182,0.234409,0.230315,0.170299,0.037446,0.055218,0.072092,...,47.083146,100.209801,220.085393,-119.875592,2.016957,6.220490,9.131741,11.668336,11.552239,11.552239
3204,2023-03-31,PPSI,20104020,0.260139,0.006465,0.014341,0.022217,0.005271,0.010999,0.003015,...,137.116301,220.303089,92.731173,127.571916,2.001210,4.042412,45.487829,25.836813,22.550032,22.550032


In [65]:
# NEED TO RESAMPLE AND MAKE SURE ITS CORRECT
fundamental_data_dir = get_data_dir() / 'fundamental'
categorical_fundamentals = merged_df.drop(columns=['subindustry']).set_index('date_index')
categorical_fundamentals = categorical_fundamentals.groupby('ticker').resample('B').ffill().reset_index(level=0, drop=True)
categorical_fundamentals

Unnamed: 0_level_0,ticker,grossProfitMargin,operatingProfitMargin,pretaxProfitMargin,netProfitMargin,returnOnAssets,returnOnEquity,returnOnCapitalEmployed,debtRatio,debtEquityRatio,...,daysOfInventoryOutstanding,operatingCycle,daysOfPayablesOutstanding,cashConversionCycle,priceToBookRatio,priceToSalesRatio,priceEarningsRatio,priceToFreeCashFlowsRatio,priceToOperatingCashFlowsRatio,priceCashFlowRatio
date_index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2001-12-31,ACLS,0.278583,-0.726667,-0.511650,-0.308605,-0.040319,-0.046684,-0.133826,0.003956,0.004619,...,318.268684,427.943566,41.494148,386.449418,1.709572,13.793878,-10.569581,-6.374534,-32.454747,-32.454747
2002-01-01,ACLS,0.278583,-0.726667,-0.511650,-0.308605,-0.040319,-0.046684,-0.133826,0.003956,0.004619,...,318.268684,427.943566,41.494148,386.449418,1.709572,13.793878,-10.569581,-6.374534,-32.454747,-32.454747
2002-01-02,ACLS,0.278583,-0.726667,-0.511650,-0.308605,-0.040319,-0.046684,-0.133826,0.003956,0.004619,...,318.268684,427.943566,41.494148,386.449418,1.709572,13.793878,-10.569581,-6.374534,-32.454747,-32.454747
2002-01-03,ACLS,0.278583,-0.726667,-0.511650,-0.308605,-0.040319,-0.046684,-0.133826,0.003956,0.004619,...,318.268684,427.943566,41.494148,386.449418,1.709572,13.793878,-10.569581,-6.374534,-32.454747,-32.454747
2002-01-04,ACLS,0.278583,-0.726667,-0.511650,-0.308605,-0.040319,-0.046684,-0.133826,0.003956,0.004619,...,318.268684,427.943566,41.494148,386.449418,1.709572,13.793878,-10.569581,-6.374534,-32.454747,-32.454747
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-03-27,YPF,0.267015,0.321531,0.182347,0.127733,0.024685,0.062312,0.078972,0.297023,0.749780,...,50.264346,86.709192,58.474169,28.235023,0.243472,0.499092,0.976828,4.942414,1.669913,1.669913
2023-03-28,YPF,0.267015,0.321531,0.182347,0.127733,0.024685,0.062312,0.078972,0.297023,0.749780,...,50.264346,86.709192,58.474169,28.235023,0.243472,0.499092,0.976828,4.942414,1.669913,1.669913
2023-03-29,YPF,0.267015,0.321531,0.182347,0.127733,0.024685,0.062312,0.078972,0.297023,0.749780,...,50.264346,86.709192,58.474169,28.235023,0.243472,0.499092,0.976828,4.942414,1.669913,1.669913
2023-03-30,YPF,0.267015,0.321531,0.182347,0.127733,0.024685,0.062312,0.078972,0.297023,0.749780,...,50.264346,86.709192,58.474169,28.235023,0.243472,0.499092,0.976828,4.942414,1.669913,1.669913


In [66]:
categorical_fundamentals.to_csv(fundamental_data_dir / 'subindustry_fundamentals.csv')
categorical_fundamentals

Unnamed: 0_level_0,ticker,grossProfitMargin,operatingProfitMargin,pretaxProfitMargin,netProfitMargin,returnOnAssets,returnOnEquity,returnOnCapitalEmployed,debtRatio,debtEquityRatio,...,daysOfInventoryOutstanding,operatingCycle,daysOfPayablesOutstanding,cashConversionCycle,priceToBookRatio,priceToSalesRatio,priceEarningsRatio,priceToFreeCashFlowsRatio,priceToOperatingCashFlowsRatio,priceCashFlowRatio
date_index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2001-12-31,ACLS,0.278583,-0.726667,-0.511650,-0.308605,-0.040319,-0.046684,-0.133826,0.003956,0.004619,...,318.268684,427.943566,41.494148,386.449418,1.709572,13.793878,-10.569581,-6.374534,-32.454747,-32.454747
2002-01-01,ACLS,0.278583,-0.726667,-0.511650,-0.308605,-0.040319,-0.046684,-0.133826,0.003956,0.004619,...,318.268684,427.943566,41.494148,386.449418,1.709572,13.793878,-10.569581,-6.374534,-32.454747,-32.454747
2002-01-02,ACLS,0.278583,-0.726667,-0.511650,-0.308605,-0.040319,-0.046684,-0.133826,0.003956,0.004619,...,318.268684,427.943566,41.494148,386.449418,1.709572,13.793878,-10.569581,-6.374534,-32.454747,-32.454747
2002-01-03,ACLS,0.278583,-0.726667,-0.511650,-0.308605,-0.040319,-0.046684,-0.133826,0.003956,0.004619,...,318.268684,427.943566,41.494148,386.449418,1.709572,13.793878,-10.569581,-6.374534,-32.454747,-32.454747
2002-01-04,ACLS,0.278583,-0.726667,-0.511650,-0.308605,-0.040319,-0.046684,-0.133826,0.003956,0.004619,...,318.268684,427.943566,41.494148,386.449418,1.709572,13.793878,-10.569581,-6.374534,-32.454747,-32.454747
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-03-27,YPF,0.267015,0.321531,0.182347,0.127733,0.024685,0.062312,0.078972,0.297023,0.749780,...,50.264346,86.709192,58.474169,28.235023,0.243472,0.499092,0.976828,4.942414,1.669913,1.669913
2023-03-28,YPF,0.267015,0.321531,0.182347,0.127733,0.024685,0.062312,0.078972,0.297023,0.749780,...,50.264346,86.709192,58.474169,28.235023,0.243472,0.499092,0.976828,4.942414,1.669913,1.669913
2023-03-29,YPF,0.267015,0.321531,0.182347,0.127733,0.024685,0.062312,0.078972,0.297023,0.749780,...,50.264346,86.709192,58.474169,28.235023,0.243472,0.499092,0.976828,4.942414,1.669913,1.669913
2023-03-30,YPF,0.267015,0.321531,0.182347,0.127733,0.024685,0.062312,0.078972,0.297023,0.749780,...,50.264346,86.709192,58.474169,28.235023,0.243472,0.499092,0.976828,4.942414,1.669913,1.669913


In [71]:
non_cat_fundamentals = fundamentals[[ticker for ticker in fundamentals.columns if ticker not in subindustry_metrics]]
non_cat_fundamentals.write_csv(fundamental_data_dir / 'non_cat_fundamentals.csv')
non_cat_fundamentals

date_index,ticker,effectiveTaxRate,netIncomePerEBT,ebtPerEbit,ebitPerRevenue,interestCoverage,cashFlowToDebtRatio,companyEquityMultiplier,receivablesTurnover,payablesTurnover,inventoryTurnover,fixedAssetTurnover,assetTurnover,operatingCashFlowPerShare,freeCashFlowPerShare,cashPerShare,payoutRatio,operatingCashFlowSalesRatio,freeCashFlowOperatingCashFlowRatio,cashFlowCoverageRatios,shortTermCoverageRatios,capitalExpenditureCoverageRatio,dividendPaidAndCapexCoverageRatio,dividendPayoutRatio,priceBookValueRatio,priceEarningsToGrowthRatio,priceSalesRatio,dividendYield,enterpriseValueMultiple,priceFairValue
datetime[ns],str,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64
2023-03-31 00:00:00,"""ACLS""",0.098389,0.901611,1.030084,0.202177,37.957871,0.749198,1.538606,1.339789,2.480462,0.572405,3.668,0.235242,1.05543,0.987021,13.581084,0.0,0.136048,0.935183,0.749198,26.645335,-15.428125,-15.428125,0.0,6.216882,-1.421147,17.176356,0.0,78.237711,6.216882
2022-12-31 00:00:00,"""ACLS""",0.133808,0.866192,1.173064,0.21082,38.026441,2.636791,1.519119,1.567098,2.510666,0.645735,3.814242,0.262471,3.728605,3.612619,13.16656,0.0,0.460002,0.968893,2.636791,99.580146,-32.147097,-32.147097,0.0,3.903799,0.268079,9.790729,0.0,36.652529,3.903799
2022-09-30 00:00:00,"""ACLS""",0.105001,0.894999,0.846002,0.232146,39.911478,1.371293,1.473859,1.317908,2.329767,0.55563,3.32446,0.25288,1.938778,1.832147,10.36406,0.0,0.279267,0.945001,1.371293,54.983677,-18.182102,-18.182102,0.0,3.251226,-1.385435,8.72323,0.0,37.487941,3.251226
2022-06-30 00:00:00,"""ACLS""",0.08314,0.91686,0.890128,0.244804,43.316,0.073567,1.383027,1.513667,2.469055,0.572582,3.400578,0.272474,0.104303,0.048314,8.676789,0.0,0.015607,0.46321,0.073567,3.135332,-1.862925,-1.862925,0.0,3.092352,1.426154,8.206028,0.0,31.847685,3.092352
2022-03-31 00:00:00,"""ACLS""",0.093041,0.906959,0.937862,0.240296,32.22859,0.546357,1.398594,1.711069,2.242538,0.558429,3.190294,0.260286,0.775154,0.729944,8.937915,0.0,0.126575,0.941676,0.546357,24.802695,-17.145709,-17.145709,0.0,4.489748,0.896722,12.333283,0.0,47.699855,4.489748
2021-12-31 00:00:00,"""ACLS""",0.210246,0.789754,0.972856,0.226216,36.840063,0.804089,1.397583,1.969965,3.055884,0.595946,3.241568,0.273066,1.144586,1.053972,8.857877,0.0,0.185279,0.920832,0.804089,38.926456,-12.631422,-12.631422,0.0,4.606056,0.569423,12.069365,0.0,45.283993,4.606056
2021-09-30 00:00:00,"""ACLS""",0.195774,0.804226,0.940073,0.205972,28.679275,1.39012,1.3895,2.257868,2.822108,0.509024,2.916224,0.248522,1.972747,1.874079,8.083162,0.0,0.374433,0.949985,1.39012,71.834962,-19.993956,-19.993956,0.0,3.082489,0.308663,8.926421,0.0,35.495244,3.082489
2021-06-30 00:00:00,"""ACLS""",0.168894,0.831106,0.947952,0.162941,18.83595,0.645008,1.337052,1.851478,2.045952,0.432922,2.530568,0.222926,0.915254,0.884224,6.524661,0.0,0.20929,0.966097,0.645008,35.674769,-29.495694,-29.495694,0.0,2.754946,1.259993,9.242801,0.0,44.520658,2.754946
2021-03-31 00:00:00,"""ACLS""",0.094555,0.905445,0.894398,0.153266,19.776482,0.314699,1.290432,1.748617,1.886081,0.437783,2.419035,0.210993,0.447813,0.40786,6.13276,0.0,0.11371,0.910783,0.314699,18.662546,-11.208612,-11.208612,0.0,2.84081,1.849374,10.433733,0.0,56.071285,2.84081
2020-12-31 00:00:00,"""ACLS""",-0.01487,1.01487,1.027282,0.115181,10.719726,-0.123866,1.296971,1.406769,2.882772,0.42976,2.224793,0.195636,-0.177775,-0.233427,6.065309,0.0,-0.048806,1.313045,-0.123866,-7.888889,3.19443,3.19443,0.0,2.028475,0.443832,7.994482,0.0,44.736605,2.028475
