In [25]:
# predefinitions
import pandas as pd
# file name of the Excel workbook that holds the altman z tables
directory = "altman z_all(sheet1-2).xlsx"

# threshold for "failure": a value must be equal to or below this number
fail_condition = 1.8

# cell contents that are to be treated as missing values when reading the workbook
naValues = [
    "#N/A N/A",
    "#N/A Review",
]

In [26]:
def getCompanyHistory(filename, sheet, skipRows, naValues=None):
    """
    Generates a Data Frame from a table of Altman Z values of companies
    throughout the years.

    The sheet is read twice: first without a header and limited to one row,
    to capture the raw header labels, and then normally to get the values.
    The raw labels replace the parsed column names (presumably because pandas
    renames repeated header labels while parsing, which would hide duplicated
    companies -- confirm against the workbook). Afterwards columns that hold
    no values at all are dropped, and of columns sharing a label only the
    first one is kept.

    :param filename: an Excel file (.xlsx) containing a table of companies and
                    their Altman Z values according to dates
    :type filename: str
    :param sheet: Name of the sheet to extract from the file
    :type sheet: str
    :param skipRows: a list of indexes, indicating which rows to skip from the
                    top of the file
    :type skipRows: list of int >= 0
    :param naValues: values in the given table that are to be considered "NaN"
    :type naValues: scalar, str, list-like, or dict, default None

    :returns: A Data Frame containing Altman Z values of companies, indexed by
                the first column of the sheet
    :rtype: pandas.DataFrame

    Example:
    >> getCompanyHistory("altman z_all(sheet1-2).xlsx","Sheet1", [0,1,2,4,5],
                             ["#N/A N/A", "#N/A Review"])
    """
    # Options shared by both reads. No `encoding` is passed: read_excel in
    # current pandas does not accept that keyword and raises a TypeError.
    shared_options = dict(sheet_name=sheet, skiprows=skipRows,
                          na_values=naValues, index_col=0)

    # extracting data
    # first unskipped row, read as plain data, gives the raw header labels
    cols = pd.read_excel(filename, header=None, nrows=1,
                         **shared_options).values[0]
    data = pd.read_excel(filename, **shared_options)

    # modifications on the extracted data
    data.columns = cols  # restore the raw header labels
    data = data.dropna(axis=1, how="all")  # drop columns with no values
    return data.loc[:, ~data.columns.duplicated()]  # drop duplicated columns

In [27]:
def weedSuccessful(dataFrame, failCond):
    """
    Masks a table of altman z values so that only "failing" entries remain.

    Every value greater than `failCond` is replaced by NaN; afterwards each
    column (company) that is left without a single value is removed. The
    rows of the result are the same as those of `dataFrame`.

    :param dataFrame: A Data Frame containing altman z values of companies
    :type dataFrame: pandas.DataFrame
    :param failCond: a company which has ever had an altman z value less than
                    or equal to this number is considered to have "failed"
                    in the past
    :type failCond: float

    :returns: A Data Frame with only the values at or below `failCond`, for
                only the companies that have "failed" in the past
    :rtype: pandas.DataFrame
    """
    # True where the value counts as a failure
    at_or_below = dataFrame <= failCond
    # keep failing values, blank out everything else
    failures_only = dataFrame.where(at_or_below)
    # a column that is entirely NaN belongs to a company that never failed
    return failures_only.dropna(axis="columns", how="all")

In [28]:
def getCompanyFailHistory(dataFrame, markedData):
    """
    Restricts `dataFrame` to the columns that are also present in
    `markedData`; the values themselves are taken from `dataFrame`.

    :param dataFrame: A Data Frame containing altman z values of companies
    :type dataFrame: pandas.DataFrame
    :param markedData: A Data Frame whose columns are the companies that have
                        "failed" in the past
    :type markedData: pandas.DataFrame

    :returns: A Data Frame containing the altman z values of only those
                companies that have "failed" in the past
    :rtype: pandas.DataFrame
    """
    return dataFrame.filter(
        items=[label for label in markedData.columns],
        axis="columns",
    )

In [29]:
# complete history of every company on "Sheet1"; sheet rows 0-2 and 4-5 are
# skipped, which leaves row 3 as the first row that is read
data_r = getCompanyHistory(
    filename=directory,
    sheet="Sheet1",
    skipRows=[0, 1, 2, 4, 5],
    naValues=naValues,
)

# only the values at or below the threshold, for companies that have any
data_lower = weedSuccessful(dataFrame=data_r, failCond=fail_condition)

# unmasked history, restricted to the companies kept in `data_lower`
data = getCompanyFailHistory(dataFrame=data_r, markedData=data_lower)

In [30]:
# Show the masked table produced by weedSuccessful: entries above
# fail_condition appear as NaN, and companies without any entry at or below
# it have no column here.
display(data_lower)

Unnamed: 0,BA UN Equity,CAT UN Equity,CVX UN Equity,DOW UN Equity,VZ UN Equity,1COV GY Equity,BAYN GY Equity,BMW GY Equity,CON GY Equity,DAI GY Equity,...,SSW SJ Equity,STP SJ Equity,SUI SJ Equity,TFG SJ Equity,TGO SJ Equity,TKG SJ Equity,TSG SJ Equity,TXT SJ Equity,VKE SJ Equity,WBO SJ Equity
2000-03-31,,,,,,,,,,,...,,,,,,,,,,
2000-06-30,,,,,,,,,,,...,,,,,,,,,,
2000-09-29,,,,,,,,,,,...,,,,,,,,,,
2000-12-29,,,,,1.6739,,,,,,...,,,,,,,,,,
2001-03-30,,,,,1.5789,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2019-06-28,,,,,,,,1.1596,,1.0932,...,0.7997,1.0466,1.1528,,,,,0.5303,1.1043,
2019-09-30,,,,,,,,1.1394,,1.1058,...,0.7997,1.0055,1.1528,,0.5816,,1.0871,0.6091,1.0201,
2019-12-31,,,,,,,,1.1811,,1.1012,...,,1.0055,1.5705,,0.5816,,1.0871,0.6322,1.0201,
2020-03-31,1.4045,,,1.77,,,,1.1707,,1.0207,...,,1.3498,1.5705,,0.5941,1.6393,1.0296,0.6102,0.4992,
