In [1]:
# predefinitions
import pandas as pd
# path of the Excel workbook that holds the altman z table (a file path, despite the name)
directory="altman z_all(sheet1-2).xlsx"
# a company counts as "failed" when one of its altman z values is equal to or below this number
fail_condition = 1.8 #must be equal to or below this number
# cell contents of the workbook that are to be read as NaN
naValues=["#N/A N/A", "#N/A Review"]

In [2]:
def getCompanyHistory(filename, skipRows, naValues=None, sheetName="Sheet1"):
    """
    Generates a Data Frame from a table of altman z values of companies
    throughout the years

    The sheet is read twice. pandas renames repeated header labels
    ("X", "X.1", ...) when it parses the header row itself, so the raw header
    row is read separately and re-applied to the data. That keeps the original
    labels, which is what allows duplicated companies to be detected and
    dropped afterwards.

    :param filename: an Excel file (.xlsx) containing a table of companies and
                    their altman z values according to dates
    :type filename: str
    :param skipRows: a list of indexes, indicating which rows to skip from the
                    top of the file; the first row that is not skipped is
                    used as the header row
    :type skipRows: list of int >= 0
    :param naValues: values in the given table that are to be considered "NaN"
    :type naValues: scalar, str, list-like, or dict, default None
    :param sheetName: the sheet of the workbook to read
    :type sheetName: str or int, default "Sheet1"

    :returns: A Data Frame containing altman z values of companies, indexed by
                the first column of the sheet, without empty columns and
                without duplicated companies (the first occurrence is kept)
    :rtype: pandas.DataFrame

    Example:
    >> getCompanyHistory("altman z_all(sheet1-2).xlsx", [0,1,2,4,5],
                             ["#N/A N/A", "#N/A Review"])
    """
    # Options shared by both reads. No `encoding` is passed: read_excel has no
    # such keyword and raises a TypeError for it in pandas 1.1 and later
    # (an .xlsx workbook carries its own text encoding).
    readOptions = dict(sheet_name=sheetName, skiprows=skipRows,
                       na_values=naValues, index_col=0)

    # extracting data
    # the header row exactly as written in the sheet (first column is the index)
    cols = pd.read_excel(filename, header=None, nrows=1, **readOptions).values[0]
    data = pd.read_excel(filename, **readOptions)

    # modifications on the extracted data
    data.columns = cols # restore the original (possibly repeated) labels
    data = data.dropna(axis=1, how="all") # drop columns with no values
    return data.loc[:, ~data.columns.duplicated()] # drop duplicated columns

In [3]:
def weedSuccessful(dataFrame, failCond):
    """
    Keeps only the "failed" readings of the companies that have ever "failed"

    Every value that is not less than or equal to `failCond` is blanked to
    NaN; companies (columns) that are left without a single value are then
    removed from the result.

    :param dataFrame: A Data Frame containing altman z values of companies
    :type dataFrame: pandas.DataFrame
    :param failCond: a company which has ever had an altman z value less than
                    or equal to this number is considered to have "failed"
                    in the past
    :type failCond: float

    :returns: A Data Frame holding, for the companies that have "failed" in
                the past, only the altman z values at or below `failCond`
                (NaN everywhere else)
    :rtype: pandas.DataFrame
    """
    hasFailed = dataFrame.le(failCond)
    failedOnly = dataFrame.where(hasFailed)
    # a column without any remaining value belongs to a company that never failed
    return failedOnly.dropna(axis="columns", how="all")

In [4]:
def getCompanyFailHistory(dataFrame, markedData):
    """
    Restricts `dataFrame` to the companies (columns) that appear in `markedData`

    :param dataFrame: A Data Frame containing altman z values of companies
    :type dataFrame: pandas.DataFrame
    :param markedData: A Data Frame whose columns are the companies that have
                        "failed" in the past
    :type markedData: pandas.DataFrame

    :returns: The columns of `dataFrame` whose labels are also column labels
                of `markedData`, with their values untouched
    :rtype: pandas.DataFrame
    """
    failedCompanies = list(markedData.columns)
    return dataFrame.filter(items=failedCompanies, axis="columns")

In [5]:
def loops(dictionary_of_arrays_groupby):
    """
    Returns, for every value of the given mapping, its last element plus one

    :param dictionary_of_arrays_groupby: a mapping whose values are non-empty
                    sequences of numbers (the notebook passes the `indices`
                    dictionary of a pandas group-by, i.e. arrays of row
                    positions)
    :type dictionary_of_arrays_groupby: dict

    :returns: one number per entry of the mapping, in the iteration order of
                the mapping
    :rtype: list
    """
    return [positions[-1] + 1
            for positions in dictionary_of_arrays_groupby.values()]

In [6]:
#Reading the data
# rows 0-2, 4 and 5 of the sheet are skipped, which leaves row 3 as the header row
data_r = getCompanyHistory(directory, [0,1,2,4,5], naValues)
# only the values at or below fail_condition (NaN elsewhere); companies that never failed are removed
data_lower = weedSuccessful(data_r, fail_condition)
# the complete history of the companies that remain in data_lower
data = getCompanyFailHistory(data_r, data_lower)

In [8]:
#Modifications to data to extract relevant information
#Masks the Data Frame: 1 where a value is present in data_lower (i.e. at or below 1.8), 0 otherwise
#astype is used instead of applymap, which is deprecated since pandas 2.1 and calls Python once per cell
data_masked = data_lower.notna().astype("int64")

#Numbers the periods inside each consecutive run of 1s (1, 2, 3, ...)
#The numbering restarts with every new run and the 0s stay 0 because of the multiplication by y
data_masked_counted = data_masked.apply(lambda y: y * (y.groupby((y != y.shift()).cumsum()).cumcount() + 1))

#Counts, per company, how often each number occurs in the masked and counted Data Frame
#First row is removed since that shows amount of zeros which is not used
data_mc_indexed = data_masked_counted.apply(lambda x: x.value_counts())[1:]

In [10]:
#The longest each company has stayed below altman-z-score of 1.8
longest_under = data_masked_counted.max().to_frame()
display(longest_under.sort_values(0, ascending = False))

#How many times each company dropped below 1.8
#(the first remaining row counts the 1s, and every period below 1.8 contains exactly one 1)
times_went_under = data_mc_indexed.iloc[:1]
display(times_went_under)

#How long each company has stayed below 1.8 each time they dropped below 1.8
#The Series is built explicitly: DataFrame.apply with a function that returns lists only gives
#a Series when the lists differ in length, otherwise it builds a Data Frame and to_frame() fails
#NOTE(review): periods of equal length end up as a single entry (in the displayed output
#DOW UN Equity went under 2 times but only [1] is listed) - confirm this is intended
data_time_spent_under = pd.Series(
    {company: loops(data_mc_indexed.groupby(data_mc_indexed[company]).indices)
     for company in data_mc_indexed.columns},
    dtype=object,
).to_frame()
display(data_time_spent_under)

Unnamed: 0,0
ASUR UR Equity,80
SCI UN Equity,79
MGM UN Equity,79
CDZI UQ Equity,79
NI UN Equity,79
...,...
600489 CH Equity,1
3465 JT Equity,1
3464 JT Equity,1
3392 JT Equity,1


Unnamed: 0,BA UN Equity,CAT UN Equity,CVX UN Equity,DOW UN Equity,VZ UN Equity,1COV GY Equity,BAYN GY Equity,BMW GY Equity,CON GY Equity,DAI GY Equity,...,SSW SJ Equity,STP SJ Equity,SUI SJ Equity,TFG SJ Equity,TGO SJ Equity,TKG SJ Equity,TSG SJ Equity,TXT SJ Equity,VKE SJ Equity,WBO SJ Equity
1,1.0,3.0,1.0,2.0,7.0,1.0,1.0,1.0,2.0,1.0,...,3.0,1.0,10.0,1.0,1.0,6.0,3.0,1.0,3.0,12.0


Unnamed: 0,0
BA UN Equity,[2]
CAT UN Equity,"[7, 5, 1]"
CVX UN Equity,[1]
DOW UN Equity,[1]
VZ UN Equity,"[25, 6, 2, 1]"
...,...
TKG SJ Equity,"[4, 2]"
TSG SJ Equity,"[4, 2]"
TXT SJ Equity,[51]
VKE SJ Equity,"[14, 12, 4]"
