In [165]:
# predefinitions
import pandas as pd
# Path of the Excel workbook that the extraction cell reads
directory = "altman z_all(sheet1-2).xlsx"

# Failure threshold: a value equal to or below this number counts as a failure
fail_condition = 1.8

# Cell contents that are to be read as missing values (NaN)
naValues = [
    "#N/A N/A",
    "#N/A Review",
]

In [166]:
def getCompanyHistory(filename, sheet, skipRows, naValues=None):
    """
    Generates a Data Frame from a table of Altman Z values of companies
    throughout the years

    The sheet is read twice with the same options. The first read takes only
    the first remaining row with ``header=None`` so that the labels are
    obtained exactly as they are written in the sheet; the second read loads
    the table with that row as its header. The raw labels are then written
    over the parsed column names (pandas would otherwise rename repeated
    labels while parsing the header, which would hide duplicated columns).

    No ``encoding`` argument is passed to ``pandas.read_excel``: it is not a
    parameter of that function and current pandas releases raise a
    ``TypeError`` when it is supplied.

    :param filename: an Excel file (.xlsx) containing a table of companies and
                    their Altman Z values according to dates
    :type filename: str
    :param sheet: Name of the sheet to extract from the file
    :type sheet: str
    :param skipRows: a list of indexes, indicating which rows to skip from the
                    top of the file
    :type skipRows: list of int >= 0
    :param naValues: values in the given table that are to be considered "NaN"
    :type naValues: scalar, str, list-like, or dict, default None

    :returns: A Data Frame containing Altman Z values of companies, indexed by
                the first column of the sheet
    :rtype: pandas.DataFrame

    Example:
    >> getCompanyHistory("altman z_all(sheet1-2).xlsx","Sheet1", [0,1,2,4,5],
                             ["#N/A N/A", "#N/A Review"])
    """
    # options shared by both reads, kept in one place so they cannot drift apart
    read_options = dict(sheet_name=sheet, skiprows=skipRows,
                        na_values=naValues, index_col=0)

    # raw labels: first remaining row, without the index column
    cols = pd.read_excel(filename, header=None, nrows=1,
                         **read_options).values[0]

    # the table itself, parsed with the same row as header
    datas = pd.read_excel(filename, **read_options)

    # restore the labels exactly as they appear in the sheet
    datas.columns = cols
    return datas

In [167]:
def dropNaNsAndDups(dataFrame):
    """
    Removes the columns of a Data Frame that carry no information

    Two kinds of columns are removed, in this order:
    1. columns in which every value is NaN
    2. columns whose label repeats the label of an earlier column (only the
       first column with a given label is kept; values are not compared)

    The given Data Frame itself is not modified.

    :param dataFrame: the data frame from which columns will be dropped
    :type dataFrame: pandas.DataFrame

    :returns: a Data Frame with the unneeded columns removed
    :rtype: pandas.DataFrame
    """
    # step 1: discard columns that are entirely NaN
    non_empty = dataFrame.dropna(axis=1, how="all")

    # step 2: True for the first occurrence of every label, False for repeats
    is_first_occurrence = ~non_empty.columns.duplicated()
    return non_empty.loc[:, is_first_occurrence]

In [168]:
def weedSuccessful(dataFrame, failCond):
    """
    Keeps only the "failing" values of the companies that have "failed"

    Every value greater than `failCond` (and every comparison that involves
    NaN) is replaced with NaN. Columns left without a single value, i.e.
    companies that never reached the fail condition, are then dropped.

    :param dataFrame: A Data Frame containing altman z values of companies
    :type dataFrame: pandas.DataFrame
    :param failCond: a company which has ever had an altman z value less than
                    or equal to this number will be considered to have "failed"
                    in the past
    :type failCond: float

    :returns: A Data Frame with one column per company that has "failed" in
                the past, holding the failing values and NaN everywhere else
    :rtype: pandas.DataFrame
    """
    # boolean table: True where the value meets the fail condition
    meets_fail_condition = dataFrame <= failCond

    # blank out (NaN) everything that does not meet it
    failing_values = dataFrame.where(meets_fail_condition)

    # companies with no failing value at all are removed
    return failing_values.dropna(axis=1, how="all")

In [169]:
def getCompanyFailHistory(dataFrame, markedData):
    """
    Restricts `dataFrame` to the columns that also exist in `markedData`

    Only the column labels of `markedData` are used; the values returned are
    the ones stored in `dataFrame`.

    :param dataFrame: A Data Frame containing altman z values of companies
    :type dataFrame: pandas.DataFrame
    :param markedData: A Data Frame whose columns are the companies to keep
                        (the companies that have "failed" in the past)
    :type markedData: pandas.DataFrame

    :returns: The columns of `dataFrame` whose labels appear in `markedData`
    :rtype: pandas.DataFrame
    """
    wanted_labels = list(markedData.columns)
    return dataFrame.filter(items=wanted_labels, axis=1)

In [170]:
def serialConcat(sheets):
    """
    Concatenates the given sheets side by side, dropping unnecessary columns

    Each sheet is cleaned with `dropNaNsAndDups` first, the cleaned sheets are
    joined column-wise, and the joined table is cleaned once more because a
    column label can still repeat across different sheets.

    The cleaned sheets are collected in a new list, so the `sheets` argument
    is left exactly as the caller passed it and any iterable of Data Frames
    (list, tuple, ...) is accepted.

    :param sheets: the Data Frames to be concatenated
    :type sheets: list of pandas.DataFrame objects

    :returns: a concatenation of the given sheets with duplicate and NaN
                columns removed
    :rtype: pandas.DataFrame
    """
    # clean every sheet into a fresh list instead of overwriting the input
    cleaned_sheets = [dropNaNsAndDups(sheet) for sheet in sheets]

    # axis=1: sheets are placed next to each other, rows aligned on the index
    concatenated_data = pd.concat(cleaned_sheets, axis=1)

    # labels repeated across sheets are only detectable after the join
    return dropNaNsAndDups(concatenated_data)

In [171]:
# Extraction of Data
# Both sheets are read with the same options. Rows 0-2 and 4-5 are skipped,
# which leaves row 3 as the first row that is read (the header row).
data_r1 = getCompanyHistory(
    filename=directory,
    sheet="Sheet1",
    skipRows=[0, 1, 2, 4, 5],
    naValues=naValues,
)
data_r2 = getCompanyHistory(
    filename=directory,
    sheet="Sheet2",
    skipRows=[0, 1, 2, 4, 5],
    naValues=naValues,
)

In [172]:
# Defining main variables

# data_r: both sheets joined column-wise, without all-NaN or repeated columns
data_r = serialConcat(sheets=[data_r1, data_r2])

# data_lower: only the values <= fail_condition (NaN elsewhere), restricted to
# the companies that have at least one such value
data_lower = weedSuccessful(dataFrame=data_r, failCond=fail_condition)

# data: the complete history of exactly the companies present in data_lower
data = getCompanyFailHistory(dataFrame=data_r, markedData=data_lower)

In [173]:
#Duygu's Tests
# data_test: 1 where data_lower holds a value (i.e. the company was at or
# below the fail condition at that date), 0 where it is NaN.
# Vectorised cast instead of an element-wise applymap (deprecated in recent
# pandas); values and integer dtype are the same.
data_test = data_lower.notna().astype("int64")

# data_test_applied: length of the current streak of consecutive 1s, per column.
#   (y != y.shift()).cumsum()  -> an id that changes whenever the value changes,
#                                 so each run of equal values forms one group
#   .cumcount() + 1            -> 1-based position inside the run
#   y * ...                    -> runs of 0s are reset to 0
data_test_applied = data_test.apply(
    lambda y: y * (y.groupby((y != y.shift()).cumsum()).cumcount() + 1)
)

In [175]:
# Show the consecutive-failure streak counts (rows: dates, columns: companies);
# the rendered table is truncated with "..." for the rows/columns that do not fit
display(data_test_applied)

Unnamed: 0,BA UN Equity,CAT UN Equity,CVX UN Equity,DOW UN Equity,VZ UN Equity,1COV GY Equity,BAYN GY Equity,BMW GY Equity,CON GY Equity,DAI GY Equity,...,SSW SJ Equity,STP SJ Equity,SUI SJ Equity,TFG SJ Equity,TGO SJ Equity,TKG SJ Equity,TSG SJ Equity,TXT SJ Equity,VKE SJ Equity,WBO SJ Equity
2000-03-31,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2000-06-30,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2000-09-29,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2000-12-29,0,0,0,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2001-03-30,0,0,0,0,2,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2019-06-28,0,0,0,0,0,0,0,59,0,64,...,11,18,15,0,0,0,0,47,8,0
2019-09-30,0,0,0,0,0,0,0,60,0,65,...,12,19,16,0,1,0,1,48,9,0
2019-12-31,0,0,0,0,0,0,0,61,0,66,...,0,20,17,0,2,0,2,49,10,0
2020-03-31,1,0,0,1,0,0,0,62,0,67,...,0,21,18,0,3,1,3,50,11,0
