In [37]:
import pandas as pd
import numpy as np
import os
import shutil
from IPython.display import Markdown, clear_output


class formulas:

    def __init__(self, df):
        self._df = df

    
    def _styled_formula_table(self, styled_table):
        """Aligns pandas dataframe styler object column headers and data to the left"""
    
        # Note the parameter is a styler, not a data frame
        output_table = styled_table.set_table_styles ([
            {'selector': 'th.col_heading', 'props': 'text-align: left; font-size:1em;'},
            {'selector': 'td', 'props': 'text-align: left; font-size:1em;padding: 1.5em;'}]) 

        
        # below allows newlines in the csv, outside of the latex dollar signs to be reflected on display
        output_table= output_table.set_properties(**{'white-space': 'pre-wrap'})
        
        return (output_table)

    
    def _set_styled_table_widths(self, styled_table, widths):
        """Sets pandas dataframe stlyle column withs where widths is represents a dict of column names and widths in pixels as integers"""
    
        return_table = styled_table
        for column_name, width in widths.items():
            return_table = return_table.set_properties(subset=[column_name], **{'width': str(width) + 'px'})
    
        return(return_table)

    
    def is_on_formula_sheet(self, formula):
        """Returns true if formula is listed on formula sheet as per logic below"""
        formulas_one_on_sheet = df[df['On formula sheet'] == True]['Formula_1'].values
        formulas_two_on_sheet = df[df['On formula sheet'] == True]['Formula_2'].values
        formulas_all_on_sheet = np.vstack((formulas_one_on_sheet, formulas_two_on_sheet))
        return(formula in formulas_all_on_sheet)

    
    def _is_on_formula_sheet_formatting(self, formula):
        """Returns formatting for pandas styler object based on the return value of 
        fucntion is_on_formula_sheet for the given formula paramater"""
        if self.is_on_formula_sheet(formula):
            return ('background-color:rgba(255,194,10, 0.2);')
        else:
            return (None)
    
    def _apply_formating_to_values_on_formula_sheet(self, styled_table, columns_to_format):
        """applys formattign to columsn_to_format based using function _is_on_formula_sheet_formatting"""
        styled_table = styled_table.applymap(self._is_on_formula_sheet_formatting, subset=columns_to_format) 
        return(styled_table)
    
    
    def all(self, highlight_items_on_formula_sheet = False):
        """returns all formulas in styled pandas dataframe"""
        
        styled_table = self._df.fillna('').style
        styled_table = self._set_styled_table_widths(styled_table, {
            'Formula_1':400, 'Formula_2':400, 'Comment':600})
        styled_table = self._styled_formula_table(styled_table)
        if highlight_items_on_formula_sheet:
            styled_table = styled_table.applymap(self._is_on_formula_sheet_formatting, subset=['Formula_1', 'Formula_2']) 
        return (styled_table)


    def _calclus_summary_comment(self, row):
        """Returns a comment for calculus formula summary based on derivative and integral comments"""
        if row['Comment Differentiation'] == row['Comment Integration']:
            return_value = row['Comment Differentiation']
        elif row['Comment Differentiation'] == '':
            return_value = row['Comment Integration']
        elif row['Comment Integration'] == '':
            return_value = row['Comment Differentiation']            
        else:
            return_value = row['Comment Differentiation'] + '\n' + row['Comment Integration']

        return(return_value)


    def by_category(self, category, highlight_items_on_formula_sheet = False):
        """returns all formulas for given category"""

        df_by_category =  self._df[self._df['Category'] == category]
        
        styled_table = df_by_category.fillna('').style
        styled_table = self._set_styled_table_widths(styled_table, {
            'Formula_1':400, 'Formula_2':400, 'Comment':600})
        styled_table = self._styled_formula_table(styled_table)

        if highlight_items_on_formula_sheet:
            styled_table = styled_table.applymap(self._is_on_formula_sheet_formatting, subset=['Formula_1', 'Formula_2']) 
        
        return (styled_table)


    def by_category_formula(self, category, highlight_items_on_formula_sheet = False):
        """returns all formulas for given category, returns formula_1 column and formula_2 column if not empty"""

        df_by_category =  self._df[self._df['Category'] == category]   
        formula_2_col_is_empty = df_by_category['Formula_2'].dropna().empty
        
        if formula_2_col_is_empty:
            df_by_category = df_by_category[['Formula_1']]
        else:
            df_by_category = df_by_category[['Formula_1', 'Formula_2']]
        
        styled_table = df_by_category.fillna('').style
        styled_table = self._set_styled_table_widths(styled_table, {'Formula_1':400})
        styled_table = self._styled_formula_table(styled_table)
        styled_table = styled_table.hide().hide(axis='columns')

        if highlight_items_on_formula_sheet and formula_2_col_is_empty:
            styled_table = styled_table.applymap(self._is_on_formula_sheet_formatting, subset=['Formula_1']) 
        elif highlight_items_on_formula_sheet and not formula_2_col_is_empty:
            styled_table = styled_table.applymap(self._is_on_formula_sheet_formatting, subset=['Formula_1', 'Formula_2']) 
        
        return (styled_table)    


    def categories(self, subject_code = None):
        """returns NumPy Array of unique formula categories, optionally filtered by subject_code"""
        if subject_code==None:
            return(self._df['Category'].unique())
        else:
            return(self._df[self._df['Subject code'] == subject_code]['Category'].unique())
    
    

    # def list_by_category(self, category):
    #     """returns all formulas for given category in a list format (formula only)"""

    #     df_by_category =  self._df[self._df['Category'] == category]
    #     formula_list = list(df_by_category['Formula_1'])
    #     return (formula_list)
    

    def unique_state_subject_categories(self):
        """Returns a numpy array consisting of unique instances of state, subject and categroy"""
        return(my_formulas._df[['State', 'Subject code', 'Category']].drop_duplicates().to_numpy(dtype = str))
    
    
    def calculus_summary(self, highlight_items_on_formula_sheet = False):
        """Returns a summary of derivative and integral formulas"""
        
        df_calculus = self._df[['Category', 'Group', 'Formula_1', 'Formula_2', 'Comment']][df["Category"].isin(["Differentiation","Integration"])]
        df_calculus = df_calculus.pivot(columns='Category', index = 'Group').fillna('')
        
        # Flatten the multi-index headings after pivot
        df_calculus.columns = df_calculus.columns.get_level_values(0) +' ' + df_calculus.columns.get_level_values(1)
        df_calculus = df_calculus.reset_index()

        df_calculus['Comment'] = df_calculus.apply(self._calclus_summary_comment, axis=1)
        
        df_calculus = df_calculus.sort_values(by='Group')
        df_calculus =  df_calculus.drop(labels = ['Group', 'Comment Differentiation', 'Comment Integration', 'Formula_2 Integration'], axis = 1)
        df_calculus = df_calculus.rename(columns={
            "Formula_1 Differentiation": "Function", 
            "Formula_1 Integration":"Equivalent integral",
            "Formula_2 Differentiation": "Derivative"})

        # Reorder columns and style
        df_calculus = df_calculus[['Function', 'Derivative', 'Equivalent integral', 'Comment']]
        my_table = df_calculus.style
        my_table = self._set_styled_table_widths(my_table, {'Function': 200, 
                                                            'Derivative': 300,
                                                            'Equivalent integral': 400,
                                                            'Comment':600})
        my_table = self._styled_formula_table(my_table)
        if highlight_items_on_formula_sheet:
            my_table = self._apply_formating_to_values_on_formula_sheet(my_table, ['Derivative', 'Equivalent integral'])
            
        # Hide the index
        my_table = my_table.hide()
        
        return(my_table)



def delete_docs_folder():
    """Remove the 'content/docs' folder used to generate the hugo website
    content, if it exists.

    The folder is looked up under the parent of the current working
    directory, i.e. ``<parent of cwd>/content/docs``. Nothing happens when
    that folder is absent.
    """
    parent_of_cwd = os.path.dirname(os.getcwd())
    target = os.path.join(parent_of_cwd, 'content', 'docs')

    # Guard clause: nothing to delete when the folder is missing.
    if not os.path.isdir(target):
        return
    shutil.rmtree(target)
    


if __name__ == '__main__':
    # Notebook-style driver: load the formula csv, wrap it in `formulas`,
    # clear the generated docs folder and show the unique
    # (State, Subject code, Category) combinations.
    #
    # NOTE(review): `df` and `my_formulas` become module-level globals here;
    # check for references elsewhere in the file before renaming them.
    
    # Latex string in csv needs to be enclosed in a single $ to enable left align
    df=  pd.read_csv(filepath_or_buffer='formulas.csv')

    my_formulas = formulas(df)

    # --- Disabled experiments, kept for reference -------------------------

    # display(df)

    # formula_list = my_formulas.list_by_category(category = 'Indices')
    # display(Markdown('# Indices'))
    # display(Markdown('<br>'))
    # for formula in formula_list:
    #     display(Markdown(formula.replace('\(', '$').replace('\)', '$')))
    #     display(Markdown('<br>'))

    # display(my_formulas.all(highlight_items_on_formula_sheet = False))
    # display (Markdown('#'))
    # display (my_formulas.calculus_summary(highlight_items_on_formula_sheet = True))
    # display(my_formulas.by_category('Surds', highlight_items_on_formula_sheet = True))
    # display(my_formulas.by_category_formula('Statistics', highlight_items_on_formula_sheet = True))

    # Below doesn't render too badly on hugo, still some glitches though
    # display(Markdown(my_formulas._df[df['Category'] == 'Indices'][['Formula_1']].rename(columns={"Formula_1": ""}).to_markdown(index=False)))

    # for category in my_formulas.categories():
    #     output_string =  '#  \n<br>\n' + my_formulas.by_category_formula(category).to_html()
    #     file_name = os.getcwd() + os.path.sep +'Experimentation/FormulasByCategory' + os.path.sep  + category + '.md'
    #     with open(file_name, "w") as text_file:
    #         text_file.write(output_string)
    
    # Destructive: removes <parent of cwd>/content/docs when that folder exists.
    delete_docs_folder()
    # display(my_formulas.unique_state_subject_categories().to_numpy(dtype = str) )
    # NOTE(review): `display` is not imported in this file; presumably it is
    # supplied by the IPython/Jupyter session - confirm before running this
    # as a plain script.
    display(my_formulas.unique_state_subject_categories())




array([['NSW', '9', 'Indices'],
       ['NSW', '11_adv', 'Logarithms'],
       ['NSW', '11_adv', 'Differentiation'],
       ['NSW', '12_adv', 'Differentiation'],
       ['NSW', '12_adv', 'Integration'],
       ['NSW', '12_ext_1', 'Differentiation'],
       ['NSW', '12_ext_1', 'Integration'],
       ['NSW', 'nan', 'Differentiation'],
       ['NSW', 'nan', 'Integration'],
       ['NSW', '12_adv', 'Statistics'],
       ['NSW', '9', 'Surds']], dtype='<U15')

$ \sqrt{\dfrac{x}{y}} = \dfrac{\sqrt{x}}{\sqrt{y}} $

In [25]:
# Show the unique (State, Subject code, Category) rows as a DataFrame; the
# original row index is kept, and this reads the wrapper's private `_df`.
my_formulas._df[['State', 'Subject code', 'Category']].drop_duplicates()

Unnamed: 0,State,Subject code,Category
0,NSW,9,Indices
9,NSW,11_adv,Logarithms
14,NSW,11_adv,Differentiation
21,NSW,12_adv,Differentiation
22,NSW,12_adv,Integration
42,NSW,12_ext_1,Differentiation
43,NSW,12_ext_1,Integration
48,NSW,,Differentiation
49,NSW,,Integration
55,NSW,12_adv,Statistics
