Output_2_Excel


In [1]:
import pandas as pd
#import openpyxl

In [2]:
# Load the sample input sheet that will be written back out to Excel.
df = pd.read_excel('../data/sampledfinput.xlsx', sheet_name='sampledata')
# Independent copy rather than a second name for the same object, so an
# in-place change to one frame can never silently leak into the other.
df2 = df.copy()

# Path of the Excel workbook template that the DataFrames are written into.
template_file = '../data/sampOutput.xlsx'


In [3]:
# Write spec for the first sheet: values only (no column names, no index).
output_dict1 = dict(
    var_name=df,
    tab='first_page',
    start_row=7,
    start_col=1,
    header=False,
    index=False,
)

# Write spec for the second sheet: same anchor cell, with column names and
# the index requested.
output_dict2 = dict(
    var_name=df2,
    tab='second_page',
    start_row=7,
    start_col=1,
    header=True,
    index=True,
)

# Label -> write spec mapping handed to multiple_dfs_to_xl.
output_dict = dict(data_1=output_dict1, data_2=output_dict2)


In [4]:
def multiple_dfs_to_xl(output_dict, template_file, header=False):
    '''
    Write several DataFrames into a timestamped copy of an Excel template.

    The template itself is never modified. It is copied into the same
    directory with a '%d%b%y%H%M%S' timestamp appended to the file name
    (before the extension), and every DataFrame is then written into that
    copy on top of whatever the target sheet already contains.

    Inputs
        output_dict - a dictionary comprised of dictionaries. The keys of the
            outer-most dictionary are free-form labels; they are only used in
            error messages. Each inner dictionary describes one write and is
            comprised of the following keys

            var_name - (required) the dataframe that will be sent to excel
            tab - (required) the tab in the output file to send the data to
            start_row - the first row where the data will appear, zero-based
                (0 is Excel row 1). Defaults to 0.
            start_col - the first column where the data will appear,
                zero-based (0 is Excel column A). Defaults to 0.
            header - if True, the column names of the dataframe are written.
                If False, no column names are written.
                If a list of strings, the strings are used as aliases for
                the column names.
                Defaults to the `header` argument of this function.
            index - boolean that determines whether the index of the
                dataframe is written. Defaults to False.
        template_file - the path and filename of the excel workbook template
        header - fallback used for any inner dictionary without a 'header' key

    Returns
        None. A confirmation message is printed once everything is written.

    Raises
        KeyError - if an inner dictionary lacks 'var_name' or 'tab'. This is
            checked before the template is copied, so a malformed spec does
            not leave a stray copy behind.
        FileNotFoundError - if template_file does not exist.

    Python Modules to be installed for this to work
        pandas (1.4 or newer, for if_sheet_exists='overlay')
        openpyxl
    '''
    import datetime
    import os
    import shutil

    # Validate every spec up front so nothing is copied for a bad request.
    required_keys = ('var_name', 'tab')
    for label, spec in output_dict.items():
        missing = [key for key in required_keys if key not in spec]
        if missing:
            raise KeyError(
                f'output_dict[{label!r}] is missing required key(s): {missing}')

    # Insert the timestamp between the file name and its extension.
    # os.path.splitext only inspects the last path component, so dots in the
    # directory part (e.g. '../data') and extension-less names are handled.
    stamp = datetime.datetime.now().strftime('%d%b%y%H%M%S')
    stem, extension = os.path.splitext(template_file)
    output_file = shutil.copy(template_file, stem + stamp + extension)

    # Append mode loads the existing workbook; 'overlay' writes into existing
    # sheets without clearing them first. The context manager saves and closes
    # the workbook even if one of the writes raises.
    with pd.ExcelWriter(output_file, engine='openpyxl', mode='a',
                        if_sheet_exists='overlay') as writer:
        for spec in output_dict.values():
            spec['var_name'].to_excel(writer,
                                      sheet_name=spec['tab'],
                                      startrow=spec.get('start_row', 0),
                                      startcol=spec.get('start_col', 0),
                                      header=spec.get('header', header),
                                      index=spec.get('index', False))

    print('DataFrame is written successfully to Excel File.')
    return None

In [5]:
# Write every configured DataFrame into a timestamped copy of the template.
multiple_dfs_to_xl(output_dict, template_file)

DataFrame is written successfully to Excel File.


In [None]:

def df_to_xl(df, output_file, output_tab, start_row=0, start_column=0, header=False, index=False):
    '''
    Write one DataFrame into an existing Excel workbook at a given location.

    Inputs
        df - the dataframe that will be sent to excel
        output_file - the path and filename of the output file. The workbook
            must already exist; it is modified in place.
        output_tab - the tab in the output file to send the data to.
        start_row - the first row where the data will appear, zero-based
            (0 is Excel row 1).
        start_column - the first column where the data will appear,
            zero-based (0 is Excel column A).
        header - if True, the column names of the dataframe are written.
            If False, no column names are written.
            If a list of strings, the strings are used as aliases for the
            column names.
        index - boolean that determines whether the index of the dataframe
            is written. Defaults to False.

    This function opens the workbook output_file in append mode, so all of
    its sheets are loaded, and writes the dataframe df onto the output_tab
    sheet on top of whatever that sheet already contains. The location is
    determined by start_row and start_column.

    Returns
        None. A confirmation message is printed once the data is written.

    Raises
        FileNotFoundError - if output_file does not exist.

    Python Modules to be installed for this to work
        pandas (1.4 or newer, for if_sheet_exists='overlay')
        openpyxl
    '''
    import os

    # Fail early with a clear message instead of an engine-specific error.
    if not os.path.isfile(output_file):
        raise FileNotFoundError(f'Excel workbook not found: {output_file}')

    # Append mode loads the existing workbook; 'overlay' writes into the sheet
    # without clearing it first. The context manager saves and closes the
    # workbook even if the write raises.
    with pd.ExcelWriter(output_file, engine='openpyxl', mode='a',
                        if_sheet_exists='overlay') as writer:
        df.to_excel(writer,
                    sheet_name=output_tab,
                    startrow=start_row,
                    startcol=start_column,
                    header=header,
                    index=index)

    print('DataFrame is written successfully to Excel File.')

    return None


In [None]:
# Example usage (left disabled): df_to_xl(df, output_file, output_tab, start_row, start_column)