# Output_2_Excel

## send dataframe(s) to excel

In [None]:
import pandas as pd
#import openpyxl

In [None]:
def df_to_xl(df, output_file, output_tab, start_row=0, start_column=0, header=False):
    '''
    Write a dataframe into an existing excel workbook at a given cell offset.

    The workbook output_file is opened in append mode, so the sheets it already contains are kept,
    and the dataframe df is written onto the output_tab sheet. The location on that sheet is
    determined by start_row and start_column. The dataframe index is never written.

    Inputs
        df - the dataframe that will be sent to excel
        output_file - the path and filename of the output file. The workbook must already exist.
        output_tab - the tab in the output file to send the data to.
        start_row - the first row where the data will appear. NOTE: Python treats cell A1 as row zero.
        start_column - the first column where the data will appear. NOTE: Python treats cell A1 as column zero.
        header - if the header is True, it will send the column names of the dataframe to excel.
                If header is False, no column names will be sent to the excel file.
                If header is set equal to a list of strings, the list of strings will be an alias to the column names and will
                    be sent to the excel file and the column names will not.

    Returns
        None

    Python Modules to be imported for this to work
        pandas (1.4 or newer, needed for if_sheet_exists='overlay')
        openpyxl
    '''

    # mode='a' appends to the existing workbook instead of replacing it and
    # if_sheet_exists='overlay' writes into an existing sheet without clearing it first.
    # The context manager saves and closes the workbook, even when to_excel raises.
    with pd.ExcelWriter(output_file,
                        engine='openpyxl',
                        mode='a',
                        if_sheet_exists='overlay') as writer:
        # sending the dataframe 'df' to the writer and specifying where it should go in the spreadsheet
        df.to_excel(writer,
                    sheet_name=output_tab,
                    startrow=start_row,
                    startcol=start_column,
                    header=header,
                    index=False)

    print('DataFrame is written successfully to Excel File.')

    return None


In [None]:
# Read the 'sampledata' sheet of the sample input workbook into a dataframe
df = pd.read_excel('../data/sampledfinput.xlsx', sheet_name='sampledata')
# df2 is a second name for the same dataframe object (no copy is made)
df2 = df

# Path and filename of the excel workbook used as the template by the cells below
template_path_and_file = '../data/sampOutput.xlsx'


In [None]:
#df_to_xl(df, path, output_tab, start_row, start_col)

## Sending multiple dataframes to excel

In [None]:
# One placement spec per dataframe: which dataframe ('var_name'), which worksheet ('tab'),
# the zero-based cell offset ('start_row', 'start_col') and the 'header' / 'index' options.
output_dict1 = dict(
    var_name=df,
    tab='first_page',
    start_row=7,
    start_col=1,
    header=False,
    index=False,
)

output_dict2 = dict(
    var_name=df2,
    tab='second_page',
    start_row=7,
    start_col=1,
    header=True,
    index=True,
)

# Outer dictionary handed to multiple_dfs_to_xl: label -> placement spec
output_dict = dict(data_1=output_dict1, data_2=output_dict2)


In [None]:
def _resolve_output_workbook(template_path_and_file, save_path_and_file, time_stamp_format):
    '''
    Build the path and filename of the workbook copy that multiple_dfs_to_xl writes to.

    Returns the path as a string, or None (after printing an error message) when the
    directory requested through save_path_and_file does not exist.
    '''
    import datetime
    import os

    template_dir, template_base = os.path.split(template_path_and_file)
    template_name, template_suffix = os.path.splitext(template_base)

    if not save_path_and_file:
        # no save location supplied, the copy is created next to the template
        out_dir, out_name, out_suffix = template_dir, template_name, template_suffix
    else:
        save_dir, save_base = os.path.split(save_path_and_file)
        save_name, save_suffix = os.path.splitext(save_base)
        if save_suffix.lower().startswith('.xls'):
            # a workbook name was supplied; a bare filename is saved in the template directory
            out_dir = save_dir if save_dir else template_dir
            out_name, out_suffix = save_name, save_suffix
        else:
            # only a directory was supplied (with or without a trailing slash),
            # the output keeps the name and extension of the template
            out_dir = save_path_and_file
            out_name, out_suffix = template_name, template_suffix

        # an empty directory part means the current working directory
        if os.path.isdir(out_dir or '.'):
            print("save directory validated")
        else:
            print('''Error:output directory for output file doesn't exist.
             Troubleshooting tips:
            >Make sure spelling is correct
            >Use forward slashes /'s in your path
            >Make sure you can access the output directory from your computer (Is it on your harddisk or the network? Do you need VPN?)
            >Call Dave (408)536.6000''')
            return None

    # strftime patterns for the accepted time_stamp_format values
    timestamp_dictionary = {'hr': '%d%b%y%H',
                            'min': '%d%b%y%H%M',
                            'sec': '%d%b%y%H%M%S',
                            'def_ts': '%d%b%y',
                            'omit': ''}
    # a missing or unknown format falls back to the default timestamp
    time_stamp_string = timestamp_dictionary.get(time_stamp_format, timestamp_dictionary['def_ts'])
    time_stamp = datetime.datetime.now().strftime(time_stamp_string)

    return os.path.join(out_dir, out_name + time_stamp + out_suffix)


def multiple_dfs_to_xl(output_dict, template_path_and_file, save_path_and_file=False, time_stamp_format=False,):
    '''
    This function copies the template excel workbook to a new file (by default into the same directory, with a
    timestamp added to the filename) and opens the copy. Then it sends multiple dataframes to multiple tabs at
    specific locations of the copied workbook. The 'tab' fields in the dictionaries determine which worksheet
    the data gets sent to and the location within that worksheet is determined by the 'start_row' and
    'start_col' fields. The template itself is not modified.

    Inputs
        output_dict - a dictionary comprised of dictionaries. The keys of the outer-most dictionary are
            labels only (they are not used for anything else); each value describes one dataframe placement.

        output_dict = {'data_1': {'var_name': df,
                              'tab': 'first_page',
                              'start_row': 7,
                              'start_col': 1,
                              'header': False,
                              'index': False},
                  'data_2': {'var_name': df,
                             'tab': 'first_page',
                             'start_row': 7,
                             'start_col': 1,
                             'header': False,
                             'index': False}
                  }

            The inner dictionaries are comprised of the following keys

            var_name - the dataframe that will be sent to excel
            tab - the tab in the output file to send the data to.
            start_row - the first row where the data will appear. NOTE: Python treats cell A1 as row zero.
            start_col - the first column where the data will appear. NOTE: Python treats cell A1 as column zero.
            header - if the header is True, it will send in the column names of the dataframe.
                If header is False, no column names will be sent to the excel file.
                If header contains a list of strings, the list of strings will be an alias to the column names
            index - Optional - boolean variable that determines whether the index of the dataframe will be
                sent to excel. Treated as False when the key is missing.

        template_path_and_file - an excel workbook (including the path to the workbook) that contains the
            formatted worksheets that will receive the dataframes from python

        save_path_and_file - Optional - if provided it can be a path only (ex.. '../data/'),
            a path and outfile name (ex.. '../data/sample_output_file_name.xlsx') or an outfile name only,
            which is saved in the directory of the template. If omitted the output file will retain the same
            name as the template and will be saved in the same directory as the template.
            In every case the timestamp selected by time_stamp_format is inserted between the filename
            and its extension.

        time_stamp_format - Optional -
            Acceptable values are as follows:
             * 'hr' - (ex.. 22Jul2019)
             * 'min' - (ex.. 22Jul201901)
             * 'sec' - (ex.. 22Jul20190127)
             * 'def_ts' - to explicitly use the default timestamp format (ex.. 22Jul20)
             * 'omit' - to explicitly not add a timestamp to the output file name
            If the time_stamp_format is not supplied or any other value that is not defined above is used
            the default timestamp (ex..22Jul20) will be added to the output file name.

    Returns
        None. Error messages are printed when the template or the save directory does not exist.

    Raises
        shutil.SameFileError when the output path is the template itself
        (no save_path_and_file together with time_stamp_format='omit').

    Python Modules to be imported for this to work
        pandas (1.4 or newer, needed for if_sheet_exists='overlay')
        openpyxl
        shutil
        os
        '''

    import os
    import shutil

    # check to make sure the template path and file exists as specified
    if os.path.isfile(template_path_and_file):
        print('Template path and file confirmedevalutating template')
    else:
        print('''Error:path/file as input for template_path_and_file doesn't exist.
         Troubleshooting tips:
        >Make sure spelling is correct
        >Use forward slashes /'s in your path
        >Make sure you can access the file from your computer (Is it on your harddisk or the network? Do you need VPN?)
        >Call Dave (408)536.6000''')
        return

    new_copied_file_name = _resolve_output_workbook(template_path_and_file,
                                                    save_path_and_file,
                                                    time_stamp_format)
    if new_copied_file_name is None:
        # the save directory was rejected, the error has already been printed
        return

    # copy template and save with the new name using shutil module
    output_file = shutil.copy(template_path_and_file, new_copied_file_name)

    # with nothing to write the copy is left exactly as the template
    if output_dict:
        # mode='a' keeps the sheets of the copied template and 'overlay' writes into them in place;
        # the context manager saves and closes the workbook
        with pd.ExcelWriter(output_file,
                            engine='openpyxl',
                            mode='a',
                            if_sheet_exists='overlay') as writer:
            # iterate through dictionary and output dataframes to worksheets
            for placement in output_dict.values():
                placement['var_name'].to_excel(writer,
                                               sheet_name=placement['tab'],
                                               startrow=placement['start_row'],
                                               startcol=placement['start_col'],
                                               header=placement['header'],
                                               index=placement.get('index', False))

        print('DataFrame is written successfully to Excel File.')
    return None

In [None]:
   #path_and_file_new 
    #path_and_file_time_stamp
    #path_and_file_new_name
    
    
 #   save_path_and_file = '../data/sampleoutputfileinput.XLSx'
save_path_and_file = '../data/'
# scratch default so this cell can run on its own (no timestamp format selected)
time_stamp_format = False

if time_stamp_format:
    path_and_file_time_stamp = 1

if save_path_and_file:
    path_and_file_option = 1
else:
    path_and_file_option = 0

# everything up to and including the last '/' is the directory part
file_path_index = save_path_and_file.rfind('/')+1
file_path = save_path_and_file[:file_path_index]
print(file_path_index)

suffix_index = save_path_and_file.rfind('.')
# a '.' found before the last '/' belongs to the directory (ex.. '../'), not to a file extension
suffix = save_path_and_file[suffix_index:] if suffix_index >= file_path_index else ''

# compare against both extensions; "== '.xls' or '.xlsx'" is always true
if suffix.lower() in ('.xls', '.xlsx'):
    path_and_file_new_name = 1
    print(suffix)

In [None]:
# use the template workbook path defined earlier in the notebook
template_file = template_path_and_file
multiple_dfs_to_xl(output_dict, template_file)

In [None]:

# compare against both extensions; "== '.xls' or '.xlsx'" is always true
if suffix.lower() in ('.xls', '.xlsx'):
    path_and_file_new_name = 1
    print(suffix)


In [65]:
def filename_to_xl(template_path_and_file, save_path_and_file=False, time_stamp_format=False):
    '''
    Work out the path and filename of an output workbook and print it.

    The name is assembled from the template path, the optional save path/filename and a timestamp.
    Nothing is copied or written; the intermediate dictionaries and the final name are printed.

    Inputs
        template_path_and_file - path and filename of an existing excel template (forward slashes).
        save_path_and_file - Optional - one of
            * a directory (ex.. '../data/' or '../data'), the template filename is reused
            * a directory and an excel filename (ex.. '../data/out.xlsx')
            * an excel filename only (ex.. 'out.xlsx'), saved in the directory of the template
            If omitted the path and filename of the template are reused.
        time_stamp_format - Optional - 'hr', 'min', 'sec', 'def_ts' or 'omit'. A missing or unknown
            value uses the default timestamp ('def_ts'). The timestamp is added after the filename
            with a leading '_'. With 'omit' a lone '_' is still added when the template path and
            filename are reused, so the output name differs from the template.

    Returns
        None. An error message is printed when the template, the save directory or the
        supplied filename is not valid.

    Python Modules to be imported for this to work
        datetime
        os
    '''
    import datetime
    import os

    #troubleshooting tips
    tips = '''Troubleshooting tips:
        >Make sure spelling is correct
        >Use forward slashes /'s in your path
        >Make sure you can access the file from your computer (Is it on your harddisk or the network? Do you need VPN?)
        >Call the Python Hotline (408)536.2086'''

#Process Excel Template path and filename
    #check to make sure the template path and file exists as specified
    if os.path.isfile(template_path_and_file):
        txt = "Template confirmed! {directory} "
        print(txt.format(directory = template_path_and_file))
    else:
        txt = "Error: Template path/file {directory} doesn't exist."
        print(txt.format(directory = template_path_and_file))
        print(tips)
        return

    #find the index where the path ends and the filename begins
    file_path_index = template_path_and_file.rfind('/')+1
    #find the index where the filename extension begins
    suffix_index = template_path_and_file.rfind('.')
    if suffix_index < file_path_index:
        #no '.' in the filename itself (none at all, or only in the directory part): no extension
        suffix_index = len(template_path_and_file)

    # dictionary for the template path and filename; the key order is the order
    # in which the parts are concatenated further down
    template_path_and_file_dict = {'file_path': template_path_and_file[:file_path_index],
                                   'file_name': template_path_and_file[file_path_index:suffix_index],
                                   'time_stamp': False,
                                   'file_name_suffix': template_path_and_file[suffix_index:]}

#Process output path and filename
    # create a dictionary for the save path and filename
    save_path_and_file_dict = {'file_path':False,
                               'file_name': False,
                               'time_stamp': False,
                               'file_name_suffix': False}
    #check to see if there is a value for the output path and filename
    if not save_path_and_file == False:
        #locate the end of the path for the output
        save_file_path_index = save_path_and_file.rfind('/')+1
        if not save_file_path_index == 0:
            #there is a fwd slash, so a directory is included
            if save_path_and_file[-1] != '/' and save_path_and_file.rfind('.xls') >= save_file_path_index:
                #'.xls' (also matches '.xlsx') was found in the part after the last fwd slash,
                #therefore an excel filename is included: take path, filename and suffix from the string
                save_suffix_index = save_path_and_file.rfind('.')
                save_path_and_file_dict['file_path'] = save_path_and_file[:save_file_path_index]
                save_path_and_file_dict['file_name'] = save_path_and_file[save_file_path_index:save_suffix_index]
                save_path_and_file_dict['file_name_suffix'] = save_path_and_file[save_suffix_index:]
            else:
                #directory only: make sure it ends with a fwd slash and reuse the template filename and suffix
                if save_path_and_file[-1] == '/':
                    save_path_and_file_dict['file_path'] = save_path_and_file
                else:
                    save_path_and_file_dict['file_path'] = save_path_and_file + '/'
                save_path_and_file_dict['file_name'] = template_path_and_file_dict['file_name']
                save_path_and_file_dict['file_name_suffix'] = template_path_and_file_dict['file_name_suffix']
        else:
            #no fwd slash was included in the save_path_and_file string. Therefore it is only a filename.
            if save_path_and_file.rfind('.xls') > 0:
                #valid excel filename: reuse the template path, take filename and suffix from the string
                save_suffix_index = save_path_and_file.rfind('.')
                save_path_and_file_dict['file_path'] = template_path_and_file_dict['file_path']
                save_path_and_file_dict['file_name'] = save_path_and_file[:save_suffix_index]
                save_path_and_file_dict['file_name_suffix'] = save_path_and_file[save_suffix_index:]
            else:
                #no '.xls' found so there isn't a valid filename included in save_path_and_file string
                #print error message and exit function
                txt = "Error: Filename supplied {directory} is invalid. Filename must include '.xlsx'"
                txt2 = "Example: {directory}.xlsx"
                print(txt.format(directory = save_path_and_file))
                print(txt2.format(directory = save_path_and_file))
                print(tips)
                return
    else:
        #no save_path_and_file input, use template path, filename and suffix for output file.
        #Both names refer to the same dictionary on purpose: the equality test after the
        #timestamp step uses this to recognise that the template details are being reused.
        save_path_and_file_dict = template_path_and_file_dict

    #check to make sure the output path exists as specified
    #(an empty path means the current working directory)
    if os.path.isdir(save_path_and_file_dict['file_path'] or '.'):
        txt = "Save directory confirmed! {directory} "
        print(txt.format(directory = save_path_and_file_dict['file_path']))
    else:
        txt = "Error: Save directory {directory} doesn't exist."
        print(txt.format(directory= save_path_and_file_dict['file_path']))
        return

    print('\n')
    print(template_path_and_file_dict)
    print(save_path_and_file_dict)

#Process TimeStamp
    #strftime patterns for the accepted time_stamp_format values
    timestamp_dictionary = {'hr': '%d%b%y%H',
                            'min': '%d%b%y%H%M',
                            'sec': '%d%b%y%H%M%S',
                            'def_ts': '%d%b%y',
                            'omit': ''}
    #a missing or unknown format falls back to the default timestamp format
    time_stamp_string = timestamp_dictionary.get(time_stamp_format, timestamp_dictionary['def_ts'])
    #update save_path_and_file_dict with outcome of timestamp routine along with a '_'
    save_path_and_file_dict['time_stamp'] = '_' + datetime.datetime.now().strftime(time_stamp_string)
    #when the output is not the template itself, a lone '_' (timestamp omitted) is not needed
    if save_path_and_file_dict != template_path_and_file_dict and save_path_and_file_dict['time_stamp'] == '_':
        save_path_and_file_dict['time_stamp'] = ''

###for debug remove from final version
    print('\n')
    print(template_path_and_file_dict)
    print(save_path_and_file_dict)

#create save file output path and name
    #concatenate path, filename, timestamp and suffix in dictionary order, ignoring items left at False
    new_copied_file_name = ''.join(part for part in save_path_and_file_dict.values()
                                   if part is not False)

    print('\n')
    print(new_copied_file_name)
    return None

In [62]:
# Manual test inputs for filename_to_xl. The commented-out assignments and calls are
# alternative cases to try; uncomment one at a time.
#    template_path_and_file = False
#    save_path_and_file = False
template_path_and_file = '../data/sampOutput.xlsx'
# alternative: filename only, no directory
#save_path_and_file = 'sampOutput2.xlsx'
save_path_and_file = '../data/sample/sampOutput2.xlsx'
# alternative: neither a fwd slash nor an excel extension
#save_path_and_file = '..sampled'

# 'sec' selects the day/month/year/hour/minute/second timestamp format
time_stamp_format = 'sec'
print(save_path_and_file)

# alternative calls: template only / template and save path / template with timestamp omitted
#filename_to_xl(template_path_and_file) 
#filename_to_xl(template_path_and_file, save_path_and_file) 
filename_to_xl(template_path_and_file, save_path_and_file, time_stamp_format) 
#filename_to_xl(template_path_and_file, time_stamp_format  = 'omit') 

../data/sample/sampOutput2.xlsx
Template confirmed! ../data/sampOutput.xlsx 
Save directory confirmed! ../data/sample/ 


{'file_path': '../data/', 'file_name': 'sampOutput', 'time_stamp': False, 'file_name_suffix': '.xlsx'}
{'file_path': '../data/sample/', 'file_name': 'sampOutput2', 'time_stamp': False, 'file_name_suffix': '.xlsx'}


{'file_path': '../data/', 'file_name': 'sampOutput', 'time_stamp': False, 'file_name_suffix': '.xlsx'}
{'file_path': '../data/sample/', 'file_name': 'sampOutput2', 'time_stamp': '_27Jul20003155', 'file_name_suffix': '.xlsx'}
None
../data/sample/
../data/sample/sampOutput2
../data/sample/sampOutput2_27Jul20003155


../data/sample/sampOutput2_27Jul20003155.xlsx


In [None]:
# Scratch check of the excel filename test used in filename_to_xl: this string ends with '.'
# but does not contain '.xls', so rfind returns -1 and the '> 0' test fails.
# NOTE: this rebinds save_path_and_file for any cell run afterwards.
save_path_and_file = 'sampOutput2.'
save_path_and_file.rfind('.xls')