# Extract Daily Oracle Emails and Copy to Spreadsheet

Daily Oracle emails contain 2 types of tables for panels 2, 5, 7:
    1. The first is a current summary of the panels. 
    2. The second is a history table for each panel. Only the latest history table is needed.
    
The emails are extracted, parsed and saved into a consolidated spreadsheet

In [39]:
import win32com.client
import openpyxl
import numpy as np
import datetime

### Open the spreadsheet with the consolidated history
Work out:
    1. the most recent email processed 
    2. the last empty row in the spreadsheet where further data can be added
    3. the cell range of the history table (so that it can be deleted)

In [40]:
# Open the spreadsheet which will be populated with the email data.
# The sheet is looked up by name, so it does not have to be the active sheet.
wb = openpyxl.load_workbook('Oracle Panel Data From Email.xlsx')
ws = wb['Oracle Report Data']

# Most recent email date already recorded in the snapshot area. Starts at a
# sentinel early date so that any real date found in the sheet replaces it.
# The history table is assumed to carry the same dates, so only the snapshot
# column is inspected for the most recent date.
table_snapshot_recent_date = datetime.date(1900, 1, 1)

# Walk down column 1 (snapshot email date, stored as text laid out dd?mm?yy)
# from the first data row until the first empty cell. On exit,
# snapshot_spreadsheet_row is the first free row for new snapshot data.
snapshot_spreadsheet_row = 5  # first row with data
while True:
    date_string = ws.cell(row=snapshot_spreadsheet_row, column=1).value
    if date_string is None:
        break
    # Positional slicing keeps this independent of the separator character;
    # the two-digit year is taken to be in the 2000s.
    day = int(date_string[0:2])
    month = int(date_string[3:5])
    year = int('20' + date_string[6:])
    excel_date = datetime.date(year, month, day)
    if excel_date > table_snapshot_recent_date:
        table_snapshot_recent_date = excel_date
    snapshot_spreadsheet_row += 1

# Find the end of the history data. The history email date is written to and
# read from column 10 further down in this notebook, so that is the column
# that marks which history rows are occupied (column 9 is not written by
# this script). On exit, history_spreadsheet_row is the first free history row.
history_spreadsheet_row = 5  # first row with data
while ws.cell(row=history_spreadsheet_row, column=10).value is not None:
    history_spreadsheet_row += 1
        

       

### Scan through the emails and copy to spreadsheet
Go through each email in the Oracle email folder and:
    1. if it is more recent than what is currently in the spreadsheet, process it
    2. parse the data for the snapshot table and the panel 5 history table
    3. write the snapshot data to the spreadsheet
    4. record the most recent history for P5, deleting the existing history
    5. save the updated spreadsheet

In [41]:
##################################### Get Outlook Oracle email data and copy to spreadsheet ################################

###### Open outlook and get emails 

# Attach to Outlook through COM and work inside its MAPI namespace.
outlook = win32com.client.Dispatch("Outlook.Application").GetNamespace("MAPI")

# Default folder number 6 is the inbox; a different number selects a
# different default folder.
inbox = outlook.GetDefaultFolder(6)

# Sub-folder 8 of the inbox is the Oracle Report folder on the author's
# mailbox - this index is specific to that mailbox layout.
subfldr = inbox.Folders[8]
messages = subfldr.Items

# Month abbreviation -> month number, used to translate report dates to ints.
dic_month = {
    abbreviation: number
    for number, abbreviation in enumerate(
        ('Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
         'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'),
        start=1,
    )
}

###### Process each email

# ---- Small helpers used while processing each email -------------------------

def _parse_dmy(date_string):
    """Return a ``datetime.date`` from text laid out as ``dd?mm?yy``.

    Fields are taken by position, so the separator character does not matter.
    The two-digit year is taken to be in the 2000s.
    """
    day = int(date_string[0:2])
    month = int(date_string[3:5])
    year = int('20' + date_string[6:])
    return datetime.date(year, month, day)


def _excel_datevalue(date_obj):
    """Return an Excel ``=datevalue("dd/mm/yy")`` formula string for *date_obj*."""
    return '=datevalue("' + date_obj.strftime("%d/%m/%y") + '")'


def _to_number(text):
    """Return *text* as a float, or 0 when it does not hold a number."""
    try:
        return float(text)
    except ValueError:
        return 0


def _parse_snapshot_rows(lines):
    """Slice the three fixed-width snapshot rows (body lines 3-5) into fields.

    Each returned row is ``[dated, panel, panel_subs, unique_stb, pct_success]``.
    Raises ``IndexError`` if the body has fewer than six lines.
    """
    rows = []
    for line_number in range(3, 6):
        line = lines[line_number]
        rows.append([line[0:9], line[10:20], line[21:31], line[32:42], line[43:54]])
    return rows


def _parse_history_rows(lines, first_row=12):
    """Slice the fixed-width panel 5 history rows starting at *first_row*.

    Each returned row is
    ``[panel, on_panel, new_boxes, connected, not_connected, last_day]``.
    Reading stops at the first line whose ``new_boxes`` column starts with
    ``-``. A line too short to reach that column, or the end of the body,
    also ends the table instead of raising ``IndexError``.
    """
    rows = []
    for line in lines[first_row:]:
        new_boxes = line[22:32]
        if not new_boxes or new_boxes.startswith('-'):
            break
        rows.append([line[0:10], line[11:21], new_boxes,
                     line[33:43], line[44:57], line[58:68]])
    return rows


# ---- Process each email in the Oracle Outlook folder -------------------------

for message in messages:
    # Email sent date in three forms: display text, Excel formula, date object.
    message_creation_date = message.SentOn.strftime("%d/%m/%y")
    message_creation_date_excel = '=datevalue("' + message_creation_date + '")'
    message_creation_date_proper = _parse_dmy(message_creation_date)

    # Only process emails which are more recent than those already in the spreadsheet.
    if message_creation_date_proper <= table_snapshot_recent_date:
        continue

    message_split = message.body.splitlines()
    table_snapshot = _parse_snapshot_rows(message_split)
    table_history_p5 = _parse_history_rows(message_split)

    ##### Output snapshot table to spreadsheet
    # Layout: col 1 email date, cols 2-6 snapshot fields, col 7 email date
    # formula, col 8 formula for the row's own "dated" field.
    for snapshot_fields in table_snapshot:
        ws.cell(row=snapshot_spreadsheet_row, column=1, value=message_creation_date)
        ws.cell(row=snapshot_spreadsheet_row, column=7, value=message_creation_date_excel)
        for col, field in enumerate(snapshot_fields):
            if col == 0:
                # "dated" column, laid out as dd-Mon-yy; the month lookup is
                # case-insensitive so "JAN" and "Jan" both resolve.
                day = int(field[0:2])
                month = dic_month[field[3:6].title()]
                year = int('20' + field[7:])
                dated_excel = _excel_datevalue(datetime.date(year, month, day))
                ws.cell(row=snapshot_spreadsheet_row, column=col + 2, value=field)
                ws.cell(row=snapshot_spreadsheet_row, column=col + 8, value=dated_excel)
            else:
                ws.cell(row=snapshot_spreadsheet_row, column=col + 2, value=_to_number(field))

        snapshot_spreadsheet_row += 1  # next empty snapshot row

    ##### Output history table to spreadsheet
    # Each email contains the full history of P5 and only the most recent one
    # is kept, so the history in the sheet is replaced when this email is newer.
    if not table_history_p5:
        # Nothing was parsed; leave the existing history in place.
        continue

    existing_history_date = ws.cell(row=5, column=10).value
    if existing_history_date is None:
        # No history in the spreadsheet yet, so use a default early date.
        current_history_p5_email_date = datetime.date(1900, 1, 1)
    else:
        current_history_p5_email_date = _parse_dmy(existing_history_date)

    if message_creation_date_proper <= current_history_p5_email_date:
        continue

    # Delete existing history, but only if there is some. ws.cell() ignores
    # value=None, so the cell has to be blanked through its .value attribute.
    if history_spreadsheet_row > 5:
        for row in range(5, history_spreadsheet_row + 1):
            for col in range(10, 19):
                ws.cell(row=row, column=col).value = None

    # Restart writing at the top of the history range.
    history_spreadsheet_row = 5

    # Layout: col 10 email date, cols 11-16 history fields, col 17 email date
    # formula, col 18 formula for the row's date field.
    for history_fields in table_history_p5:
        ws.cell(row=history_spreadsheet_row, column=10, value=message_creation_date)
        ws.cell(row=history_spreadsheet_row, column=17, value=message_creation_date_excel)
        for col, field in enumerate(history_fields):
            if col == 1:
                # Date column, laid out as yyyy-mm-dd.
                day = int(field[8:])
                month = int(field[5:7])
                year = int(field[0:4])
                dated_excel = _excel_datevalue(datetime.date(year, month, day))
                ws.cell(row=history_spreadsheet_row, column=col + 11, value=field)
                ws.cell(row=history_spreadsheet_row, column=col + 17, value=dated_excel)
            else:
                ws.cell(row=history_spreadsheet_row, column=col + 11, value=_to_number(field))

        history_spreadsheet_row += 1  # next empty history row


# Save the combined spreadsheet
wb.save('Oracle Panel Data From Email.xlsx')
        
