In [1]:
import pandas as pd
from pathlib import Path
import configparser
import platform
import os
import validate_bom


In [2]:
def set_folder_defaults():
    if 'macOS' in platform.platform():
        # set some defaults for testing on mac
        download_dir = Path('/Users/mark/Downloads')
        user_dir = download_dir
        sharepoint_dir = download_dir

    elif 'Server' in platform.platform():
        # we're on the azure server (probably)
        user_dir = Path('Z:/python/FilesIn')

        download_dir = Path(user_dir)
        user_dir = download_dir
        sharepoint_dir = Path('Z:/python/FilesOut')

    elif os.getlogin() == 'mark_':
        # my test windows machine
        download_dir = Path('C:/Users/mark_/Downloads')
        user_dir = download_dir
        sharepoint_dir = download_dir        

    else:
        # personal one drive
        user_dir = 'C:/Users/USERNAME'

        # replace USERNAME with current logged on user
        user_dir = user_dir.replace('USERNAME', os.getlogin())

        # read in config file
        config = configparser.ConfigParser()
        config.read('user_directory.ini')

        # read in gm_dir and gm_docs from config file
        gm_dir = Path(config[os.getlogin().lower()]['gm_dir'])
        gm_docs = Path(config[os.getlogin().lower()]['gmt'])
        # this may find more than one sharepoint directory
        # sharepoint_dir = user_dir + "/" + gm_dir + "/" + gm_docs
        sharepoint_dir = Path(user_dir / gm_dir / gm_docs)

        # download_dir = os.path.join(sharepoint_dir, 'Data Shuttle', 'downloads')
        download_dir = Path(sharepoint_dir / 'Data Shuttle' / 'downloads')

    return sharepoint_dir, download_dir, user_dir

In [3]:
def find_files(download_dir, search='*'):
    files = []
    for file in Path(download_dir).glob(search):
        print (file)
        files.append(file)
 
    return files

In [4]:
def write_to_xl(output_file, dict_checks):

    outfile = output_file
    df_dict = dict_checks

    import xlwings as xw
    try:
        wb = xw.Book(output_file)
        print ("writing to existing {}".format(outfile))
    except FileNotFoundError:
        # create a new book
        print ("creating new {}".format(outfile))
        wb = xw.Book()
        # wb.save(outfile)

    for key in df_dict.keys():
        try:
            ws = wb.sheets.add(key)
        except Exception as e:
            print (e)
        
        ws = wb.sheets[key]

        table_name = key

        ws.clear()

        df = df_dict[key]
        if len(df) > 0:
            if table_name in [table.df for table in ws.tables]:
                ws.tables[table_name].update(df)
            else:
                table_name = ws.tables.add(source=ws['A1'],
                                            name=table_name).update(df)

In [5]:
# this gets called if running from this script.  
if __name__ == '__main__':

    sharepoint_dir, download_dir, user_dir = set_folder_defaults()
    file_search = 'Updated*2024*'
    files = find_files(Path(download_dir / 'T48E'), file_search)
    print ("Found {} files for {}".format(len(files), file_search))
    # reverse the sort order
    files.sort(reverse=True)
    # just take the last x files from the top, being the latest
    # files = files[:5]
    # resort from old at top to latest at bottom
    files.sort(reverse=False)

    dict_df = {}

    for file in files:
        with open(file, "rb") as f:
            filename = Path(file).stem
            # reading in the historic excel files
            df = pd.read_excel(f, parse_dates=True)
            f.close()
            # if percent_missing row is there get rid of it
            df = df[df.orig_sort != 'percent_missing']
            # format last export date as date only
            df['Last Export Date'] = pd.to_datetime(df['Last Export Date']).dt.date
            # store the export date from the first row (all the same)
            last_export_date = df['Last Export Date'].head(1).values
            # BOM COUNT is not needed 
            try:
                df.drop(columns='BOM COUNT', inplace=True)
            except KeyError:
                # didn't find BOM COUNT - doesn't matter
                pass
            # take the Variant from the level 0 Title
            variant = df.Title[df.Level == 0]
            # drop packaging function group in case it's in the original extracts
            df = df[~df['Function Group'].str.contains('PACKAGING', na=False)]
            # add the latest validation metrics to our df
            val_dict = validate_bom.main(df)
            # the BOM dataframe will have what we're after
            dict_df[filename] = val_dict['BOM']



C:\Users\mark_\Downloads\T48E\Updated_T48e-01-Z00001_2024-04-15.xlsx
C:\Users\mark_\Downloads\T48E\Updated_T48e-01-Z00001_2024-04-19.xlsx
C:\Users\mark_\Downloads\T48E\Updated_T48e-01-Z00001_2024-05-22.xlsx
C:\Users\mark_\Downloads\T48E\Updated_T48e-01-Z00001_2024-05-23.xlsx
C:\Users\mark_\Downloads\T48E\Updated_T48e-01-Z00001_2024-05-24.xlsx
C:\Users\mark_\Downloads\T48E\Updated_T48e-01-Z00001_2024-05-28.xlsx
C:\Users\mark_\Downloads\T48E\Updated_T48e-01-Z00001_2024-05-30.xlsx
C:\Users\mark_\Downloads\T48E\Updated_T48e-01-Z00001_2024-06-04.xlsx
C:\Users\mark_\Downloads\T48E\Updated_T48e-01-Z00003_2024-04-15.xlsx
C:\Users\mark_\Downloads\T48E\Updated_T48E-01-Z00005_2024-04-15.xlsx
C:\Users\mark_\Downloads\T48E\Updated_T48E-01-Z00005_2024-04-19.xlsx
C:\Users\mark_\Downloads\T48E\Updated_T48E-01-Z00005_2024-05-22.xlsx
C:\Users\mark_\Downloads\T48E\Updated_T48E-01-Z00005_2024-05-23.xlsx
C:\Users\mark_\Downloads\T48E\Updated_T48E-01-Z00005_2024-05-30.xlsx
C:\Users\mark_\Downloads\T48E\Upda

  df.loc[i, 'Parent Source Code'] = level_source[x['Level'] - 1]


No variant lookup found for T48E-01-Z00003 Therefore didn't update variant name


  df.loc[i, 'Parent Source Code'] = level_source[x['Level'] - 1]
  df.loc[i, 'Parent Source Code'] = level_source[x['Level'] - 1]


In [19]:
summary_d = {}

summary_d['xtab'] = pd.DataFrame()
summary_d['xtab_norm'] = pd.DataFrame()

for df in dict_df:
    xtab = pd.crosstab([dict_df[df]['Last Export Date'], dict_df[df]['Variant'], dict_df[df]['Function Group']], dict_df[df]['Source Code'].fillna('Missing'))
    summary_d['xtab'] = pd.concat([summary_d['xtab'],xtab])

    xtab_norm = pd.crosstab([dict_df[df]['Last Export Date'], dict_df[df]['Variant'], dict_df[df]['Function Group']], dict_df[df]['Source Code'].fillna('Missing'), normalize='index')
    summary_d['xtab_norm'] = pd.concat([summary_d['xtab_norm'], xtab_norm])

In [20]:
summary_d.keys()

dict_keys(['xtab', 'xtab_norm'])

In [8]:
write_to_xl('summary_metrics', summary_d)

creating new summary_metrics


In [21]:
def create_heatmap(df, figsize):
    import numpy as np
    import matplotlib.pyplot as plt
    import seaborn as sns

       
    hmap = plt.figure(figsize=figsize)
    ax = sns.heatmap(df, annot = True, fmt=".0%", cmap='YlGnBu', annot_kws={'fontsize':8}, linewidths=0.5)
    ax.set(xlabel="", ylabel="")
    ax.xaxis.tick_top()
    plt.rc('xtick', labelsize=10)
    plt.rc('ytick', labelsize=10)
    cbar = ax.collections[0].colorbar
    cbar.set_ticks([0, .2, .75, 1])
    cbar.set_ticklabels(['0%', '20%', '75%', '100%'])
    plt.figure()
    # sns.set(font_scale=.5)
    # plt.show()
    plt.close(hmap)
    return hmap

In [17]:
create_heatmap(summary_d['Updated_T48e-01-Z00001_2024-04-15'], (12,8))

TypeError: float() argument must be a string or a real number, not 'datetime.date'

<Figure size 1200x800 with 0 Axes>