## Guilty Inventory Report

1.  Place the csv data files as shown below.  Every csv file must store its dates in a datetime format.

```
inventory/
 ├─ pandas_ginv.ipynb
 ├─ csv_data/         -- aging.csv, business_unit.csv, rate.csv
 │   ├─inv_data       -- invoice csv files
 │   ├─purchase_data  -- purchase csv files
 │   └─sales_data     -- sales csv files
 └─ reports
 
```
2.  From Jupyter, run the whole notebook.
 
3.  Reports are saved in the reports folder.
 

In [137]:
from datetime import date
import glob
import logging
import os

import pandas as pd

# Emit every record from DEBUG upwards, prefixed with timestamp and level.
logging.basicConfig(
    format=" %(asctime)s -  %(levelname)s -  %(message)s",
    level=logging.DEBUG,
)
# Uncomment the next line to silence the debug output of the cells below.
# logging.disable(logging.CRITICAL)


In [178]:
def read_inv():
    """Load all inventory-aging csv files from csv_data/inv_data.

    Preparing a csv: copy the original Excel sheet to a new sheet, paste as
    values and save as csv.  Add a column "inv_month" holding a date such as
    2019/9/1 (the day is reset to the 1st here, so any day is accepted).

    Returns:
        DataFrame with columns month, model, cat, age, qty, amt.  There is
        one row per (month, model, cat, age) with qty and amt summed over
        every csv file found; rows whose summed qty is not positive are
        dropped.  An empty DataFrame with the same columns is returned when
        no csv file is found.
    """
    use_cols = ["CatNo", "Material", "Fecha", "Suma de Total Qty", "Suma de Total $", "inv_month"]
    new_names = {
        "CatNo": "cat",
        "Material": "model",
        "Fecha": "age",
        "Suma de Total Qty": "qty",
        "Suma de Total $": "amt",
        "inv_month": "month",
    }
    # Aging bucket label -> integer used as the age value.
    age_days = {'30D': 30, '60D': 60, '90D': 90, '180D': 180, '270D': 270, '360D': 360, 'MAS': 999}

    # Read every file (sorted for a reproducible order) instead of returning
    # after the first one.  The date column is selected by name so that the
    # column order inside the csv does not matter.
    frames = [
        pd.read_csv(path, usecols=use_cols, parse_dates=["inv_month"])
        for path in sorted(glob.glob('csv_data/inv_data/*.csv'))
    ]
    if not frames:
        return pd.DataFrame(columns=['month', 'model', 'cat', 'age', 'qty', 'amt'])

    df = pd.concat(frames, ignore_index=True).rename(columns=new_names)
    df = df.reindex(columns=['month', 'model', 'cat', 'qty', 'amt', 'age'])

    # Replace the aging label by its integer, in the age column only, so a
    # model that happens to be named like a label is left untouched.
    df["age"] = df["age"].map(lambda label: age_days.get(label, label))

    # Make sure the month is the 1st day of the month.
    df["month"] = df["month"].apply(lambda x: x.replace(day=1))

    # Sum qty and amt per month, model, category and age.
    df = df.groupby(["month", "model", "cat", "age"], as_index=False).sum()
    return df[df['qty'] > 0]


# Show the prepared inventory table in the notebook output.
logging.debug('read_inv : return: \n%s', read_inv())

 2020-09-27 23:33:36,508 -  DEBUG -  read_inv : return: 
          month               model  cat  age  qty       amt
0    2019-09-01             AL100DR  213   30   50  38844.50
1    2019-09-01             AL100DR  213   90   50  36868.50
2    2019-09-01             AL100DR  213  180   24  17731.92
3    2019-09-01             AL100RD  213   30   10  76563.30
4    2019-09-01           AL100RD/U  213  180   35   3192.70
...         ...                 ...  ...  ...  ...       ...
8192 2020-08-01      SOLVARILL001/4  195  999    4      0.04
8193 2020-08-01      SOLVARILL15MTS  195  360    1      0.01
8194 2020-08-01  SOLVARILLA5/16-1MO  195   90    7    339.15
8195 2020-08-01    SORIELBONDJUMPER  195   60    2    561.36
8196 2020-08-01    SORIELBONDJUMPER  195   90    3    831.52

[8099 rows x 6 columns]


In [57]:
def read_sales():
    """Load all sales csv files from csv_data/sales_data.

    Preparing a csv: copy the original sheet to a new sheet, paste as values
    and save as csv.  "Billing doc. date" must be formatted as a date
    (ex: 2020/8/2).

    Returns:
        DataFrame with columns month, model, qty, amt.  There is one row per
        (month, model) with qty and amt summed over every csv file found,
        where month is the billing date moved to the 1st of its month.  Rows
        whose summed qty is not positive are dropped.  An empty DataFrame
        with the same columns is returned when no csv file is found.
    """
    use_cols = ["Billing doc. date", "Material", "Net Qty", "Net $"]
    new_names = {
        "Billing doc. date": "month",
        "Material": "model",
        "Net Qty": "qty",
        "Net $": "amt",
    }

    # Read every file (sorted for a reproducible order) instead of returning
    # after the first one; the date column is selected by name.
    frames = [
        pd.read_csv(path, usecols=use_cols, parse_dates=["Billing doc. date"])
        for path in sorted(glob.glob('csv_data/sales_data/*.csv'))
    ]
    if not frames:
        return pd.DataFrame(columns=['month', 'model', 'qty', 'amt'])

    df = pd.concat(frames, ignore_index=True).rename(columns=new_names)
    df = df.reindex(columns=['month', 'model', 'qty', 'amt'])

    # Move the sales date to the 1st day of its month.
    df["month"] = df["month"].apply(lambda x: x.replace(day=1))

    # Sum qty and amt per month and model.
    df = df.groupby(["month", "model"], as_index=False).sum()
    return df[df['qty'] > 0]


# Show the prepared sales table in the notebook output.
logging.debug('read_sales: return: \n%s', read_sales())

 2020-09-26 11:14:17,649 -  DEBUG -  read_sales: return: 
          month       model     qty            amt
0    2019-09-01     AL100DR    84.0  102527.860000
1    2019-09-01   AL100RD/U   408.0   52412.050000
2    2019-09-01   AL100RT/U    30.0    4598.250000
3    2019-09-01    AL100TDN    55.0   64295.780000
4    2019-09-01  AL204RTC/U  1320.0  292547.800000
...         ...         ...     ...            ...
2163 2020-08-01     NUAF370   311.0  888944.332500
2164 2020-08-01    NUJB395L    51.0  168375.854865
2165 2020-08-01     PNC751H     1.0   61600.000000
2167 2020-08-01      R21LTF    71.0  481735.650000
2168 2020-08-01      R22GTF     1.0   12730.000000

[2159 rows x 4 columns]


In [62]:
def read_purchase():
    """Load all purchase csv files from csv_data/purchase_data.

    Preparing a csv: copy the original sheet to a new sheet, paste as values
    and save as csv.

    Returns:
        DataFrame with columns month, model, qty, amt.  There is one row per
        (month, model) with qty and amt summed over every csv file found,
        where month is the posting date moved to the 1st of its month.  Rows
        whose summed qty is not positive are dropped.  An empty DataFrame
        with the same columns is returned when no csv file is found.
    """
    use_cols = ["Material", "Posting date", "Net Qty", "Net - $"]
    new_names = {
        "Material": "model",
        "Net Qty": "qty",
        "Net - $": "amt",
        "Posting date": "month",
    }

    # Read every file (sorted for a reproducible order) instead of returning
    # after the first one; the date column is selected by name.
    frames = [
        pd.read_csv(path, header=0, usecols=use_cols, parse_dates=["Posting date"])
        for path in sorted(glob.glob('csv_data/purchase_data/*.csv'))
    ]
    if not frames:
        return pd.DataFrame(columns=['month', 'model', 'qty', 'amt'])

    df = pd.concat(frames, ignore_index=True).rename(columns=new_names)
    df = df.reindex(columns=['month', 'model', 'qty', 'amt'])

    # Move the posting date to the 1st day of its month.
    df["month"] = df["month"].apply(lambda x: x.replace(day=1))

    # Sum qty and amt per month and model.
    df = df.groupby(["month", "model"], as_index=False).sum()
    return df[df['qty'] > 0]


# Show the prepared purchase table in the notebook output.
logging.debug('read_purchase: return: \n%s', read_purchase())

 2020-09-26 11:25:19,413 -  DEBUG -  read_purchase: return: 
          month     model  qty        amt
0    2019-08-01   AL100DR   50   37313.26
1    2019-08-01  AL100TDN   10    7374.75
2    2019-08-01   AL204TD  270  164136.56
3    2019-08-01   AR016RT   10   75774.58
4    2019-08-01   AR152DR  220   52016.99
...         ...       ...  ...        ...
1712 2020-08-01   MXM6570   10  896823.06
1713 2020-08-01   MXM7570    5  471964.79
1714 2020-08-01    MXRB24    6   16698.45
1715 2020-08-01    MXRB26    9   27578.21
1716 2020-08-01    MXTU16  514  420601.33

[1717 rows x 4 columns]


In [65]:
def read_bu(targetFile):
    """Read the business-unit csv and shorten its column names.

    Args:
        targetFile: path of the business-unit csv file.

    Returns:
        The csv content as a DataFrame in which prod_category,
        business_unit and bu_description are renamed to cat, bu and bu_name.
    """
    short_names = {
        "prod_category": "cat",
        "business_unit": "bu",
        "bu_description": "bu_name",
    }
    return pd.read_csv(targetFile).rename(columns=short_names)

# Show the business-unit table in the notebook output.
logging.debug('read_bu: return: \n%s', read_bu('csv_data/business_unit.csv'))

 2020-09-26 11:30:19,902 -  DEBUG -  read_bu: return: 
    cat    bu        bu_name
0   135  B1-1            MWO
1   187  B1-2            PCI
2   195    C1            SOL
3   196    C1            SOL
4   200    D2            IDP
5   202    D2  IDP B2B AQUOS
6   211  B1-4            CAL
7   213    D1         AL SPL
8   243    D1       PPC D HW
9   244    D1      PPC D SPL
10  117    G1         LCD TV
11  203    G1        LCD B2B


In [226]:
def read_rate(targetFile):
    """Read the rate csv, one row per month.

    Args:
        targetFile: path of the rate csv file; its first column is parsed
            as dates.

    Returns:
        The csv content as a DataFrame with rate_month renamed to month and
        every month value moved to the 1st day of its month.
    """

    def first_of_month(stamp):
        return stamp.replace(day=1)

    rates = pd.read_csv(targetFile, parse_dates=[0]).rename(columns={"rate_month": "month"})

    # The file should already hold the 1st of each month; enforce it anyway.
    rates["month"] = rates["month"].apply(first_of_month)
    return rates


# Show the rate table in the notebook output.
logging.debug('read_rate: return: \n%s', read_rate("csv_data/rate.csv"))

 2020-09-28 01:30:37,846 -  DEBUG -  read_rate: return: 
         month  rate_yen
0   2019-08-01      5.07
1   2019-09-01      5.07
2   2019-10-01      5.07
3   2019-11-01      5.07
4   2019-12-01      5.07
..         ...       ...
135 2030-11-01      4.36
136 2030-12-01      4.36
137 2031-01-01      4.36
138 2031-02-01      4.36
139 2031-03-01      4.36

[140 rows x 2 columns]


In [246]:
def read_aging(targetFile):
    """Read the aging conversion csv and reshape it from wide to long.

    The first csv column becomes the row index and the columns age_n0 ..
    age_n12 are relabelled 0 .. 12 before the table is stacked.

    Args:
        targetFile: path of the aging csv file.

    Returns:
        DataFrame with one row per (index value, column) cell.  The index
        column "age_actual" is renamed to age, the former column label is
        stored in month_diff and the cell value in new_age.
    """
    # age_n0 -> 0, age_n1 -> 1, ... age_n12 -> 12
    offsets = {f"age_n{n}": n for n in range(13)}

    wide = pd.read_csv(targetFile, index_col=0).rename(columns=offsets)
    tall = wide.stack().reset_index()

    return tall.rename(columns={"age_actual": "age", "level_1": "month_diff", 0: "new_age"})


# Show the reshaped aging table in the notebook output.
logging.debug("read_aging: return: \n%s", read_aging("csv_data/aging.csv"))

 2020-09-28 02:11:14,091 -  DEBUG -  read_aging: return: 
    age  month_diff  new_age
0    30           0       30
1    30           1       30
2    30           2       30
3    30           3       30
4    30           4       30
..  ...         ...      ...
86  999           8      270
87  999           9      270
88  999          10      360
89  999          11      360
90  999          12      360

[91 rows x 3 columns]


In [150]:
def latest_ps():
    """List every (month, model) pair that had a purchase or a sale.

    Returns:
        DataFrame with columns month and model, built from the rows returned
        by read_sales() and read_purchase(), without duplicates, sorted by
        month descending and then by model ascending.  The row labels of the
        two source frames are kept as they are.
    """
    keys = ['month', 'model']
    sales = read_sales()[keys]
    purchases = read_purchase()[keys]

    # DataFrame.append was removed in pandas 2.0; pd.concat gives the same
    # result (source row labels are kept) and works on every pandas version.
    df = pd.concat([sales, purchases])

    # Keep one row per month and model, newest month first.
    df = df.drop_duplicates()
    df = df.sort_values(keys, ascending=[False, True])

    return df


# Show the purchase/sales movement list in the notebook output.
logging.debug('return : \n%s', latest_ps())

 2020-09-27 22:26:22,766 -  DEBUG -  return : 
          month               model
1999 2020-08-01          2TC32CF2UR
2000 2020-08-01          2TC45CF2UR
2001 2020-08-01           4TB60CJ1U
2002 2020-08-01           4TB70CJ1U
2003 2020-08-01          4TC60BK2UD
...         ...                 ...
119  2019-08-01     SOLTUERCAFL5/16
120  2019-08-01  SOLTUERCAHEX5/16OX
121  2019-08-01      SOLVARILL15MTS
122  2019-08-01          WC-COA-MPE
123  2019-08-01          WC-COA-PRO

[2831 rows x 2 columns]


In [248]:
def new_age():
    """Re-age each inventory row from its latest purchase or sales movement.

    For every inventory row (month, model, age) the latest movement month
    from latest_ps() that is not later than the inventory month is looked
    up, the number of months between the two is stored in month_diff, and
    (age, month_diff) is converted to new_age through the table returned by
    read_aging("csv_data/aging.csv").

    Returns:
        DataFrame with columns month, model, age, month_ps, month_diff and
        new_age.  Inventory rows without any earlier movement get
        month_diff 999; new_age is missing wherever the aging table has no
        row for the (age, month_diff) pair.  None is returned when any step
        raises: the error is printed and logged with its traceback instead
        of being propagated.
    """
    try:
        df1 = read_inv()
        df1 = df1.drop(["cat", "qty", "amt"], axis=1)
        df2 = latest_ps()

        # Pair every inventory row with every movement month of its model,
        # then keep only the movements that are not in the future.
        df = pd.merge(df1, df2, on="model", how="left", suffixes=["_inv", "_ps"])
        df = df.loc[df["month_inv"] >= df["month_ps"]]

        # Keep, per inventory row, the pairing with the latest movement.
        grouped = df.groupby(["month_inv", "model", "age"])
        df = df.loc[grouped["month_ps"].idxmax(), :]

        # Add month_diff column: whole months from movement to inventory.
        df["month_diff"] = (df["month_inv"].dt.year - df["month_ps"].dt.year) * 12 + (
            df["month_inv"].dt.month - df["month_ps"].dt.month
        )

        # Bring back the inventory rows dropped by the filter above (no
        # movement found) and mark them with month_diff = 999.
        df = df.rename(columns={"month_inv": "month"})
        df = pd.merge(df1, df, on=["month", "model", "age"], how="left")
        df = df.fillna({"month_diff": 999})

        # Convert month_diff to new_age
        df3 = read_aging("csv_data/aging.csv")
        df = pd.merge(df, df3, on=["age", "month_diff"], how="left")

    except Exception as e:
        # Best effort is kept (no re-raise), but the traceback is logged so
        # the failing step can be found; the message is still printed.
        logging.exception("new_age(): failed")
        print(e)
        return None

    else:
        print("new_age(): Successfully finished without error")
        return df


# Show the re-aged inventory table in the notebook output.
logging.debug("return : \n%s", new_age())

 2020-09-28 02:12:00,371 -  DEBUG -  return : 
          month               model  age   month_ps  month_diff  new_age
0    2019-09-01             AL100DR   30 2019-09-01         0.0     30.0
1    2019-09-01             AL100DR   90 2019-09-01         0.0     30.0
2    2019-09-01             AL100DR  180 2019-09-01         0.0     30.0
3    2019-09-01             AL100RD   30 2019-09-01         0.0     30.0
4    2019-09-01           AL100RD/U  180 2019-09-01         0.0     30.0
...         ...                 ...  ...        ...         ...      ...
8094 2020-08-01      SOLVARILL001/4  999 2020-06-01         2.0     60.0
8095 2020-08-01      SOLVARILL15MTS  360 2020-06-01         2.0     60.0
8096 2020-08-01  SOLVARILLA5/16-1MO   90 2020-06-01         2.0     60.0
8097 2020-08-01    SORIELBONDJUMPER   60 2020-07-01         1.0     30.0
8098 2020-08-01    SORIELBONDJUMPER   90 2020-07-01         1.0     30.0

[8099 rows x 6 columns]


new_age(): Successfully finished without error
