In [9]:
import pandas as pd
import numpy as np
import os
from glob import iglob

from datetime import datetime
import matplotlib.pyplot as plt

# my modules
import set_config
from common_functions import create_df_from_file


In [10]:
# Look up the two base directories stored under the "SectionOne" config section.
# The section is fetched once per key, in the order 'sanofi' then 'local'.
dir_sanofi_share, dir_local = (
    set_config.ConfigSectionMap("SectionOne")[key] for key in ('sanofi', 'local')
)

In [11]:
folder = 'IP21_data'

# Build the order-number frame via the shared helper, passing the base directory,
# the sub-folder and the file search string "_ORDER".
# NOTE(review): presumably the helper gathers every file under dir/folder whose name
# matches the search string -- confirm in common_functions.
# NOTE(review): an older comment called these "bad pen counts", but the tags in this
# frame are *_ORDERNUMBER series -- confirm which description is right.
df_ORDER = create_df_from_file(dir_sanofi_share, folder, "_ORDER")

# Parse the timestamp strings (e.g. "02-Aug-21 20:43:03.7") into real datetimes.
df_ORDER = df_ORDER.assign(
    IP_TREND_TIME=pd.to_datetime(df_ORDER['IP_TREND_TIME'], format='%d-%b-%y %H:%M:%S.%f')
)

In [12]:
# Distinct tag names present in the order data.
names = df_ORDER['Name'].unique()
names

array(['36630901_ORDERNUMBER', '36630901_ZA_ORDERNUMBER',
       '36650901_ORDERNUMBER'], dtype=object)

In [13]:
# Per-tag summary of the recorded values (count / unique / top / freq).
df_ORDER.groupby(by='Name').IP_TREND_VALUE.describe()

Unnamed: 0_level_0,count,unique,top,freq
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
36630901_ORDERNUMBER,45.0,39.0,80764309.0,3.0
36630901_ZA_ORDERNUMBER,45.0,13.0,1F004,8.0
36650901_ORDERNUMBER,83.0,48.0,80760793.0,7.0


In [14]:
def _rows_for_tag(tag):
    """Return the df_ORDER rows for `tag` that have a value.

    The value column 'IP_TREND_VALUE' is renamed to the tag name so the
    per-tag frames can be merged side by side without column clashes.
    """
    keep = (df_ORDER['Name'] == tag) & df_ORDER['IP_TREND_VALUE'].notna()
    return df_ORDER.loc[keep].rename(columns={'IP_TREND_VALUE': tag})


# One frame per order-number tag.
df_36630901_ORDERNUMBER = _rows_for_tag('36630901_ORDERNUMBER')
df_36650901_ORDERNUMBER = _rows_for_tag('36650901_ORDERNUMBER')
df_36630901_ZA_ORDERNUMBER = _rows_for_tag('36630901_ZA_ORDERNUMBER')

# Pair the two 36630901 series on identical timestamps (inner join: only
# timestamps present in both survive).
merge = df_36630901_ORDERNUMBER[['IP_TREND_TIME', '36630901_ORDERNUMBER']].merge(
    df_36630901_ZA_ORDERNUMBER[['IP_TREND_TIME', '36630901_ZA_ORDERNUMBER']],
    on='IP_TREND_TIME',
    how='inner',
)

# Add the 36650901 series with an outer join so timestamps from either side are kept.
merge2 = merge.merge(
    df_36650901_ORDERNUMBER[['IP_TREND_TIME', '36650901_ORDERNUMBER']],
    on='IP_TREND_TIME',
    how='outer',
)

In [15]:
# Put the combined order-number rows in chronological order, then show them.
merge2 = merge2.sort_values(by='IP_TREND_TIME')
merge2

Unnamed: 0,IP_TREND_TIME,36630901_ORDERNUMBER,36630901_ZA_ORDERNUMBER,36650901_ORDERNUMBER
10,2021-08-02 20:43:03.700,80760843.0,1F022,
69,2021-08-02 22:51:44.100,,,80757084.0
68,2021-08-03 19:56:16.900,,,80756656.0
67,2021-08-04 00:59:12.800,,,80756289.0
9,2021-08-04 06:46:41.500,80760846.0,1F022,
...,...,...,...,...
73,2021-09-29 21:00:03.800,,,80763694.0
72,2021-09-29 23:19:26.600,,,80763694.0
71,2021-09-29 23:20:40.000,,,80763694.0
11,2021-09-30 12:40:19.400,80765625.0,1F004,


In [50]:
# Batch-ID trend data: load via the shared helper and parse the timestamp strings.
df_BATCHID = create_df_from_file(dir_sanofi_share, folder, "_BATCHID.csv")
df_BATCHID['IP_TREND_TIME'] = pd.to_datetime(df_BATCHID['IP_TREND_TIME'], format='%d-%b-%y %H:%M:%S.%f')

# Material-number trend data.
df_MATNO = create_df_from_file(dir_sanofi_share, folder, "_MATNO.csv")
# Keep only rows that carry a value (the value is the only information wanted here)
# and whose quality level is 'Good'. Both conditions are applied in one mask and the
# result is copied explicitly, so the column assignment below writes to an independent
# frame instead of a slice of the loaded one (avoids pandas' SettingWithCopyWarning).
df_MATNO = df_MATNO[
    df_MATNO['IP_TREND_VALUE'].notna() & (df_MATNO['IP_TREND_QLEVEL'] == 'Good')
].copy()
df_MATNO['IP_TREND_TIME'] = pd.to_datetime(df_MATNO['IP_TREND_TIME'], format='%d-%b-%y %H:%M:%S.%f')


In [51]:
# Attach the batch-ID value recorded at the same timestamp (left join keeps every
# merge2 row) and label it BATCH_ID.
# NOTE(review): df_BATCHID is joined unfiltered; if it holds several rows per
# timestamp the join will repeat merge2 rows -- confirm this is intended.
merge3 = merge2.merge(
    df_BATCHID[['IP_TREND_TIME', 'IP_TREND_VALUE']],
    on='IP_TREND_TIME',
    how='left',
).rename(columns={'IP_TREND_VALUE': 'BATCH_ID'})

In [52]:
# Attach the df_MATNO value recorded at the same timestamp (left join keeps every
# merge3 row) and label it BATCH_SIZE.
# NOTE(review): the values come from the *_MATNO tags yet the column is called
# BATCH_SIZE -- confirm the label matches what the tag actually holds.
merge4 = merge3.merge(
    df_MATNO[['IP_TREND_TIME', 'IP_TREND_VALUE']],
    on='IP_TREND_TIME',
    how='left',
).rename(columns={'IP_TREND_VALUE': 'BATCH_SIZE'})

In [53]:
# Display merge2 in chronological order (returns a sorted view for display only;
# merge2 itself is not reassigned here).
merge2.sort_values(by='IP_TREND_TIME')

Unnamed: 0,IP_TREND_TIME,36630901_ORDERNUMBER,36630901_ZA_ORDERNUMBER,36650901_ORDERNUMBER
10,2021-08-02 20:43:03.700,80760843.0,1F022,
69,2021-08-02 22:51:44.100,,,80757084.0
68,2021-08-03 19:56:16.900,,,80756656.0
67,2021-08-04 00:59:12.800,,,80756289.0
9,2021-08-04 06:46:41.500,80760846.0,1F022,
...,...,...,...,...
73,2021-09-29 21:00:03.800,,,80763694.0
72,2021-09-29 23:19:26.600,,,80763694.0
71,2021-09-29 23:20:40.000,,,80763694.0
11,2021-09-30 12:40:19.400,80765625.0,1F004,


In [54]:
# Show the material-number rows whose quality level is 'Good'.
df_MATNO.loc[df_MATNO['IP_TREND_QLEVEL'].eq('Good')]


Unnamed: 0,Name,IP_TREND_TIME,IP_TREND_VALUE,IP_TREND_QLEVEL,IP_TREND_QSTATUS
0,36630901_MATNO,2021-08-18 10:57:46.200,540187.0,Good,Good
2,36630901_MATNO,2021-08-16 07:33:03.300,519660.0,Good,Good
4,36630901_MATNO,2021-08-16 07:31:07.700,519660.0,Good,Good
6,36630901_MATNO,2021-08-12 17:09:20.200,519660.0,Good,Good
8,36630901_MATNO,2021-08-12 09:31:05.100,821062.0,Good,Good
...,...,...,...,...,...
68,36680902_MATNO,2021-09-04 08:48:27.200,528558.0,Good,Good
70,36680902_MATNO,2021-09-04 03:25:43.600,530436.0,Good,Good
72,36680902_MATNO,2021-09-03 22:03:24.200,533150.0,Good,Good
74,36680902_MATNO,2021-09-02 12:53:00.000,527342.0,Good,Good


In [72]:
# Collapse rows that repeat the same combination of the three order-number columns
# and BATCH_ID, retaining the last occurrence of each combination.
merge4 = merge4.drop_duplicates(
    subset=[
        '36630901_ORDERNUMBER',
        '36630901_ZA_ORDERNUMBER',
        '36650901_ORDERNUMBER',
        'BATCH_ID',
    ],
    keep='last',
)


In [73]:
# Display merge4: order numbers joined with BATCH_ID and BATCH_SIZE by timestamp.
merge4

Unnamed: 0,IP_TREND_TIME,36630901_ORDERNUMBER,36630901_ZA_ORDERNUMBER,36650901_ORDERNUMBER,BATCH_ID,BATCH_SIZE
1,2021-08-02 20:43:03.700,80760843.0,1F022,,1F111,530092.0
3,2021-08-02 22:51:44.100,,,80757084.0,,527537.0
5,2021-08-03 19:56:16.900,,,80756656.0,,535797.0
7,2021-08-04 00:59:12.800,,,80756289.0,,532882.0
9,2021-08-04 06:46:41.500,80760846.0,1F022,,1F114,530092.0
...,...,...,...,...,...,...
241,2021-09-29 12:27:19.300,80765624.0,1F009,,1F166,533994.0
243,2021-09-29 13:04:42.400,,,80760974.0,,528401.0
248,2021-09-29 23:20:40.000,,,80763694.0,,536578.0
250,2021-09-30 12:40:19.400,80765625.0,1F004,,1F167,781598.0
