In [1]:
import pandas as pd
import os

In [2]:
# Working directory of the running kernel; printed so the resolved location shows in the cell output.
current_path = os.getcwd()
print(current_path)

c:\Users\sepujas\Dev\mat2\notebooks


In [3]:
# Project root = parent of the working directory, normalised to an absolute path.
_parent_ref = os.path.join(current_path, os.pardir)
base_path = os.path.abspath(_parent_ref)
print(base_path)

c:\Users\sepujas\Dev\mat2


In [4]:
# Location of the raw stock export, relative to the project root.
_raw_stock_parts = ('data', 'raw', 'tbl_stock_mb52.txt')
stock_file_path = os.path.join(base_path, *_raw_stock_parts)
print(stock_file_path)

c:\Users\sepujas\Dev\mat2\data\raw\tbl_stock_mb52.txt


In [5]:
# Destination of the cleaned stock table, relative to the project root.
_preprocessed_stock_parts = ('data', 'preprocessed', 'tbl_stock.csv')
stock_preprocessed_path = os.path.join(base_path, *_preprocessed_stock_parts)
print(stock_preprocessed_path)

c:\Users\sepujas\Dev\mat2\data\preprocessed\tbl_stock.csv


In [6]:
# Load the raw export: tab-separated, first line skipped, decoded as Latin-1.
df_stock = pd.read_csv(
    stock_file_path,
    encoding='latin1',
    skiprows=1,
    sep='\t',
)
# Preview the first rows of the freshly loaded frame.
df_stock.head()

Unnamed: 0.1,Unnamed: 0,Material,Material Description,SLoc,SL,Plnt,BUn,Unrestricted,Transit/Transf.,Blocked,In Qual. Insp.,Restricted-Use,Returns,Stk in Transit
0,,1953,Drill bit extension TE-FY-E60,1.0,,3000,PC,2,0.0,0.0,0.0,0,0,0.0
1,,1954,Drill bit extension TE-FY-E85,1.0,,8750,PC,4,0.0,0.0,0.0,0,0,0.0
2,,3730,Piston ring,1.0,,3000,PC,1,0.0,0.0,0.0,0,0,0.0
3,,24631,Cleaner CFR1 500ML,1.0,,3000,PC,5,0.0,5.0,0.0,0,0,0.0
4,,24669,Dispenser CS 270-P1,1.0,,3000,PC,28,0.0,0.0,0.0,0,0,0.0


In [7]:
# Collect the columns whose label contains 'Unnamed:'.
unnamed_columns = []
for label in df_stock.columns:
    if 'Unnamed:' in label:
        unnamed_columns.append(label)

print("Columns with 'Unnamed:' in their title:")
# One label per line; nothing is printed when the list is empty.
if unnamed_columns:
    print('\n'.join(unnamed_columns))

Columns with 'Unnamed:' in their title:
Unnamed: 0


In [8]:
# Remove the 'Unnamed:' columns collected above.
# Reassigning (instead of inplace=True) together with errors='ignore' keeps this cell
# re-runnable: once the columns are gone, a second execution is a no-op rather than a KeyError.
df_stock = df_stock.drop(columns=unnamed_columns, errors='ignore')

In [9]:
# Lift the column display limit so wide frames are shown in full.
pd.options.display.max_columns = None


In [10]:
# Preview the first rows after the 'Unnamed:' columns were dropped.
df_stock.head()

Unnamed: 0,Material,Material Description,SLoc,SL,Plnt,BUn,Unrestricted,Transit/Transf.,Blocked,In Qual. Insp.,Restricted-Use,Returns,Stk in Transit
0,1953,Drill bit extension TE-FY-E60,1.0,,3000,PC,2,0.0,0.0,0.0,0,0,0.0
1,1954,Drill bit extension TE-FY-E85,1.0,,8750,PC,4,0.0,0.0,0.0,0,0,0.0
2,3730,Piston ring,1.0,,3000,PC,1,0.0,0.0,0.0,0,0,0.0
3,24631,Cleaner CFR1 500ML,1.0,,3000,PC,5,0.0,5.0,0.0,0,0,0.0
4,24669,Dispenser CS 270-P1,1.0,,3000,PC,28,0.0,0.0,0.0,0,0,0.0


In [11]:
# Snapshot of the current column labels as a plain list (shown as the cell output).
column_titles = list(df_stock.columns)
column_titles

['Material',
 'Material Description',
 'SLoc',
 'SL',
 'Plnt',
 'BUn',
 '   Unrestricted',
 'Transit/Transf.',
 '        Blocked',
 ' In Qual. Insp.',
 ' Restricted-Use',
 '        Returns',
 ' Stk in Transit']

In [12]:
# Map each original label to a cleaned one: surrounding whitespace trimmed,
# ' ', '/' and '-' turned into '_', and '.' removed.
_label_cleanup = str.maketrans({' ': '_', '/': '_', '-': '_', '.': None})
new_column_titles = {
    original: original.strip().translate(_label_cleanup)
    for original in column_titles
}

In [13]:
# Apply the cleaned column labels.
df_stock = df_stock.rename(columns=new_column_titles)


In [14]:
# Show the column labels after renaming.
df_stock.columns.tolist()

['Material',
 'Material_Description',
 'SLoc',
 'SL',
 'Plnt',
 'BUn',
 'Unrestricted',
 'Transit_Transf',
 'Blocked',
 'In_Qual_Insp',
 'Restricted_Use',
 'Returns',
 'Stk_in_Transit']

In [15]:
# Inspect the dtype pandas inferred for each column before any conversion.
df_stock.dtypes

Material                  int64
Material_Description     object
SLoc                    float64
SL                       object
Plnt                      int64
BUn                      object
Unrestricted             object
Transit_Transf          float64
Blocked                 float64
In_Qual_Insp            float64
Restricted_Use            int64
Returns                   int64
Stk_in_Transit          float64
dtype: object

In [18]:
def _parse_quantity(series):
    """Return ``series`` as float, parsing '.'-thousands / ','-decimal text when needed.

    Columns that pandas already parsed as numbers are only cast to float.
    Round-tripping them through text would turn 5.0 into '5.0', strip the '.',
    and re-parse the result as 50.0.

    Text columns are trimmed, '.' is removed and ',' is replaced by '.' before
    the cast. Both replacements are literal (``regex=False``) so the result does
    not depend on the pandas version's default for ``regex``.

    Raises:
        ValueError: if a text value still cannot be parsed as a float.
    """
    if pd.api.types.is_numeric_dtype(series):
        return series.astype(float)
    text = series.astype(str).str.strip()
    text = text.str.replace('.', '', regex=False).str.replace(',', '.', regex=False)
    return text.astype(float)


# Integer-like identifiers: missing values become 0, then the value is stored as text.
for _col in ('Material', 'SLoc', 'Plnt'):
    df_stock[_col] = df_stock[_col].fillna(0).astype(int).astype(str).str.strip()

# Free-text columns: cast to str and trim surrounding whitespace.
for _col in ('Material_Description', 'SL', 'BUn'):
    df_stock[_col] = df_stock[_col].astype(str).str.strip()

# Quantity columns: always end up as float.
for _col in (
    'Unrestricted',
    'Transit_Transf',
    'Blocked',
    'In_Qual_Insp',
    'Restricted_Use',
    'Returns',
    'Stk_in_Transit',
):
    df_stock[_col] = _parse_quantity(df_stock[_col])




In [19]:
# Coerce 'Unrestricted' to numeric; values that cannot be parsed become NaN.
_unrestricted_numeric = pd.to_numeric(df_stock['Unrestricted'], errors='coerce')
df_stock['Unrestricted'] = _unrestricted_numeric

In [20]:
# Integer lookup: key -> list of plant codes grouped under it
# (presumably sales organisation -> plants, judging by the name; confirm with the data owner).
sorg_plant_dict = dict([
    (8750, [8750, 8760]),
    (8300, [8330, 8302]),
    (8650, [8650, 8663, 8655, 8658, 8662, 8651]),
    (3000, [3000, 3002, 3010, 3014, 3016, 3018, 3022]),
])

In [21]:
# String lookup: plant code -> the code it is grouped under.
# Built from (group, members) pairs; insertion order follows the pairs below.
plant_to_sorg = {
    plant: sorg
    for sorg, plants in (
        ("8750", ("8750", "8760")),
        ("8300", ("8330", "8302")),
        ("8650", ("8650", "8663", "8655", "8658", "8662", "8651")),
        ("3000", ("3000", "3002", "3010", "3014", "3016", "3018", "3022")),
    )
    for plant in plants
}

In [22]:
# Display the plant -> group lookup for a visual check.
plant_to_sorg

{'8750': '8750',
 '8760': '8750',
 '8330': '8300',
 '8302': '8300',
 '8650': '8650',
 '8663': '8650',
 '8655': '8650',
 '8658': '8650',
 '8662': '8650',
 '8651': '8650',
 '3000': '3000',
 '3002': '3000',
 '3010': '3000',
 '3014': '3000',
 '3016': '3000',
 '3018': '3000',
 '3022': '3000'}

In [23]:
def map_plant_to_sorg(plant):
    """Look up ``plant`` in ``plant_to_sorg``.

    Args:
        plant: Key to look up; the table's keys are strings such as '3016'.

    Returns:
        The mapped value, or ``None`` when ``plant`` is not a key of the table.
    """
    if plant in plant_to_sorg:
        return plant_to_sorg[plant]
    return None

In [24]:
# Spot-check the lookup with a single plant code.
map_plant_to_sorg('3016')




'3000'

In [25]:
# Derive 'Sorg' by passing every 'Plnt' value through map_plant_to_sorg.
_plant_codes = df_stock['Plnt']
df_stock['Sorg'] = _plant_codes.map(map_plant_to_sorg)

In [26]:
# Build the composite key '<Sorg>/<Material>', cast it to str and trim whitespace.
# NOTE(review): rows where 'Sorg' is missing presumably end up with the literal
# text 'nan' here because of the str cast; confirm that is what consumers expect.
df_stock['key_material'] = (
    (df_stock['Sorg'] + '/' + df_stock['Material'])
    .astype(str)
    .str.strip()
)


In [27]:
# Display the path the cleaned table will be written to.
stock_preprocessed_path

'c:\\Users\\sepujas\\Dev\\mat2\\data\\preprocessed\\tbl_stock.csv'

In [28]:
# Create the target folder first so the export also works when the output
# directory does not exist yet; otherwise to_csv fails with an OSError.
_output_dir = os.path.dirname(stock_preprocessed_path)
if _output_dir:
    os.makedirs(_output_dir, exist_ok=True)

# Write the cleaned table as CSV without the index column.
df_stock.to_csv(stock_preprocessed_path, index=False)

In [29]:
# Inspect the column dtypes after the conversions and the added columns.
df_stock.dtypes

Material                 object
Material_Description     object
SLoc                     object
SL                       object
Plnt                     object
BUn                      object
Unrestricted            float64
Transit_Transf          float64
Blocked                 float64
In_Qual_Insp            float64
Restricted_Use          float64
Returns                 float64
Stk_in_Transit          float64
Sorg                     object
key_material             object
dtype: object