In [16]:
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OrdinalEncoder, OneHotEncoder
from sklearn.model_selection import train_test_split


In [17]:
df = pd.read_csv("Procurement KPI Analysis Dataset.csv")

In [18]:
df.isnull().sum()

PO_ID                 0
Supplier              0
Order_Date            0
Delivery_Date        87
Item_Category         0
Order_Status          0
Quantity              0
Unit_Price            0
Negotiated_Price      0
Defective_Units     136
Compliance            0
dtype: int64

In [19]:
df.head(10)

Unnamed: 0,PO_ID,Supplier,Order_Date,Delivery_Date,Item_Category,Order_Status,Quantity,Unit_Price,Negotiated_Price,Defective_Units,Compliance
0,PO-00001,Alpha_Inc,2023-10-17,2023-10-25,Office Supplies,Cancelled,1176,20.13,17.81,,Yes
1,PO-00002,Delta_Logistics,2022-04-25,2022-05-05,Office Supplies,Delivered,1509,39.32,37.34,235.0,Yes
2,PO-00003,Gamma_Co,2022-01-26,2022-02-15,MRO,Delivered,910,95.51,92.26,41.0,Yes
3,PO-00004,Beta_Supplies,2022-10-09,2022-10-28,Packaging,Delivered,1344,99.85,95.52,112.0,Yes
4,PO-00005,Delta_Logistics,2022-09-08,2022-09-20,Raw Materials,Delivered,1180,64.07,60.53,171.0,No
5,PO-00006,Epsilon_Group,2022-08-17,2022-08-29,MRO,Delivered,1145,69.21,63.57,39.0,Yes
6,PO-00007,Gamma_Co,2022-05-23,2022-06-03,MRO,Delivered,1774,51.37,47.82,96.0,No
7,PO-00008,Alpha_Inc,2022-04-15,2022-04-29,MRO,Delivered,1094,36.93,32.78,22.0,Yes
8,PO-00009,Gamma_Co,2023-11-24,2023-11-28,Raw Materials,Partially Delivered,1688,43.93,39.89,89.0,Yes
9,PO-00010,Gamma_Co,2023-07-13,2023-07-25,Raw Materials,Pending,171,76.87,70.2,8.0,Yes


In [20]:
df['Supplier'].value_counts()

Supplier
Delta_Logistics    171
Epsilon_Group      166
Beta_Supplies      156
Gamma_Co           143
Alpha_Inc          141
Name: count, dtype: int64

In [21]:
df['Order_Status'].value_counts()

Order_Status
Delivered              560
Pending                 81
Partially Delivered     73
Cancelled               63
Name: count, dtype: int64

In [22]:
x_train, x_test, y_train, y_test = train_test_split(
    df.drop(columns=['Compliance']), 
    df['Compliance'], 
    test_size=0.2, 
    random_state=45  
)

In [30]:
x_train.head(10)

Unnamed: 0,PO_ID,Supplier,Order_Date,Delivery_Date,Item_Category,Order_Status,Quantity,Unit_Price,Negotiated_Price,Defective_Units
227,PO-00228,Epsilon_Group,2022-08-24,2022-08-27,Packaging,Delivered,1871,59.96,58.54,47.0
297,PO-00298,Alpha_Inc,2022-08-07,2022-08-10,Office Supplies,Delivered,296,40.38,35.25,
69,PO-00070,Alpha_Inc,2022-04-14,2022-04-22,MRO,Delivered,291,98.7,87.02,9.0
540,PO-00541,Epsilon_Group,2023-02-23,2023-03-08,Office Supplies,Delivered,1738,67.91,61.15,
394,PO-00395,Gamma_Co,2023-11-11,2023-11-18,MRO,Delivered,1969,71.71,61.96,
581,PO-00582,Delta_Logistics,2023-09-27,2023-10-12,Office Supplies,Partially Delivered,1179,90.14,82.63,194.0
542,PO-00543,Epsilon_Group,2023-02-17,2023-02-23,Electronics,Delivered,624,74.39,66.3,18.0
256,PO-00257,Beta_Supplies,2022-04-10,2022-04-24,Packaging,Delivered,1900,24.11,23.45,188.0
564,PO-00565,Epsilon_Group,2023-09-14,2023-10-03,Raw Materials,Pending,54,32.28,28.65,3.0
393,PO-00394,Alpha_Inc,2022-11-03,2022-11-07,Office Supplies,Delivered,963,64.96,64.07,20.0


In [29]:
transformer = ColumnTransformer(
    transformers=[
        ('ordinal', OrdinalEncoder(categories=[['Cancelled', 'Pending', 'Partially Delivered', 'Delivered']]), ['Order_Status']),
        ('onehot', OneHotEncoder(sparse_output=False, drop='first'), ['Supplier'])
    ],
    remainder='passthrough'
)

In [39]:
x_train_new = transformer.fit_transform(x_train)

AttributeError: 'numpy.ndarray' object has no attribute 'pd'

In [46]:
x_test_new

array([[3.0, 0.0, 0.0, ..., 86.91, 78.44, 18.0],
       [3.0, 0.0, 0.0, ..., 69.73, 63.8, 41.0],
       [3.0, 0.0, 0.0, ..., 16.46, 14.2, 57.0],
       ...,
       [3.0, 0.0, 0.0, ..., 11.92, 11.72, 76.0],
       [3.0, 0.0, 0.0, ..., 72.23, 67.86, 53.0],
       [3.0, 0.0, 0.0, ..., 34.2, 30.5, 25.0]], dtype=object)

In [40]:
x_train_new = pd.DataFrame(x_test_new)

In [41]:
x_train_new.head(10)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12
0,3.0,0.0,0.0,1.0,0.0,PO-00515,2023-01-30,2023-02-16,Packaging,970,86.91,78.44,18.0
1,3.0,0.0,0.0,1.0,0.0,PO-00244,2023-11-17,2023-11-29,Electronics,1332,69.73,63.8,41.0
2,3.0,0.0,0.0,1.0,0.0,PO-00314,2022-07-14,2022-08-03,Raw Materials,1584,16.46,14.2,57.0
3,3.0,0.0,0.0,0.0,0.0,PO-00420,2022-02-15,2022-03-04,Raw Materials,509,76.76,74.72,7.0
4,3.0,0.0,0.0,0.0,1.0,PO-00031,2022-10-12,2022-10-26,Electronics,326,18.1,15.92,
5,3.0,0.0,0.0,1.0,0.0,PO-00299,2023-02-15,2023-02-27,MRO,885,88.28,80.8,31.0
6,3.0,0.0,0.0,0.0,1.0,PO-00686,2023-02-04,,Electronics,1618,32.76,28.41,62.0
7,3.0,0.0,1.0,0.0,0.0,PO-00327,2023-06-28,2023-07-01,Office Supplies,1004,96.51,92.9,146.0
8,3.0,0.0,0.0,1.0,0.0,PO-00453,2023-11-14,2023-12-01,Office Supplies,1585,56.38,53.05,
9,3.0,0.0,0.0,1.0,0.0,PO-00651,2022-10-20,,MRO,1807,69.47,60.0,50.0
