> # Import Modules

In [3]:
import sys
import os
import pandas as pd
import numpy as np

# Put the parent of the current working directory on the import path, once.
# The `noventis` import that follows presumably relies on this (TODO confirm
# that the package actually lives one level above the notebook).
module_path = os.path.abspath(os.pardir)
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)

from noventis.data_cleaner import NoventisEncoder
print('Successfully imported modules!')

Successfully imported modules!


> # Prepare the DataFrame

In [4]:
# Toy dataset written one observation per row so each record can be read at a
# glance; it is pivoted back into the column-oriented dict `data` below.
column_names = ('Country', 'Education', 'Size', 'Has_Pet', 'Target')
records = [
    ('USA',     'Bachelors', 'Medium', 'Yes', 1),
    ('UK',      'Masters',   'Small',  'No',  0),
    ('Canada',  'PhD',       'Large',  'No',  0),
    ('USA',     'Bachelors', 'Medium', 'Yes', 1),
    ('Germany', 'Masters',   'Large',  'Yes', 0),
    ('UK',      'Bachelors', 'Small',  'No',  0),
    ('USA',     'PhD',       'Medium', 'Yes', 1),
    ('France',  'Masters',   'Large',  'No',  0),
    ('Canada',  'Masters',   'Medium', 'Yes', 1),
    ('Germany', 'Bachelors', 'Small',  'No',  1),
]

# Transpose rows -> columns; key order follows `column_names`.
data = {name: list(values) for name, values in zip(column_names, zip(*records))}
df = pd.DataFrame(data)

> # Automatic Encoding (Recommended)

In [5]:
# Independent copy of the demo frame, split into features and label.
df_auto = df.copy()
X_auto = df_auto.drop(columns='Target')
y_auto = df_auto['Target']

# method='auto' leaves the per-column encoding choice to the encoder; the
# report printed by this cell shows which method each column was assigned.
encoder_auto = NoventisEncoder(
    method='auto',
    target_column='Target',
    verbose=True,
)
df_encoded_auto = encoder_auto.fit_transform(X_auto, y_auto)

print("\nTransformed DataFrame Head:")
print(df_encoded_auto.head())

🚀 NOVENTIS ENCODER - ANALYSIS REPORT
📊 Analyzed 4 categorical columns

⚠️  MANUAL INTERVENTION RECOMMENDED for 'Country':
   - High correlation with target (0.418)
   - Consider using ordinal encoding with proper ordering
   - Sample values: ['USA', 'UK', 'Canada']
   - Falling back to target encoding for now

⚠️  MANUAL INTERVENTION RECOMMENDED for 'Size':
   - High correlation with target (0.758)
   - Consider using ordinal encoding with proper ordering
   - Sample values: ['Medium', 'Small', 'Large']
   - Falling back to target encoding for now


📋 ENCODING SUMMARY
----------------------------------------
   TARGET: 2 columns
   OHE: 1 columns
   LABEL: 1 columns

📊 DETAILED COLUMN ANALYSIS
----------------------------------------
   Country:
      Method: TARGET
      Unique values: 5
      Target correlation: 0.418

   Education:
      Method: OHE
      Unique values: 3
      Target correlation: 0.000
      Memory impact: 0.0 MB

   Size:
      Method: TARGET
      Unique values: 

> # Manual Ordinal Encoding

In [6]:
# Independent copy of the demo frame, split into features and label.
df_ordinal = df.copy()
X_ordinal = df_ordinal.drop(columns='Target')
y_ordinal = df_ordinal['Target']

# Explicit rank for each size level: Small=1 < Medium=2 < Large=3.
size_mapping = {
    'Size': {
        level: rank
        for rank, level in enumerate(('Small', 'Medium', 'Large'), start=1)
    }
}

# Only 'Size' is encoded; fit_transform is called without a label here.
encoder_ordinal = NoventisEncoder(
    method='ordinal',
    columns_to_encode=['Size'],
    category_mapping=size_mapping,
)
df_encoded_ordinal = encoder_ordinal.fit_transform(X_ordinal)

print(df_encoded_ordinal.loc[:, ['Size_ordinal_encoded']].head())

   Size_ordinal_encoded
0                     2
1                     1
2                     3
3                     2
4                     3


> # Manual Target Encoding

In [8]:
# Independent copy of the demo frame, split into features and label.
df_target = df.copy()
X_target = df_target.drop(columns='Target')
y_target = df_target['Target']

# Target-encode only 'Country'. cv=3 is handed to the encoder unchanged
# (presumably the number of cross-validation folds — confirm in NoventisEncoder).
encoder_target = NoventisEncoder(
    method='target',
    columns_to_encode=['Country'],
    target_column='Target',
    cv=3,
)
df_encoded_target = encoder_target.fit_transform(X_target, y_target)

print(df_encoded_target.loc[:, ['Country_target_encoded']].head())

   Country_target_encoded
0                     1.0
1                     0.0
2                     0.5
3                     1.0
4                     0.5
