# Imports

In [41]:
import numpy as np
import pandas as pd
pd.set_option('display.max_columns', None)
import scipy.stats as stats
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import chi2_contingency,ttest_1samp
import unittest


from sklearn.datasets import load_iris
from sklearn.pipeline import Pipeline

from sklearn.preprocessing import StandardScaler, MinMaxScaler, LabelEncoder, OneHotEncoder
from sklearn.feature_selection import SelectKBest, f_classif, mutual_info_classif
from sklearn.impute import SimpleImputer

from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.decomposition import PCA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.linear_model import LogisticRegression, LinearRegression, SGDClassifier
# from sklearn.linear_model import Lasso, Ridge  # For regularized regression
# from sklearn.tree import DecisionTreeClassifier
# from sklearn.cluster import KMeans
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor, AdaBoostClassifier

# #metrics
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report, roc_curve, roc_auc_score, mean_squared_error
# from sklearn.datasets import make_classification


# Start

In [42]:
# Load the iris measurements (X) and class labels (y) as arrays.
X, y = load_iris(return_X_y=True)

# Seeded generator. The same instance is handed to the split below and stays
# defined for any later cell that references `rng`.
rng = np.random.RandomState(0)

# Hold out 20% of the samples as a test set.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=rng,
)

# Preview the first five training rows.
X_train[:5]


array([[6.4, 3.1, 5.5, 1.8],
       [5.4, 3. , 4.5, 1.5],
       [5.2, 3.5, 1.5, 0.2],
       [6.1, 3. , 4.9, 1.8],
       [6.4, 2.8, 5.6, 2.2]])

In [43]:
# Two-step pipeline: min-max scale every feature, then fit an SGD-trained
# linear classifier on the scaled values.
#
# The classifier is seeded with an integer instead of the shared `rng`
# RandomState instance. A RandomState object is advanced every time it is
# consumed, so passing it here made the fitted model depend on how many times
# (and in which order) cells had been executed; an integer seed makes this
# cell give the same model on every re-run.
pipe = Pipeline([
    ('scaler', MinMaxScaler()),
    ('clf', SGDClassifier(max_iter=1000, random_state=0)),
])

# Fit scaler + classifier on the training split, then display the predicted
# class labels for the held-out split.
pipe.fit(X_train, y_train)
pipe.predict(X_test)

array([2, 1, 0, 2, 0, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 1, 0, 0, 2, 2,
       0, 0, 2, 0, 0, 2, 1, 0])

In [44]:
# Preview of the training features as loaded, i.e. before any scaling.
print("Original X_train (first 5 rows):")
pd.DataFrame(X_train).head(5)

Original X_train (first 5 rows):


Unnamed: 0,0,1,2,3
0,6.4,3.1,5.5,1.8
1,5.4,3.0,4.5,1.5
2,5.2,3.5,1.5,0.2
3,6.1,3.0,4.9,1.8
4,6.4,2.8,5.6,2.2


In [45]:
# Run the training features through the pipeline's already-fitted scaler step
# only (the classifier is not involved) so the scaled values can be inspected.
fitted_scaler = pipe.named_steps['scaler']
X_train_transformed = fitted_scaler.transform(X_train)

# Preview the scaled rows.
print("Transformed X_train (first 5 rows):")
pd.DataFrame(X_train_transformed).head(5)

Transformed X_train (first 5 rows):


Unnamed: 0,0,1,2,3
0,0.583333,0.458333,0.758621,0.708333
1,0.305556,0.416667,0.586207,0.583333
2,0.25,0.625,0.068966,0.041667
3,0.5,0.416667,0.655172,0.708333
4,0.583333,0.333333,0.775862,0.875


In [46]:
# Predict class labels for the held-out split with the fitted pipeline
# (scaling is applied inside `pipe.predict`), keep them in `y_pred`, and
# preview the first five predictions.
y_pred = pipe.predict(X_test)
y_pred[:5]


array([2, 1, 0, 2, 0])

# End