## **Function Transform**

In [None]:
# The FunctionTransformer takes as input a single function and applies it to the
# data passed to transform(). The function receives the whole input (for example
# the full array), not one sample at a time. It can be any Python callable that
# takes a single argument, such as a lambda function or a user-defined function.
# The function should return the transformed data.

In [None]:
import numpy as np
from sklearn.preprocessing import FunctionTransformer

# Create a small 2x2 integer dataset to transform.
x = np.array([[1,2], [3,4]])

# Wrap NumPy's log1p (element-wise log(1 + value)) in a FunctionTransformer so
# it can be used through the scikit-learn transformer interface.
log_transform = FunctionTransformer(np.log1p)

# Apply the transformation to the dataset; transform() is called directly,
# without a prior fit().
x_transformed = log_transform.transform(x)

# View the transformed data.
print(x_transformed)

[[0.69314718 1.09861229]
 [1.38629436 1.60943791]]


In [None]:
# 1> Custom Feature Engineering
from sklearn.preprocessing import FunctionTransformer
import numpy as np

# Build a small 2x2 sample matrix for the feature-engineering demo
x = np.array([[1, 2], [3, 4]])

# Custom feature-engineering step: keep the original values and add their squares
def holi(x):
  """Return ``x`` with its element-wise squares stacked on horizontally.

  For a 2-D array of shape (n, m) the result has shape (n, 2 * m): the
  original columns followed by the squared columns.
  """
  squared = x ** 2
  return np.hstack([x, squared])

# Wrap the custom holi() function in a FunctionTransformer so it can be called
# through the scikit-learn transformer interface.
custom_transformer = FunctionTransformer(holi)

# Apply the transformer to the input data (this calls holi on x).
x_transformed = custom_transformer.transform(x)

# View the transformed data.
print(x_transformed)

[[ 1  2  1  4]
 [ 3  4  9 16]]


In [None]:
from sklearn.preprocessing import FunctionTransformer
import numpy as np

# Build a small 2x2 sample matrix for the scaling demo
x = np.array([[1, 2], [3, 4]])

# Custom scaling step: divide every value by the largest value in the input
def my_scaling(x):
  """Scale ``x`` by its maximum.

  For a NumPy array the maximum is taken over the whole array (not per
  column), so the largest entry maps to 1.
  """
  peak = np.max(x)
  return x / peak

# Wrap the custom my_scaling() function in a FunctionTransformer so it can be
# called through the scikit-learn transformer interface.
custom_transformer = FunctionTransformer(my_scaling)

# Apply the transformer to the input data (this calls my_scaling on x).
x_transformed = custom_transformer.transform(x)

# View the transformed data.
print(x_transformed)

[[0.25 0.5 ]
 [0.75 1.  ]]


In [None]:
from sklearn.preprocessing import FunctionTransformer
import numpy as np

# Create a dataset that contains one missing value (NaN)
x = np.array([[1,2],[3,np.nan]])

# Custom cleaning step: replace every NaN with 0
def my_cleaning(x):
  """Return a copy of ``x`` in which every NaN has been replaced by 0.

  The input is left unmodified: a transformer should not alter the data it
  is given, otherwise the caller's original array silently loses its NaNs
  and re-running or inspecting it later gives misleading results.

  Parameters
  ----------
  x : numpy.ndarray (or another object supporting ``.copy()`` and boolean-mask
      assignment, e.g. a pandas DataFrame)
      Numeric data that may contain NaN.

  Returns
  -------
  Same type as ``x``
      A new object with NaN entries set to 0.
  """
  # Work on a copy so the boolean-mask assignment below cannot touch the input.
  cleaned = x.copy()
  cleaned[np.isnan(cleaned)] = 0
  return cleaned

# Wrap the custom my_cleaning() function in a FunctionTransformer so it can be
# called through the scikit-learn transformer interface.
custom_transformer = FunctionTransformer(my_cleaning)

# Apply the transformer to the input data (this calls my_cleaning on x).
x_transformed = custom_transformer.transform(x)

# View the transformed data.
print(x_transformed)

[[1. 2.]
 [3. 0.]]


In [None]:
import numpy as np
import pandas as pd


In [None]:
# Load the placement dataset and preview its first five rows.
# NOTE(review): the absolute /content path looks like a Google Colab upload
# location - confirm and adjust when running elsewhere.
df=pd.read_csv('/content/placement.csv')
df.head(5)

Unnamed: 0,cgpa,resume_score,placed
0,8.14,6.52,1
1,6.17,5.17,0
2,8.27,8.86,1
3,6.88,7.27,1
4,7.52,7.3,1


In [None]:
# Split the frame into the feature columns (everything except 'placed') and
# the 'placed' target column.
x =df.drop(columns=['placed'])
y=df['placed']

In [None]:
from sklearn.preprocessing import FunctionTransformer

In [None]:
# Wrap NumPy's log1p (element-wise log(1 + value)) in a FunctionTransformer.
log_transform = FunctionTransformer(np.log1p)

# Apply the transform to the feature frame x (a DataFrame built with
# df.drop(...), not a NumPy array).
x_transformed = log_transform.transform(x)


In [None]:
# Display the log-transformed features.
x_transformed

Unnamed: 0,cgpa,resume_score
0,2.212660,2.017566
1,1.969906,1.819699
2,2.226783,2.288486
3,2.064328,2.112635
4,2.142416,2.116256
...,...,...
95,1.991976,1.998774
96,2.222459,2.170196
97,2.034706,2.172476
98,2.212660,1.891605


In [None]:
# Question
# Problem statement
# dataset --> columns of mixed dtypes (int, float, object)
# create a function --> separate the column names into different lists by dtype (int, float, object)

In [None]:
import numpy as np
import pandas as pd

In [None]:
# Load the covid toy dataset and preview its first five rows.
# NOTE(review): the absolute /content path looks like a Google Colab upload
# location - confirm and adjust when running elsewhere.
df=pd.read_csv('/content/covid_toy.csv')
df.head(5)

Unnamed: 0,age,gender,fever,cough,city,has_covid
0,60,Male,103.0,Mild,Kolkata,No
1,27,Male,100.0,Mild,Delhi,Yes
2,42,Male,101.0,Mild,Delhi,No
3,31,Female,98.0,Mild,Kolkata,No
4,65,Female,101.0,Mild,Mumbai,No


In [None]:
import pandas as pd
from sklearn.preprocessing import FunctionTransformer

# Re-read the covid toy CSV so this cell works from the file contents directly.
df = pd.read_csv("/content/covid_toy.csv")
# Module-level result lists; later cells read them by name.
int_list = []
float_list = []
object_list = []

def separate_data(df):
    """Group the column names of ``df`` by dtype.

    Columns with an integer dtype go to ``int_list``, columns with a
    floating-point dtype go to ``float_list`` and every other column
    (strings/objects, booleans, datetimes, ...) goes to ``object_list``.

    The three module-level lists are refilled on every call, so calling the
    function repeatedly never accumulates duplicate column names.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose columns should be classified.

    Returns
    -------
    list of list
        ``[int_list, float_list, object_list]`` - the module-level list
        objects themselves.
    """
    ints, floats, others = [], [], []
    for col in df.columns:
        dtype = df[col].dtype
        # Use the pandas dtype predicates instead of comparing against the
        # literal 'int64'/'float64' so int32, float32, etc. are classified as
        # numeric rather than falling through to the object bucket.
        if pd.api.types.is_integer_dtype(dtype):
            ints.append(col)
        elif pd.api.types.is_float_dtype(dtype):
            floats.append(col)
        else:
            others.append(col)

    # Slice-assign so the existing list objects are updated in place: any code
    # already holding a reference to them sees the fresh contents.
    int_list[:] = ints
    float_list[:] = floats
    object_list[:] = others

    return [int_list, float_list, object_list]

# Create a FunctionTransformer around separate_data. Note that separate_data
# returns lists of column names rather than transformed data, so the
# transformer is used here only as a wrapper that calls the function.
transformer = FunctionTransformer(separate_data)

# Apply transformation: result is [int columns, float columns, other columns].
result = transformer.transform(df)

# Output results
print("Integer Columns:", result[0])
print("Float Columns:", result[1])
print("Object Columns:", result[2])


Integer Columns: ['age']
Float Columns: ['fever']
Object Columns: ['gender', 'cough', 'city', 'has_covid']


In [None]:
from sklearn.preprocessing import LabelEncoder

# Label-encode every column listed in object_list, overwriting the column in df.
# The single encoder is re-fit for each column, which replaces the mapping it
# learned for the previous one, so the classes learned for each column are kept
# in label_classes to make the integer codes interpretable afterwards.
lb = LabelEncoder()
label_classes = {}  # column name -> array of original labels (position == code)
for i in object_list:
  df[i]=lb.fit_transform(df[i])
  label_classes[i] = lb.classes_
df.head(3)

Unnamed: 0,age,gender,fever,cough,city,has_covid
0,60,1,103.0,0,2,0
1,27,1,100.0,0,1,1
2,42,1,101.0,0,1,0


In [None]:
# Count the missing values in each column and show them as a two-column table:
# 'index' holds the column name and 'missing' the number of nulls.
missing_df = df.isnull().sum().reset_index(name="missing")
missing_df

Unnamed: 0,index,missing
0,age,0
1,gender,0
2,fever,10
3,cough,0
4,city,0
5,has_covid,0


In [None]:
# Collect the names of all columns that contain at least one missing value
missing_col = [name for name in df.columns if df[name].isnull().sum() > 0]
print(missing_col)

['fever']


In [None]:
from sklearn.impute import SimpleImputer

# Mean-impute every numeric column (integer or float) that has missing values.
# Both numeric kinds are treated identically, so they share one branch; columns
# that are in neither list are left untouched.
for i in missing_col:
    if i in int_list or i in float_list:
        # A fresh imputer per column: the mean is learned from that column only.
        si = SimpleImputer(strategy="mean")
        df[i] = si.fit_transform(df[[i]])

# Verify that no missing values remain.
df.isnull().sum()

Unnamed: 0,0
age,0
gender,0
fever,0
cough,0
city,0
has_covid,0
