# In [None]:
import pandas as pd
from sklearn.feature_selection import SelectKBest, chi2, f_classif
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, OneHotEncoder, StandardScaler

# Preprocessing pipeline: load the CSV, build an all-numeric feature matrix,
# split it, then fit scaling and feature selection on the training portion
# only so that no test-set information leaks into the fitted transformers.

# Load the dataset
data = pd.read_csv('/content/creditcard.csv')

# Separate features and target variable.
# NOTE(review): the original used 'Time' as the target, which is a continuous
# timestamp and not a class label. 'Class' assumes the file follows the public
# credit-card fraud schema (Time, V1..V28, Amount, Class) -- confirm against
# the actual CSV header.
TARGET_COLUMN = 'Class'
if TARGET_COLUMN not in data.columns:
    raise KeyError(
        f"Target column {TARGET_COLUMN!r} not found; "
        f"available columns: {list(data.columns)}"
    )
X = data.drop(columns=[TARGET_COLUMN])

# LabelEncoder is intended for targets: it maps class labels to 0..n_classes-1.
label_encoder = LabelEncoder()
y = pd.Series(
    label_encoder.fit_transform(data[TARGET_COLUMN]),
    index=data.index,
    name=TARGET_COLUMN,
)

# Data preprocessing
# 1. Handling missing values (replace with 0, as before, without inplace).
X = X.fillna(0)

# Feature engineering
# 1. Creating new features. This runs before selection so the new column can
#    actually be picked. 'feature1'/'feature2' are template placeholders, so
#    the step is skipped when the dataset does not contain them.
if {'feature1', 'feature2'}.issubset(X.columns):
    X['new_feature'] = X['feature1'] + X['feature2']

# 2. Categorical feature encoding: one-hot encode only non-numeric columns.
#    Numeric columns are left untouched; one-hot encoding continuous values
#    would create one column per distinct number.
categorical_columns = X.select_dtypes(exclude='number').columns.tolist()
one_hot_encoder = OneHotEncoder(handle_unknown='ignore')
if categorical_columns:
    # astype(str) keeps each column homogeneous after fillna(0) mixed 0 with text.
    encoded = one_hot_encoder.fit_transform(X[categorical_columns].astype(str))
    encoded_frame = pd.DataFrame(
        encoded.toarray(),
        columns=one_hot_encoder.get_feature_names_out(categorical_columns),
        index=X.index,
    )
    X_encoded = pd.concat(
        [X.drop(columns=categorical_columns), encoded_frame], axis=1
    )
else:
    X_encoded = X.copy()

# Split the data into training and testing sets before fitting anything that
# learns statistics from the data.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X_encoded, y, test_size=0.2, random_state=42
)

# 3. Feature scaling: fit on the training rows, then apply to both parts.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train_raw)
X_test_scaled = scaler.transform(X_test_raw)

# 4. Feature selection: keep the top 5 features (or all, if fewer exist).
#    f_classif replaces chi2 because chi2 requires non-negative inputs and
#    standardized features contain negative values.
selector = SelectKBest(
    score_func=f_classif, k=min(5, X_train_scaled.shape[1])
)
X_train = selector.fit_transform(X_train_scaled, y_train)
X_test = selector.transform(X_test_scaled)

# Full-dataset views produced with the train-fitted transformers, kept so the
# names X_scaled / X_selected remain available to later code.
X_scaled = scaler.transform(X_encoded)
X_selected = selector.transform(X_scaled)



