### Feature engineering

In [1]:
# Feature engineering for fraud data
def _ip_series_to_int(ip_series):
    """Convert a Series of IP addresses to their integer values.

    Dotted-quad strings such as '1.0.0.5' become the 32-bit integer they
    denote (16777221). Any other value is treated as an already-numeric
    address (int, float, or numeric string) and truncated to an int.
    """
    import ipaddress

    def convert(value):
        if isinstance(value, str) and value.count('.') == 3:
            return int(ipaddress.IPv4Address(value.strip()))
        return int(float(value))

    return ip_series.apply(convert)


def _lookup_country(ip_values, ip_country):
    """Return the country whose [lower, upper] IP range contains each address.

    Addresses that fall in no range get NaN. Uses a binary search over the
    ranges sorted by lower bound instead of one full-frame mask per range.
    NOTE: this assumes the ranges in ``ip_country`` do not overlap -- confirm
    against the lookup table in use.
    """
    ranges = ip_country.sort_values('lower_bound_ip_address')
    lower = ranges['lower_bound_ip_address'].to_numpy()
    upper = ranges['upper_bound_ip_address'].to_numpy()
    names = ranges['country'].to_numpy(dtype=object)
    ips = ip_values.to_numpy()

    if len(lower) == 0 or len(ips) == 0:
        return pd.Series(float('nan'), index=ip_values.index, dtype=object)

    # Index of the last range whose lower bound is <= the address (-1: none).
    pos = lower.searchsorted(ips, side='right') - 1
    nearest = pos.clip(min=0)  # keep indexing valid; -1 rows are masked below
    inside = (pos >= 0) & (ips <= upper[nearest])
    candidates = pd.Series(names[nearest], index=ip_values.index, dtype=object)
    return candidates.where(inside)


def engineer_fraud_features(df, ip_country):
    """Build model features for the fraud transactions frame.

    Works on a copy, so the caller's ``df`` is left untouched and the
    function can safely be re-run on the same input.

    Parameters
    ----------
    df : DataFrame with columns ``ip_address``, ``purchase_time``,
        ``signup_time`` (both datetime-like, since ``.dt`` is used),
        ``user_id``, ``source``, ``browser`` and ``sex``.
    ip_country : DataFrame with columns ``lower_bound_ip_address``,
        ``upper_bound_ip_address`` and ``country``.

    Returns
    -------
    A new DataFrame with ``ip_address`` as an integer, the added columns
    ``hour_of_day``, ``day_of_week``, ``time_since_signup`` (hours) and
    ``user_transaction_count``, and ``source``/``browser``/``sex``/``country``
    replaced by one-hot columns (first level of each dropped).
    """
    df = df.copy()

    # Convert IP to its numeric value so it is comparable with the bounds
    df['ip_address'] = _ip_series_to_int(df['ip_address'])

    # Merge with country data
    df['country'] = _lookup_country(df['ip_address'], ip_country)

    # Time-based features
    df['hour_of_day'] = df['purchase_time'].dt.hour
    df['day_of_week'] = df['purchase_time'].dt.dayofweek
    elapsed = df['purchase_time'] - df['signup_time']
    df['time_since_signup'] = elapsed.dt.total_seconds() / 3600

    # Transaction velocity (would need more data for proper implementation)
    # For now, we'll calculate purchases per user
    user_counts = df['user_id'].value_counts().to_dict()
    df['user_transaction_count'] = df['user_id'].map(user_counts)

    # Encode categorical features
    df = pd.get_dummies(df, columns=['source', 'browser', 'sex', 'country'], drop_first=True)

    return df

# Run the fraud feature pipeline on the cleaned fraud frame
fraud_data_fe = engineer_fraud_features(df=fraud_data_clean, ip_country=ip_country)

# The credit data needs no extra engineering (V1-V28 are PCA components),
# so only split it into the feature matrix and the 'Class' target
y_credit = credit_data_clean.loc[:, 'Class']
X_credit = credit_data_clean.drop(columns='Class')

# Persist the engineered fraud features (row index is not written)
fraud_data_fe.to_csv(
    path_or_buf='../data/processed/fraud_data_fe.csv',
    index=False,
)

NameError: name 'fraud_data_clean' is not defined