# IMPORTS 

In [2]:
import pandas as pd
pd.set_option("max_colwidth", None)

import pycaret
import numpy as np
import matplotlib.pyplot as plt
from pycaret.classification import * 
from sklearn.model_selection import train_test_split

# LOAD DATA

##### SIMPLE

In [3]:
# READ IN TRAIN
df = pd.read_csv('./data/fundraising.csv')
# SPLIT INTO TRAIN AND DEV
# 80/20 split; the fixed seed keeps the partition identical across runs.
train, dev = train_test_split(df, test_size=0.20, random_state=42)
# Tag each partition with its role. `assign` returns a new frame, so we never
# write into a selection of `df` (which can raise SettingWithCopyWarning).
train = train.assign(type='train')
dev = dev.assign(type='dev')

# READ IN TEST
test = pd.read_csv('./data/future_fundraising.csv')

# MAKE TARGET NANs
# The test rows get a NaN 'target' so they share a schema with train/dev.
test['target'] = np.nan
test['type'] = 'test'

# Recombine train and dev, then shuffle them. The shuffle is seeded (same seed
# as the split) so that the saved CSV is reproducible from run to run.
train_dev = pd.concat([train, dev], ignore_index=True, axis=0)
train_dev = train_dev.sample(frac=1, random_state=42).reset_index(drop=True)

# Test rows are appended after the shuffled train/dev rows; the 'type' column
# is what distinguishes the three partitions from here on.
df = pd.concat([train_dev, test], ignore_index=True, axis=0)
# NOTE: the default index=True writes the row index as an unnamed first column.
df.to_csv('./data/df.csv')

##### COMPLEX

In [4]:
# Derived features are added to `df` in place, then incomplete rows are
# removed and the result is saved to ./data/df_complex.csv.
#
# NOTE(review): the ratio features divide by raw columns. In pandas a zero
# denominator yields inf (or NaN for 0/0) rather than raising, and inf values
# are NOT removed by the NaN filter at the bottom -- confirm that the
# denominators are never zero, or handle inf explicitly downstream.

# Interaction between 'income' and 'wealth'
df['interaction_wealth_income'] = df['income'] * df['wealth']

# Income to Home Value Ratio
df['income_to_home_ratio'] = df['income'] / df['home_value']

# Wealth to Home Value Ratio
df['wealth_to_home_ratio'] = df['wealth'] / df['home_value']

# Gift to Income Ratio
# Assuming 'lifetime_gifts' is the total amount donated
df['gift_to_income_ratio'] = df['lifetime_gifts'] / df['income']

# Average Gift Size to Largest Gift Ratio
df['average_gift_size_to_largest_gift_ratio'] = df['avg_gift'] / df['largest_gift']

# Total Gifts to Number of Promotions
df['total_gifts_to_number_of_promotions'] = df['lifetime_gifts'] / df['num_prom']

# Combined categorical key: homeowner flag joined with number of children
df['homeowner_children'] = df['homeowner'].astype(str) + "_" + df['num_child'].astype(str)

# Combined categorical key: the four zipconvert columns joined with 'female'
df['zip_gender'] = (
    df['zipconvert2'].astype(str)
    + "_" + df['zipconvert3'].astype(str)
    + "_" + df['zipconvert4'].astype(str)
    + "_" + df['zipconvert5'].astype(str)
    + "_" + df['female'].astype(str)
)

# Bucket months since the last donation. include_lowest=True makes the first
# bin closed on the left, so a value of exactly 0 lands in '0-6' instead of
# becoming NaN (pd.cut bins are left-exclusive by default).
df['time_since_last_gift_bucket'] = pd.cut(
    df['months_since_donate'],
    bins=[0, 6, 12, 24, np.inf],
    labels=['0-6', '7-12', '13-24', '24+'],
    include_lowest=True,
)

# Drop incomplete rows. Test rows carry a deliberately-NaN 'target', so a
# plain df.dropna() would delete the entire test set; the missing target is
# therefore only treated as a defect for rows that are not tagged 'test'.
has_missing_feature = df.drop(columns='target').isna().any(axis=1)
is_unlabelled_non_test = df['target'].isna() & (df['type'] != 'test')
df = df.loc[~(has_missing_feature | is_unlabelled_non_test)].copy()
# NOTE: the default index=True writes the row index as an unnamed first column.
df.to_csv('./data/df_complex.csv')