<h2>Преобразование переменных</h2>

In [45]:
import pandas as pd
import numpy as np
import warnings
# Silences every warning category from this point on.
# NOTE(review): this also hides pandas diagnostics (e.g. SettingWithCopyWarning);
# consider narrowing the filter to the specific categories that are noisy.
warnings.filterwarnings('ignore')

In [46]:
# Load the source table, then discard the 'Unnamed: 0' column
# (presumably a stray index column from an earlier export — TODO confirm).
df = pd.read_csv('data/final.csv')
df = df.drop(columns='Unnamed: 0')

In [47]:
# (Not for the presentation.) Basic type fixes and clean-up.
# Parse finish_dttm strictly as 'YYYY-MM-DD HH:MM:SS' timestamps.
df['finish_dttm'] = pd.to_datetime(df['finish_dttm'], format='%Y-%m-%d %H:%M:%S')
# Cast age to a number; values that cannot be parsed become NaN.
df['age'] = pd.to_numeric(df['age'], errors='coerce')
# A missing bundle name and the literal 'XXX' are both relabelled 'Без подписки'.
df['bundle_nm'] = df['bundle_nm'].fillna('Без подписки').replace('XXX', 'Без подписки')

# Binary outcome flag: 1 for 'Дозвон, Успешно', 0 for 'Дозвон, Отказ';
# any other status is left as NaN by .map().
df['success_flg'] = df['wo_hit_status_result_desc'].map({'Дозвон, Успешно': 1, 'Дозвон, Отказ': 0})

# (Undecided whether this belongs in the presentation.) Encodings and date features.
# Binary gender codes: F -> 0, M -> 1 for gender_cd; W -> 0, M -> 1 for emp_gender_cd.
# Any other value becomes NaN.
df['gender_cd'] = df['gender_cd'].map({'F': 0, 'M': 1})
df['emp_gender_cd'] = df['emp_gender_cd'].map({'W': 0, 'M': 1})
# Day of week as an integer (pandas convention: Monday=0 ... Sunday=6).
df['day_of_week'] = df['finish_dttm'].dt.dayofweek
# Weekend indicator: 1 for Saturday/Sunday (day_of_week > 4), otherwise 0.
# A missing day_of_week compares False and therefore yields 0.
df['holiday_flg'] = (df['day_of_week'] > 4).astype('int64')
# Keep only the calendar date, stored as a 'YYYY-MM-DD' string.
df['finish_dttm'] = df['finish_dttm'].dt.strftime('%Y-%m-%d')

# Raw product_cd values that make up each coarse product group.
credit_products = ['Credit Cards']
loan_products = ['Cash Loan', 'Secured Loan', 'Car Loan']
debit_investment_products = ['Debit Cards']  # despite the name, holds only debit cards
investment_products = ['Investment']
insurance_services = ['Casco', 'MVNO', 'Mortgage Insurance']

# Group label -> product codes, applied in the listed order
# (the code lists do not overlap, so the order does not affect the result).
product_groups = {
    'Кредитная карта': credit_products,
    'Кредит': loan_products,
    'Дебетовая карта': debit_investment_products,
    'Страхование и услуги': insurance_services,
    'Инвестиции': investment_products,
}

# Start from the fallback label and overwrite it for every recognised code.
df['product'] = 'Другое'
for product_label, product_codes in product_groups.items():
    df.loc[df['product_cd'].isin(product_codes), 'product'] = product_label

# Keep only rows that landed in a named group. The explicit .copy() makes the
# filtered frame independent, so the column assignments that follow are not
# chained assignments on a slice of the unfiltered frame.
df = df[df['product'] != 'Другое'].copy()

# functional_cd values grouped under the 'Продажи и предложения' label.
# Kept under their own names so the product code lists defined earlier
# (loan_products, debit_investment_products) are not overwritten with
# function codes.
sales_function_codes = ['Upsell', 'Cold', 'Downsell', 'Utilization', 'Reutilization', 'Prolongation', 'Refinancing', 'Preapprove', 'Autoleads', 'BAF']
# NOTE(review): this second list is defined but never applied, so rows with
# these codes keep the fallback 'Другое' label like any other unlisted code.
# Confirm whether a dedicated label was intended for them.
other_function_codes = ['Afterfilling', 'Meeting', 'Incoming', 'Activation', 'Agreement']

# Fallback label first, then overwrite it for the listed sales codes.
df['function'] = 'Другое'
df.loc[df['functional_cd'].isin(sales_function_codes), 'function'] = 'Продажи и предложения'

# Product of the two flags (1 only when both are 1, assuming 0/1-coded flags).
df['auto_and_entrepreneur_flg'] = df['auto_flg'] * df['entrepreneur_flg']

# Collapse region_size into two classes: 'urban', 'town' and 'msk' count as
# 'город'; everything else, including missing values, as 'сельская местность'.
df['region_category'] = (
    df['region_size']
    .isin(['urban', 'town', 'msk'])
    .map({True: 'город', False: 'сельская местность'})
)
df['region_category_dummies'] = df['region_category'].map({'город': 1, 'сельская местность': 0})

# Age bands. include_lowest=True closes the first interval on the left so that
# age 14 falls into '14-24' as the label promises; without it pd.cut builds
# (14, 24] and leaves 14-year-olds unlabelled (NaN).
# NOTE(review): ages above 100 still fall outside every band and become NaN,
# although the last label reads '65+'.
df['age_group'] = pd.cut(
    df['age'],
    bins=[14, 24, 34, 49, 64, 100],
    labels=['14-24', '25-34', '35-49', '50-64', '65+'],
    include_lowest=True,
)
# Ordinal code 1..5 for the age band (NaN where the band is missing).
df['age_group_dummies'] = df['age_group'].map({'14-24': 1, '25-34': 2, '35-49': 3, '50-64': 4, '65+': 5})

def determine_marital_status(row):
    """Label a record's household type from its partner flag and child count.

    Args:
        row: Mapping-like record (e.g. a DataFrame row) providing
            'partnership_flg' and 'child_amt'.

    Returns:
        One of 'одинокий', 'одинокий с детьми', 'с партнером без детей',
        'с партнером с детьми'; None when the flag is neither 0 nor 1 or
        the child count is neither 0 nor positive (for example NaN).
    """
    # Pick the pair of labels that applies to this partner flag.
    partner_flag = row['partnership_flg']
    if partner_flag == 0:
        childless_label, parent_label = 'одинокий', 'одинокий с детьми'
    elif partner_flag == 1:
        childless_label, parent_label = 'с партнером без детей', 'с партнером с детьми'
    else:
        return None

    # Then choose within the pair by the number of children.
    children = row['child_amt']
    if children == 0:
        return childless_label
    if children > 0:
        return parent_label
    return None

# Absolute difference between emp_age and age (NaN when either one is missing).
df['age_difference'] = (df['emp_age'] - df['age']).abs()

# Row-wise household label; rows matching no branch of the helper receive None.
df['marital_status'] = df.apply(determine_marital_status, axis='columns')

def determine_work_status(row):
    """Classify a number of worked months into an experience tier.

    Args:
        row: A single numeric value (despite the name, a scalar rather than
            a DataFrame row); may be missing (None / NaN).

    Returns:
        "новичок" below 6, "опытный" from 6 up to but excluding 24,
        "сеньор" from 24 upwards, and None for a missing value.
    """
    # Every comparison with NaN is False, so without this guard a missing
    # value would fall through to the last branch and be labelled "сеньор".
    if pd.isna(row):
        return None
    if row < 6:
        return "новичок"
    if row < 24:
        return "опытный"
    return "сеньор"
# Experience tier for each row, derived element-wise from emp_work_months_cnt.
df['work_experience_status'] = df['emp_work_months_cnt'].map(determine_work_status)

In [48]:
# Write the transformed table to disk without the index column.
df.to_csv(path_or_buf='data/data_preprocessed.csv', index=False)