### Project: 'MODELO PREDICTIVO DE CLASIFICACIÓN DE CHURN BASADO EN PATRONES DE CONTACTO PARA UNA STARTUP BOLIVIANA'  
#### Author: Flavia Davila Perez
#### Architecture: Medallion
##### Stage: Silver 
##### Sub-Stage: Standardized
Description: Data normalization and standardization


### Libraries 

In [1]:
import os
import json
import pandas as pd
from datetime import datetime, timedelta

In [2]:
import sys

sys.path.append('../scripts')
from silver_functions import *

### Import Bronze Data

In [3]:
# Load the bronze-layer CSV exports; dtype=str makes pandas read every column
# as text instead of inferring numeric/date types.
# Paths use forward slashes (as the sys.path entry '../scripts' already does):
# they resolve on Windows as well as on POSIX systems, whereas backslash
# separators only resolve on Windows.

# Activities
df_activities_com = pd.read_csv('../data/output_bronce/activities_com.csv', dtype=str)
df_activities_exp = pd.read_csv('../data/output_bronce/activities_exp.csv', dtype=str)

# Deals
df_deals_com = pd.read_csv('../data/output_bronce/deals_com.csv', dtype=str)
df_deals_exp = pd.read_csv('../data/output_bronce/deals_exp.csv', dtype=str)

In [4]:
# Load the bronze-layer JSON metadata files.
# Paths use forward slashes so they resolve on POSIX systems as well as on
# Windows (backslash separators only resolve on Windows).
# NOTE(review): the files are opened with the platform default encoding. If
# they are written as UTF-8 and contain non-ASCII characters, pass
# encoding="utf-8" here — confirm against the stage that writes them.

# Deals field metadata
with open('../data/output_bronce/json_deals.json', "r") as json_file:
    json_deals_metadata = json.load(json_file)

# Activities field metadata
with open('../data/output_bronce/json_activities.json', "r") as json_file:
    json_act_metadata = json.load(json_file)

# Activities column definitions (entries with 'key' and 'name')
with open('../data/output_bronce/act_col.json', "r") as json_file:
    act_col = json.load(json_file)

# Deals column definitions (entries with 'key' and 'name')
with open('../data/output_bronce/deals_col.json', "r") as json_file:
    deals_col = json.load(json_file)

# Stage lookup used to translate the 'Etapa' column; this path is resolved
# relative to the current working directory.
with open('stages_dict.json', 'r') as f:
    stages_dict = json.load(f)

### MAP DATA

#### Restructuring JSON

In [5]:
# Restructure the deals metadata loaded from json_deals.json. The result is
# later passed to map_columns_by_name when mapping deal values.
# NOTE(review): restructure_metadata is not defined in this notebook
# (presumably it comes from the silver_functions star import); the shape of
# its return value is not visible here — confirm in that module.
restructured_deals = restructure_metadata(json_deals_metadata)

In [6]:
# Restructure the activities metadata loaded from json_activities.json. The
# result is later passed to map_columns_by_name when mapping activity values.
# NOTE(review): restructure_metadata is not defined in this notebook
# (presumably it comes from the silver_functions star import); the shape of
# its return value is not visible here — confirm in that module.
restructured_activities = restructure_metadata(json_act_metadata)

### Mapping Columns

In [7]:
# Build the column-rename lookup for activities: every entry of act_col
# contributes one pair, its 'key' (used as the column label to look up)
# mapped to its 'name' (used as the replacement label).
act_map = {
    column_meta['key']: column_meta['name']
    for column_meta in act_col
}

In [8]:
# Rename the columns of both activity frames in place using act_map.
for activities_frame in (df_activities_com, df_activities_exp):
    activities_frame.rename(columns=act_map, inplace=True)

In [9]:
# Build the column-rename lookup for deals: every entry of deals_col
# contributes one pair, its 'key' (used as the column label to look up)
# mapped to its 'name' (used as the replacement label).
deals_map = {
    column_meta['key']: column_meta['name']
    for column_meta in deals_col
}

In [10]:
# Rename the columns of both deal frames in place using deals_map.
for deals_frame in (df_deals_com, df_deals_exp):
    deals_frame.rename(columns=deals_map, inplace=True)

### Mapping Stages

In [11]:
# Translate the 'Etapa' values of the com deals through stages_dict.
# The column is cast to str first because stages_dict comes from JSON, whose
# object keys are always strings. Values with no entry in stages_dict
# become NaN.
df_deals_com['Etapa'] = df_deals_com['Etapa'].astype(str).map(stages_dict)

In [12]:
# Translate the 'Etapa' values of the exp deals through stages_dict.
# The column is cast to str first because stages_dict comes from JSON, whose
# object keys are always strings. Values with no entry in stages_dict
# become NaN.
df_deals_exp['Etapa'] = df_deals_exp['Etapa'].astype(str).map(stages_dict)

### Mapping Rows

In [13]:
# Map the cell values of both activity frames using the restructured
# activities metadata.
# NOTE(review): map_columns_by_name is not defined in this notebook
# (presumably it comes from the silver_functions star import). Presumably it
# replaces coded values with their labels per column — confirm in that module.
df_activities_com = map_columns_by_name(df_activities_com, restructured_activities)
df_activities_exp = map_columns_by_name(df_activities_exp, restructured_activities)

In [14]:
# Map the cell values of the com deals frame using the restructured deals
# metadata.
# NOTE(review): map_columns_by_name is not defined in this notebook
# (presumably it comes from the silver_functions star import). The recorded
# run of this cell printed a notice listing metadata columns that do not
# exist in this DataFrame; presumably the helper emits it and skips those
# columns — confirm in that module.
df_deals_com = map_columns_by_name(df_deals_com, restructured_deals)

Las siguientes columnas no existen en el DataFrame: ['Razón de la pérdida', '(EXP) Estado renovación', '(EXP) Tipo de Ingreso', '(EXP) ¿Recomendador?', 'TBD']


In [15]:
# Map the cell values of the exp deals frame using the restructured deals
# metadata.
# NOTE(review): map_columns_by_name is not defined in this notebook
# (presumably it comes from the silver_functions star import) — confirm its
# exact behavior in that module.
df_deals_exp = map_columns_by_name(df_deals_exp, restructured_deals)

### Replace 'Tipo'

In [16]:
# Replace the old 'Reunión ...' activity type labels with their 'R2 ...'
# equivalents. Types that are not listed below keep their original value:
# map() yields NaN for them and fillna() restores the original entry.
old_tipo_to_r2 = {
    'Reunión In': 'R2 In',
    'Reunión Out': 'R2 Out',
    'Reunión Online': 'R2 online',
    'Reunión In Cancelada': 'R2 in cancelado',
    'Reunión Out Cancelada': 'R2 out cancelado',
    'Reunión Online cancelada': 'R2 online cancelado',
}
renamed_tipo = df_activities_com['Tipo'].map(old_tipo_to_r2)
df_activities_com['Tipo'] = renamed_tipo.fillna(df_activities_com['Tipo'])

In [17]:
# Collapse the free-text "(C) (EXP) Plazo y Pago" column into a fixed set of
# labels by keyword search (case-insensitive).
#
# Order matters: "bianual" contains "anual" as a substring, so the more
# specific keyword must be tested first; otherwise every bi-annual value is
# labelled "Anual" and "Bianual" can never be produced.
_PLAZO_PAGO_KEYWORDS = (
    ("bianual", "Bianual"),
    ("anual", "Anual"),
    ("semestral", "Semestral"),
    ("trimestral", "Trimestral"),
    ("mensual", "Mensual"),
)


def _classify_plazo_pago(value):
    """Return the payment-term label for *value*.

    The first keyword of ``_PLAZO_PAGO_KEYWORDS`` found in the lower-cased
    text wins; "Otros" is returned when no keyword is present.
    """
    # str() makes non-string cells searchable; a missing value (NaN) becomes
    # "nan", matches no keyword and therefore ends up as "Otros".
    text = str(value).lower()
    for keyword, label in _PLAZO_PAGO_KEYWORDS:
        if keyword in text:
            return label
    return "Otros"


plazo_pago_column = "(C) (EXP) Plazo y Pago"
df_deals_exp[plazo_pago_column] = df_deals_exp[plazo_pago_column].apply(_classify_plazo_pago)

In [18]:
# Export the standardized data to CSV; index=False leaves the row index out
# of the written files.
# Paths use forward slashes so they resolve on POSIX systems as well as on
# Windows (backslash separators only resolve on Windows).

# Activities
df_activities_com.to_csv('../data/output_silver/01_silver_standarized/activities_com.csv', index=False)
df_activities_exp.to_csv('../data/output_silver/01_silver_standarized/activities_exp.csv', index=False)

# Deals
df_deals_com.to_csv('../data/output_silver/01_silver_standarized/deals_com.csv', index=False)
df_deals_exp.to_csv('../data/output_silver/01_silver_standarized/deals_exp.csv', index=False)