In [1]:
import json
import os
import urllib
from datetime import datetime

import hubspot
import pandas as pd
import requests
from dotenv import load_dotenv
from hubspot.auth.oauth import ApiException
from hubspot.crm.deals import ApiException as DealsApiException
from hubspot.crm.products import ApiException as ProductsApiException
from sqlalchemy import create_engine, types
from sqlalchemy.pool import NullPool
from validate_email import validate_email

# Load variables from a local .env file (if any) into the process environment
# before they are read below.
load_dotenv()
# Required settings: a missing variable raises KeyError right here, at cell
# execution time, rather than later inside an API or database call.
DATABASE_CONNECTION_URI = os.environ["NEW_DB_URL"]
ACCESS_TOKEN = os.environ["ACCESS_TOKEN"]
# NOTE(review): client_id / client_secret are not referenced by any of the
# visible cells - confirm whether they are still needed.
client_id = os.environ["CLIENT_ID"]
client_secret = os.environ["CLIENT_SECRET"]

# create a connection to the database
# NOTE(review): SqlAPI builds its own engine from DATABASE_CONNECTION_URI, so
# this module-level engine looks unused in the visible cells - confirm.
engine = create_engine(DATABASE_CONNECTION_URI)

In [2]:
class HubspotAPI:
    """Exports HubSpot deals to CSV and normalises the exported file.

    Workflow:
      1. ``raw_export_deals`` pages through the deals endpoint and writes the
         ``properties`` of every deal to ``raw_hubspot_deals.csv``.
      2. ``handle_raw_hubspot`` reads a raw CSV, renames columns, replaces
         internal ids with readable labels, derives ``region_comercial`` and
         writes the processed CSV (raw name without the ``raw_`` prefix).

    Both CSV files are written as Latin-1 with ``errors='ignore'``, so
    characters that Latin-1 cannot represent are dropped on write.
    """

    # Number of deals requested per get_page call.
    PAGE_SIZE = 100
    # Consecutive failed page requests tolerated before the export gives up.
    MAX_CONSECUTIVE_ERRORS = 3
    # Prefix removed from the raw CSV name to build the processed CSV name.
    RAW_PREFIX = "raw_"

    # Deal stage id -> readable label (pipeline name in parentheses).
    DEALSTAGE_LABELS = {
        "5101741": 'Identificación de Oportunidad (Ventas)',
        "d85ad2ee-ad84-48f3-a4e6-3c2752733963": 'Primer Contacto 10% (Ventas)',
        "750846": 'Demo Realizada 15% (Ventas)',
        "eb078c2a-ae3e-411a-9ce6-b8e5ca06dcd5": 'Presupuesto Enviado 20% (Ventas)',
        "d5608044-cac5-4e0c-9838-755be8adf652": 'Opt Avanzada / En Piloto 50% (Ventas)',
        "afcc2b30-8757-4d7e-923d-053d7db0205c": 'OC en Proceso / Cercano a Cierre 90% (Ventas)',
        "3d0d99a8-5791-421b-a1e5-6234c0844b65": 'Deal Won (Ventas)',
        "03a76306-0231-417d-9223-5bd0b12f63d5": 'Deal Lost (Ventas)',
        "7356165": 'Oportunidad de Baja (Ventas)',
        "1005088": 'Partners potenciales (Partners)',
        "2ad1f599-a06f-4456-bea4-e28739d0c6fb": 'Partner confirmado (NDA firmado) - Coordinar Onboarding (Partners)',
        "17487102": 'Capacitación comercial coordinada (Partners)',
        "2014582": 'Capacitación técnica coordinada (Partners)',
        "1611634": 'Capacitación comercial y técnica coordinada (Partners)',
        "32753101-a1a6-4c44-a789-82576c7b1505": 'Partner certificado - VAR (Partners)',
        "7025326": 'Partner certificado - Referral (Partners)',
        "2491428": 'Canales en Pausa (Partners)',
        "ff4aae15-0c00-4fa8-a677-39b35df7924f": 'No hay interés/No califica (Partners)',
        "cf839eef-4bbd-4368-804d-6cf60dabc03c": 'Contactos Identficados (Prospección)',
        "d9852484-edc7-4623-9cec-6a1639cff93f": 'Secuencia Enviada (Prospección)',
        "cd642987-1f60-48d1-9d83-4042e15d4be8": 'En Seguimiento / No Contesta (Prospección)',
        "33ba8152-823d-49ff-abfc-cfb9951374f3": 'Agendamiento de Reunión (Prospección)',
        "45411143": 'Prospección Exitosa (Prospección)',
        "583eddf6-eeaa-418d-91d0-8de9e28a2ec3": 'No califica (Prospección)',
        "201584": 'Potentially Good Fit - Sin contacto previo (Investor Pipeline)',
        "201583": 'Exploratory meeting (Investor Pipeline)',
        "8b586784-2f37-4604-89bd-36d088131e4c": 'Potencial interest in following (Investor Pipeline)',
        "bbbfa8fe-eea1-4d0a-8c15-3b322e777348": 'Potencial lead Investor (Investor Pipeline)',
        "493985da-75dc-4e5d-b103-f7be26d8cdf7": 'Due dilligence (Investor Pipeline)',
        "92c9806c-f2b2-40bc-9e08-2e50738b4050": 'No fit (Investor Pipeline)',
        "839d6147-109d-4a3e-853b-25ecd3c9124e": 'Stand by (Investor Pipeline)',
        "23780789": 'Clientes (Customer Success & Upsell)',
        "93563059": 'Reunion 1  (Customer Success & Upsell)',
        "17681683": 'Reunion 2 (Customer Success & Upsell)',
        "17681685": 'Reunion 3 (Customer Success & Upsell)',
        "138434867": 'En Capacitacion (Customer Success & Upsell)',
        "138434869": 'Upsell (Customer Success & Upsell)',
        "138434868": 'Fin del ciclo (Customer Success & Upsell)',
        "22687028": 'Derivado a Soporte (Customer Success & Upsell)',
        "17681687": 'Derivado a Comercial (Customer Success & Upsell)',
        "17681688": 'Derivado a Renovacion (Customer Success & Upsell)',
        "29561767": 'Sin Respuesta (Customer Success & Upsell)'
    }

    # Pipeline id -> pipeline name.
    PIPELINE_LABELS = {
        "7742d15e-56ae-4415-9c86-567ad766837a": 'Ventas',
        "820bd061-2228-4455-a379-4bf409d4554a": 'Partners',
        "1e666b90-c872-4a63-a4c1-1bce1dc79947": 'Prospección',
        "b1830386-2a88-458f-9cb4-71511efaaf87": 'Investor Pipeline',
        "5667166": 'Customer Success & Upsell',
        "87829790": 'Testing',
    }

    # Owner id -> owner display name.
    DEAL_OWNER_NAMES = {
        "7288359": 'Juan Martín Balan',
        "7332199": 'Nicolas Demner',
        "7857377": 'Martin Matias Fernandez Canto',
        "9194130": 'Joaquin Zoilo',
        "9430040": 'Gustavo Lauria',
        "30126325": 'Ventas Debmedia',
        "34868053": 'Matías Restahinoch',
        "38062801": 'Agustin Gelman',
        "38276053": 'Agustina Arroyo',
        "38627077": 'Tomas Noya',
        "55049136": 'Matias Pumo',
        "55049155": 'Camilo Varacalli',
        "56630584": 'Nicolas Menzaghi',
        "101057642": 'Roswel Amador',
        "112117634": 'Camila Acosta',
        "121481834": 'Soporte Debmedia',
        "246829367": 'Rafael Mattos',
        "312155919": 'Mariano Ahualli',
        "326472108": 'Lucía Imperiali',
        "366806335": 'Jaime Rodriguez',
        "396946262": 'valentina mineo',
        "399508303": 'Mateo Scapoli',
        "546914565": 'Iván Federico',
        "584399104": 'Aaron Escamilla',
        "597453098": 'Agustín Dimaio',
        "613110176": 'Pablo Prez',
        "653869216": 'Juan Lissarrague',
        "679733780": 'Agustina Coronel',
        "2119600589": "Nicolas Faccini",
        "8231273": 'deactivated user',
        "24586352": 'deactivated user',
        "14224485": 'deactivated user'
    }

    # Country name (accented and unaccented spellings) -> commercial region.
    COMMERCIAL_REGIONS = {
        'Argentina': 'Región 1 - AR, BO, PY, UR',
        'Bolivia': 'Región 1 - AR, BO, PY, UR',
        'Paraguay': 'Región 1 - AR, BO, PY, UR',
        'Uruguay': 'Región 1 - AR, BO, PY, UR',
        'Chile': 'Región 2 - CL, PE',
        'Perú': 'Región 2 - CL, PE',
        'Peru': 'Región 2 - CL, PE',
        'Colombia': 'Región 3 - CO, EC',
        'Ecuador': 'Región 3 - CO, EC',
        'México': 'Región 4 - MX',
        'Mexico': 'Región 4 - MX',
        'Costa Rica': 'Región 5 - Centroamérica',
        'Guatemala': 'Región 5 - Centroamérica',
        'Honduras': 'Región 5 - Centroamérica',
        'República Dominicana': 'Región 5 - Centroamérica',
        'Republica Dominicana': 'Región 5 - Centroamérica',
        'Puerto Rico': 'Región 5 - Centroamérica',
        'Nicaragua': 'Región 5 - Centroamérica',
        'Panama': 'Región 5 - Centroamérica',
        'Panamá': 'Región 5 - Centroamérica',
        'Brasil': 'Región 6 - BR',
        'España': 'Región 7 - Europa',
        # Add more countries as needed
    }

    def __init__(self, access_token=None, max_results=1000000):
        """Create the HubSpot client.

        Args:
            access_token: Token used to authenticate; defaults to the
                module-level ``ACCESS_TOKEN``.
            max_results: The export stops requesting pages once at least this
                many deals have been gathered.
        """
        self.access_token = ACCESS_TOKEN if access_token is None else access_token
        self.client = hubspot.Client.create(access_token=self.access_token)
        self.max_results = max_results

    def raw_export_deals(self, properties_dict):
        """Download every non-archived deal and write ``raw_hubspot_deals.csv``.

        Args:
            properties_dict: Mapping whose keys are the deal property names to
                request (the values are not used here).

        Raises:
            The API exception of the last attempt, after
            ``MAX_CONSECUTIVE_ERRORS`` consecutive failed requests for the
            same page.
        """
        results = []
        after = 0
        consecutive_errors = 0
        property_names = list(properties_dict.keys())

        # The cursor stops being numeric (None) when there is no next page.
        while after is not None and str(after).isnumeric() and len(results) < self.max_results:
            try:
                api_response = self.client.crm.deals.basic_api.get_page(
                    limit=self.PAGE_SIZE,
                    after=after,
                    properties=property_names,
                    associations=[],
                    archived=False,
                ).to_dict()
            # The deals API raises the deals ApiException; the products one is
            # kept so anything the previous handler caught is still caught.
            except (DealsApiException, ProductsApiException) as e:
                print("Exception when calling basic_api->get_page: %s\n" % e)
                consecutive_errors += 1
                # Retry the same page a bounded number of times; retrying
                # without a bound would loop forever on a persistent error.
                if consecutive_errors >= self.MAX_CONSECUTIVE_ERRORS:
                    raise
                continue

            consecutive_errors = 0
            results.extend(api_response['results'])
            print("Hubspot Deals Export has gathered " + str(len(results)) + " Deals")

            # 'paging' (or its 'next' entry) is missing/None on the last page.
            paging = api_response.get('paging') or {}
            next_page = paging.get('next') or {}
            after = next_page.get('after')

        property_results = [result["properties"] for result in results]
        df = pd.DataFrame(property_results)
        df.to_csv("raw_hubspot_deals.csv", encoding="latin-1", index=False, errors='ignore')

    def handle_raw_hubspot(self, csv_file, general_values, properties_dict, values_dict, date_columns, dtype=None):
        """Turn a raw export CSV into the processed CSV.

        Args:
            csv_file: Raw CSV path without the ``.csv`` extension. The output
                name is this value minus its first ``len(RAW_PREFIX)``
                characters, i.e. it is expected to start with ``raw_``.
            general_values: Value replacements applied to the whole frame;
                they take precedence over ``values_dict`` on key clashes.
            properties_dict: Column rename mapping (old name -> new name).
            values_dict: Value replacements applied to the whole frame. It is
                not modified.
            date_columns: Renamed column names that are excluded from the
                dtype conversion.
            dtype: Optional mapping of renamed column name -> dtype. Columns
                without an entry are left as read from the CSV.

        Raises:
            KeyError: If the renamed frame lacks ``dealstage``, ``pipeline``,
                ``hubspot_owner_id`` or ``pais``.
        """
        dtype = {} if dtype is None else dtype

        df = pd.read_csv(csv_file + ".csv", encoding="latin-1")
        df = df.rename(columns=properties_dict)

        # Adjust datatypes; failed conversions leave the column unchanged.
        for col in df.columns:
            if col not in date_columns and col in dtype:
                df[col] = df[col].astype(dtype[col], errors='ignore')

        # Merge into a fresh dict so the caller's values_dict is left intact.
        replacements = {**values_dict, **general_values}

        # Every cell becomes text (nulls become '') so the id -> label
        # replacements below compare strings with strings.
        # DataFrame.map only exists in newer pandas; fall back to applymap.
        to_text = getattr(df, "map", None) or df.applymap
        df = to_text(lambda x: str(x) if pd.notnull(x) else '')
        if replacements:
            df = df.replace(replacements)

        # Replace internal ids with readable labels in the specific columns.
        df['dealstage'] = df['dealstage'].replace(self.DEALSTAGE_LABELS)
        df['pipeline'] = df['pipeline'].replace(self.PIPELINE_LABELS)
        df['hubspot_owner_id'] = df['hubspot_owner_id'].replace(self.DEAL_OWNER_NAMES)

        # Countries without a known region (including empty ones) map to None.
        df['region_comercial'] = df['pais'].map(lambda pais: self.COMMERCIAL_REGIONS.get(pais))

        # Export
        df.to_csv(csv_file[len(self.RAW_PREFIX):] + ".csv", encoding="latin-1", index=False, errors='ignore')

    


In [3]:
class SqlAPI:
    """Loads processed CSV files into database tables through SQLAlchemy."""

    # Schema datatype name -> attribute name of the class in sqlalchemy.types.
    # Stored as names so the lookup happens when a conversion is requested.
    _SQL_TYPE_NAMES = {
        "string": "TEXT",
        "int64": "NUMERIC",
        "float64": "FLOAT",
        "datetime64": "TIMESTAMP",
        "bool": "BOOLEAN",
    }

    def __init__(self):
        """Create the engine from the module-level DATABASE_CONNECTION_URI."""
        self.conn = create_engine(DATABASE_CONNECTION_URI)

    def str_type_into_sqltype(self, datatype_dict):
        """Translate datatype names into SQLAlchemy column type instances.

        Args:
            datatype_dict: Mapping of column name -> datatype name
                ("string", "int64", "float64", "datetime64" or "bool").

        Returns:
            A new dict with the same keys. Recognised names are replaced by an
            instance of the matching SQLAlchemy type; any other value is
            copied as-is. The input mapping is not modified.
        """
        converted = {}
        for column, datatype in datatype_dict.items():
            # Only strings can name a type; anything else passes through.
            type_name = self._SQL_TYPE_NAMES.get(datatype) if isinstance(datatype, str) else None
            converted[column] = datatype if type_name is None else getattr(types, type_name)()
        return converted

    def insert_df(self, dataframe, table, dtype, index=False, if_exists="replace"):
        """Write ``dataframe`` to ``table`` using multi-row INSERT statements.

        Args:
            dataframe: Frame to store.
            table: Destination table name.
            dtype: Column name -> SQL type mapping forwarded to ``to_sql``.
            index: Whether the frame index is written as a column.
            if_exists: ``to_sql`` behaviour when the table already exists;
                the default replaces the table.
        """
        dataframe.to_sql(
            name=table,
            con=self.conn,
            index=index,
            if_exists=if_exists,
            method='multi',
            dtype=dtype,
            chunksize=10000
        )

    def update_table(self, table, dtype):
        """Load ``<table>.csv`` from the working directory into ``table``.

        Column names are lower-cased and spaces become underscores before the
        insert.

        Args:
            table: Table name; also the CSV file name without extension.
            dtype: Column name -> datatype name mapping, converted with
                ``str_type_into_sqltype``.
        """
        print("Update Started " + table)
        csv_name = table + ".csv"
        df = pd.read_csv(csv_name, encoding='latin-1')
        df.columns = ["_".join(column.lower().split(" ")) for column in df.columns]
        dtype = self.str_type_into_sqltype(dtype)
        self.insert_df(df, table, dtype)
        print("Update Finished " + table)

In [4]:
def dict_into_simple_dict(complex_dict, simple_value):
    """Flatten a dict of dicts down to a single field per key.

    Args:
        complex_dict: Mapping of key -> inner mapping.
        simple_value: Field to pull out of every inner mapping.

    Returns:
        A dict with the same keys, in the same order, whose values are
        ``complex_dict[key][simple_value]``.

    Raises:
        KeyError: If an inner mapping has no ``simple_value`` entry.
    """
    return {key: inner[simple_value] for key, inner in complex_dict.items()}


def list_of_datetimes(complex_dict):
    """Return the keys whose "datatype" field equals "datetime64".

    Args:
        complex_dict: Mapping of key -> inner mapping with a "datatype" entry.

    Returns:
        The matching keys as a list, in the mapping's iteration order.
    """
    datatypes = dict_into_simple_dict(complex_dict, "datatype")
    return [name for name, datatype in datatypes.items() if datatype == "datetime64"]

def update_database():
    """Export HubSpot deals, clean them and load them into the database.

    Reads the schema from ``api_schema_copy.json`` in the working directory,
    writes ``raw_hubspot_deals.csv`` and ``hubspot_deals.csv`` there, and
    loads the latter into the ``hubspot_deals`` table.
    """
    # DB schema handling; the context manager closes the schema file.
    with open("api_schema_copy.json") as schema_file:
        api_schema = json.load(schema_file)["API"]
    hubspot_schema = api_schema["Hubspot"]
    deals_schema = hubspot_schema["Deals"]

    # Hubspot schema
    h_deals_properties = dict_into_simple_dict(deals_schema["Properties"], "header_name")
    h_deals_values = deals_schema["Values"]
    # Date columns are listed by property name in the schema; translate them
    # to the header names the columns carry after renaming.
    h_deals_datecolumns = [
        h_deals_properties[name] for name in list_of_datetimes(deals_schema["Properties"])
    ]
    # Header name -> datatype name, paired by position with the properties.
    h_deals_datatypes = dict(zip(
        h_deals_properties.values(),
        dict_into_simple_dict(deals_schema["Properties"], "datatype").values(),
    ))

    h_generalvalues = hubspot_schema["General Values"]

    # Class variables declaration
    s = SqlAPI()
    h = HubspotAPI()

    # Hubspot export
    h.raw_export_deals(h_deals_properties)

    # Data handle
    h.handle_raw_hubspot("raw_hubspot_deals", h_generalvalues, h_deals_properties, h_deals_values,
                         h_deals_datecolumns, h_deals_datatypes)

    # Tables to update in the DB, each paired with its datatype mapping.
    tables = {
        "hubspot_deals": h_deals_datatypes,
    }
    for table, datatypes in tables.items():
        s.update_table(table, datatypes)

In [5]:
# Run the pipeline and always clean up its intermediate CSV files, even when
# the update fails part-way through.
try:
    update_database()
finally:
    for temp_csv in ("raw_hubspot_deals" + ".csv", "hubspot_deals" + ".csv"):
        # A file may not exist if the run failed before writing it; removing
        # it unconditionally would raise and mask the original error.
        if os.path.exists(temp_csv):
            os.remove(temp_csv)


Hubspot Deals Export has gathered 100 Deals
Hubspot Deals Export has gathered 200 Deals
Hubspot Deals Export has gathered 300 Deals
Hubspot Deals Export has gathered 400 Deals
Hubspot Deals Export has gathered 500 Deals
Hubspot Deals Export has gathered 600 Deals
Hubspot Deals Export has gathered 700 Deals
Hubspot Deals Export has gathered 800 Deals
Hubspot Deals Export has gathered 900 Deals
Hubspot Deals Export has gathered 1000 Deals
Hubspot Deals Export has gathered 1100 Deals
Hubspot Deals Export has gathered 1200 Deals
Hubspot Deals Export has gathered 1300 Deals
Hubspot Deals Export has gathered 1400 Deals
Hubspot Deals Export has gathered 1500 Deals
Hubspot Deals Export has gathered 1600 Deals
Hubspot Deals Export has gathered 1700 Deals
Hubspot Deals Export has gathered 1800 Deals
Hubspot Deals Export has gathered 1900 Deals
Hubspot Deals Export has gathered 2000 Deals
Hubspot Deals Export has gathered 2100 Deals
Hubspot Deals Export has gathered 2200 Deals
Hubspot Deals Expor