In [1]:
import pandas as pd
import numpy as np
from faker import Faker
import random
import datetime
import boto3
import psycopg2
import configparser

### Creación del modelo de datos

In [2]:
rdsIdentifier = 'super-db' #nombre de la instancia

#### Cargamos archivo de configuraciones

In [3]:
config = configparser.ConfigParser()
config.read('escec2.cfg')

['escec2.cfg']

### Creamos Instancia de S3 y RDS

In [4]:
s3 = boto3.resource(
    service_name = 's3',
    region_name = 'us-east-1',
    aws_access_key_id = config.get('IAM', 'ACCESS_KEY'),
    aws_secret_access_key = config.get('IAM', 'SECRET_ACCESS_KEY')
)
aws_conn = boto3.client('rds', aws_access_key_id=config.get('IAM', 'ACCESS_KEY'),
                    aws_secret_access_key=config.get('IAM', 'SECRET_ACCESS_KEY'),
                    region_name='us-east-1')

#### Verificamos Instancias de RDS disponibles

In [5]:
rdsInstanceIds = []

response = aws_conn.describe_db_instances()
for resp in response['DBInstances']:
    rdsInstanceIds.append(resp['DBInstanceIdentifier'])
    db_instance_status = resp['DBInstanceStatus']

print(f"DBInstanceIds {rdsInstanceIds}")

DBInstanceIds ['super-db']


#### Creación de Servicio RDS

In [6]:
try:
    response = aws_conn.create_db_instance(
            AllocatedStorage=10,
            DBName=config.get('RDS_MYSQL', 'DB_NAME'),
            DBInstanceIdentifier=rdsIdentifier,
            DBInstanceClass="db.t3.micro",
            Engine="mysql",
            MasterUsername=config.get('RDS_MYSQL', 'DB_USER'),
            MasterUserPassword=config.get('RDS_MYSQL', 'DB_PASSWORD'),
            BackupRetentionPeriod=0, #para evitar los backups y que nos cobren
            Port=int(config.get('RDS_MYSQL', 'DB_PORT')),
            VpcSecurityGroupIds=[config.get('VPC', 'SECURITY_GROUP')],
            PubliclyAccessible=True
        )
    print(response)
except aws_conn.exceptions.DBInstanceAlreadyExistsFault as ex:
    print("La Instancia de Base de Datos ya Existe.")

La Instancia de Base de Datos ya Existe.


##### Obtenemos URL del Host

In [7]:
try:
     instances = aws_conn.describe_db_instances(DBInstanceIdentifier=rdsIdentifier)
     RDS_HOST = instances.get('DBInstances')[0].get('Endpoint').get('Address')
     print(RDS_HOST)
except Exception as ex:
     print("La instancia de base de datos no existe o aun no se ha terminado de crear.")
     print(ex)

super-db.cio9bwv4hzyt.us-east-1.rds.amazonaws.com


#### Conexión a Base de Datos desde Python

In [8]:
import querie_super

In [9]:
import querie_super
import mysql.connector as mysqlC
try:
    myDw = mysqlC.connect(
    host=RDS_HOST, 
    user=config.get('RDS_MYSQL', 'DB_USER'),
    password=config.get('RDS_MYSQL', 'DB_PASSWORD'),
    database=config.get('RDS_MYSQL', 'DB_NAME')
    )

    mycursor = myDw.cursor()
    mycursor.execute(querie_super.DDL_QUERY_SUPER, multi=True)
    myDw.commit()
    print("Data Warehouse Creado Exitosamente")
except Exception as ex:
    print("ERROR: Error al crear la base de datos.")
    print(ex)

#Aunque nos de un error, verificar en DBeaver si se creó el query y las tablas

ERROR: Error al crear la base de datos.
1050 (42S01): Table 'Ubicacion' already exists


#### Driver MYSQL


In [10]:
mysql_driver = f"""mysql+pymysql://{config.get('RDS_MYSQL', 'DB_USER')}:{config.get('RDS_MYSQL', 'DB_PASSWORD')}@{RDS_HOST}:{config.get('RDS_MYSQL', 'DB_PORT')}/{config.get('RDS_MYSQL', 'DB_NAME')}"""  

In [11]:
sql_query = 'SELECT * FROM Ubicacion;'
df_ubicacion = pd.read_sql(sql_query, mysql_driver)
df_ubicacion.head()

Unnamed: 0,ID_Ubicacion,Codigo_postal,Pais,Estado,Ciudad
0,1,55407.0,United States,Minnesota,Minneapolis
1,2,94109.0,United States,California,San Francisco
2,3,94122.0,United States,California,San Francisco
3,4,78664.0,United States,Texas,Round Rock
4,5,10011.0,United States,New York,New York City


#### Creamos el bucket y cargamos los datos

In [12]:
# Nombre del bucket
bucket_name = 'datos-super-edgar'

In [13]:
# Creación del bucket
s3.create_bucket(Bucket=bucket_name)

# documentos
file_name = 'dim_date.csv'
file_name2 = 'SuperStoreOutput.csv'

# Nombre del archivo csv en el bucket
s3_file_name = 'dim_date_super.csv'
s3_file_name2 = 'SuperStoreOutput.csv'

# Metiendo los archivos al bucket
s3.upload_file(file_name, bucket_name, s3_file_name)
s3.upload_file(file_name2, bucket_name, s3_file_name2)

#Acabo de cargar los datos de fechas y la base de datos principal



AttributeError: 's3.ServiceResource' object has no attribute 'upload_file'

In [14]:
for bucket in s3.buckets.all():
    S3_BUCKET_NAME = bucket.name
    print(bucket.name)

bucket-v-23000966
datos-super-edgar


In [15]:
#extraemos todo lo que está en el bucket
remoteFileList = []
for objt in s3.Bucket(bucket_name).objects.all():
    remoteFileList.append(objt.key)

remoteFileList

['SuperStoreOutput.csv', 'dim_date_super.csv']

#### Leemos archivo del bucket de S3

In [16]:
# Cargamos el data frame de supermercado
file1 = s3.Bucket(bucket_name).Object('SuperStoreOutput.csv').get()
tabla_super= pd.read_csv(file1['Body'])
tabla_super.head() 

Unnamed: 0,Row ID,Order ID,Order Date,Ship Date,Ship Mode,Customer ID,Customer Name,Segment,Country,City,...,Postal Code,Region,Product ID,Category,Sub-Category,Product Name,Sales,Quantity,Discount,Profit
0,1160,CA-2018-147039,6/29/2018 12:00:00 AM,7/4/2018 12:00:00 AM,Standard Class,AA-10315,Alex Avila,Consumer,United States,Minneapolis,...,55407.0,Central,OFF-AP-10000576,Office Supplies,Appliances,"Belkin 325VA UPS Surge Protector, 6'",362.94,3,0.0,90.74
1,1161,CA-2018-147039,6/29/2018 12:00:00 AM,7/4/2018 12:00:00 AM,Standard Class,AA-10315,Alex Avila,Consumer,United States,Minneapolis,...,55407.0,Central,OFF-BI-10004654,Office Supplies,Binders,Avery Binding System Hidden Tab Executive Styl...,11.54,2,0.0,5.77
2,1300,CA-2016-121391,10/4/2016 12:00:00 AM,10/7/2016 12:00:00 AM,First Class,AA-10315,Alex Avila,Consumer,United States,San Francisco,...,94109.0,West,OFF-ST-10001590,Office Supplies,Storage,Tenex Personal Project File with Scoop Front D...,26.96,2,0.0,7.01
3,2230,CA-2015-128055,3/31/2015 12:00:00 AM,4/5/2015 12:00:00 AM,Standard Class,AA-10315,Alex Avila,Consumer,United States,San Francisco,...,94122.0,West,OFF-BI-10004390,Office Supplies,Binders,GBC DocuBind 200 Manual Binding Machine,673.57,2,0.2,252.59
4,2231,CA-2015-128055,3/31/2015 12:00:00 AM,4/5/2015 12:00:00 AM,Standard Class,AA-10315,Alex Avila,Consumer,United States,San Francisco,...,94122.0,West,OFF-AP-10002765,Office Supplies,Appliances,Fellowes Advanced Computer Series Surge Protec...,52.98,2,0.0,14.83


In [17]:
# Cargamos el data frame de fechas
file2 = s3.Bucket(bucket_name).Object('dim_date_super.csv').get()
fechas= pd.read_csv(file2['Body'])
fechas.head()

Unnamed: 0,date_key,full_date,day_of_week,day_num_in_month,day_num_overall,day_name,day_abbrev,weekday_flag,week_num_in_year,week_num_overall,...,month_name,month_abbrev,quarter,year,yearmo,fiscal_month,fiscal_quarter,fiscal_year,last_day_in_month_flag,same_day_year_ago_date
0,20150101,1/1/2015,4,1,1,Thursday,Thu,Weekday,1,1,...,January,Jan,1,2015,201501,7,3,2015,Not Month End,1/1/2014
1,20150102,1/2/2015,5,2,2,Friday,Fri,Weekday,1,1,...,January,Jan,1,2015,201501,7,3,2015,Not Month End,1/2/2014
2,20150103,1/3/2015,6,3,3,Saturday,Sat,Weekend,1,1,...,January,Jan,1,2015,201501,7,3,2015,Not Month End,1/3/2014
3,20150104,1/4/2015,7,4,4,Sunday,Sun,Weekend,1,1,...,January,Jan,1,2015,201501,7,3,2015,Not Month End,1/4/2014
4,20150105,1/5/2015,1,5,5,Monday,Mon,Weekday,2,2,...,January,Jan,1,2015,201501,7,3,2015,Not Month End,1/5/2014


### Limpiamos las tablas para ser ingresadas a las dimensiones y hechos (Procesamiento)

#### Creamos tabla para cliente e insertamos en la dimensión

In [18]:
tabla_cl = tabla_super.loc[:, ['Customer ID','Customer Name', 'Segment']]

nombres_cl = {'Customer ID': 'Id_cliente', 'Customer Name': 'Nombre', 'Segment': 'Segmento'}
tabla_cl.rename(columns = nombres_cl, inplace = True)

tabla_clientes= tabla_cl.drop_duplicates()
tabla_clientes.head()

Unnamed: 0,Id_cliente,Nombre,Segmento
0,AA-10315,Alex Avila,Consumer
11,AA-10375,Allen Armold,Consumer
26,AA-10480,Andrew Allen,Consumer
38,AA-10645,Anna Andreadi,Consumer
56,AB-10015,Aaron Bergman,Consumer


In [19]:
#insertamos en dimension de cliente
tabla_clientes.to_sql('Cliente', mysql_driver, index=False, if_exists='append')

IntegrityError: (pymysql.err.IntegrityError) (1062, "Duplicate entry 'AA-10315' for key 'Cliente.PRIMARY'")
[SQL: INSERT INTO `Cliente` (`Id_cliente`, `Nombre`, `Segmento`) VALUES (%(Id_cliente)s, %(Nombre)s, %(Segmento)s)]
[parameters: ({'Id_cliente': 'AA-10315', 'Nombre': 'Alex Avila', 'Segmento': 'Consumer'}, {'Id_cliente': 'AA-10375', 'Nombre': 'Allen Armold', 'Segmento': 'Consumer'}, {'Id_cliente': 'AA-10480', 'Nombre': 'Andrew Allen', 'Segmento': 'Consumer'}, {'Id_cliente': 'AA-10645', 'Nombre': 'Anna Andreadi', 'Segmento': 'Consumer'}, {'Id_cliente': 'AB-10015', 'Nombre': 'Aaron Bergman', 'Segmento': 'Consumer'}, {'Id_cliente': 'AB-10060', 'Nombre': 'Adam Bellavance', 'Segmento': 'Home Office'}, {'Id_cliente': 'AB-10105', 'Nombre': 'Adrian Barton', 'Segmento': 'Consumer'}, {'Id_cliente': 'AB-10150', 'Nombre': 'Aimee Bixby', 'Segmento': 'Consumer'}  ... displaying 10 of 793 total bound parameter sets ...  {'Id_cliente': 'ZC-21910', 'Nombre': 'Zuschuss Carroll', 'Segmento': 'Consumer'}, {'Id_cliente': 'ZD-21925', 'Nombre': 'Zuschuss Donatelli', 'Segmento': 'Consumer'})]
(Background on this error at: https://sqlalche.me/e/14/gkpj)

#### Creamos tabla para producto

In [20]:
tabla_pr = tabla_super.loc[:, ['Product ID','Category', 'Sub-Category', 'Product Name']]
tabla_producto= tabla_pr.drop_duplicates()

tabla_producto.head()


Unnamed: 0,Product ID,Category,Sub-Category,Product Name
0,OFF-AP-10000576,Office Supplies,Appliances,"Belkin 325VA UPS Surge Protector, 6'"
1,OFF-BI-10004654,Office Supplies,Binders,Avery Binding System Hidden Tab Executive Styl...
2,OFF-ST-10001590,Office Supplies,Storage,Tenex Personal Project File with Scoop Front D...
3,OFF-BI-10004390,Office Supplies,Binders,GBC DocuBind 200 Manual Binding Machine
4,OFF-AP-10002765,Office Supplies,Appliances,Fellowes Advanced Computer Series Surge Protec...


#### Creamos tabla para Ubicación

In [21]:
tabla_ub = tabla_super.loc[:, ['Postal Code','Country', 'State', 'City']]
tabla_ubicacion= tabla_ub.drop_duplicates()

tabla_ubicacion.head()

Unnamed: 0,Postal Code,Country,State,City
0,55407.0,United States,Minnesota,Minneapolis
2,94109.0,United States,California,San Francisco
3,94122.0,United States,California,San Francisco
5,78664.0,United States,Texas,Round Rock
9,10011.0,United States,New York,New York City


#### Creamos tabla para fechas de orden

In [22]:
tabla_fechas = fechas.loc[:, ['date_key', 'full_date','day_of_week', 'day_num_in_month', 'day_name', 'weekday_flag','month_name','month_abbrev','year']]

tabla_fechas.head()

Unnamed: 0,date_key,full_date,day_of_week,day_num_in_month,day_name,weekday_flag,month_name,month_abbrev,year
0,20150101,1/1/2015,4,1,Thursday,Weekday,January,Jan,2015
1,20150102,1/2/2015,5,2,Friday,Weekday,January,Jan,2015
2,20150103,1/3/2015,6,3,Saturday,Weekend,January,Jan,2015
3,20150104,1/4/2015,7,4,Sunday,Weekend,January,Jan,2015
4,20150105,1/5/2015,1,5,Monday,Weekday,January,Jan,2015


### Comenzamos con proceso para carga hacia tabla de hechos

In [26]:
tabla_super = tabla_super.drop('Ship Mode',axis=1)
tabla_super

Unnamed: 0,Row ID,Order ID,Order Date,Ship Date,Customer ID,Customer Name,Segment,Country,City,State,Postal Code,Region,Product ID,Category,Sub-Category,Product Name,Sales,Quantity,Discount,Profit
0,1160,CA-2018-147039,6/29/2018 12:00:00 AM,7/4/2018 12:00:00 AM,AA-10315,Alex Avila,Consumer,United States,Minneapolis,Minnesota,55407.0,Central,OFF-AP-10000576,Office Supplies,Appliances,"Belkin 325VA UPS Surge Protector, 6'",362.94,3,0.0,90.74
1,1161,CA-2018-147039,6/29/2018 12:00:00 AM,7/4/2018 12:00:00 AM,AA-10315,Alex Avila,Consumer,United States,Minneapolis,Minnesota,55407.0,Central,OFF-BI-10004654,Office Supplies,Binders,Avery Binding System Hidden Tab Executive Styl...,11.54,2,0.0,5.77
2,1300,CA-2016-121391,10/4/2016 12:00:00 AM,10/7/2016 12:00:00 AM,AA-10315,Alex Avila,Consumer,United States,San Francisco,California,94109.0,West,OFF-ST-10001590,Office Supplies,Storage,Tenex Personal Project File with Scoop Front D...,26.96,2,0.0,7.01
3,2230,CA-2015-128055,3/31/2015 12:00:00 AM,4/5/2015 12:00:00 AM,AA-10315,Alex Avila,Consumer,United States,San Francisco,California,94122.0,West,OFF-BI-10004390,Office Supplies,Binders,GBC DocuBind 200 Manual Binding Machine,673.57,2,0.2,252.59
4,2231,CA-2015-128055,3/31/2015 12:00:00 AM,4/5/2015 12:00:00 AM,AA-10315,Alex Avila,Consumer,United States,San Francisco,California,94122.0,West,OFF-AP-10002765,Office Supplies,Appliances,Fellowes Advanced Computer Series Surge Protec...,52.98,2,0.0,14.83
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9989,3815,CA-2017-152471,7/8/2017 12:00:00 AM,7/8/2017 12:00:00 AM,ZD-21925,Zuschuss Donatelli,Consumer,United States,Jacksonville,Florida,32216.0,South,TEC-PH-10002824,Technology,Phones,Jabra SPEAK 410 Multidevice Speakerphone,823.96,5,0.2,51.50
9990,3816,CA-2017-152471,7/8/2017 12:00:00 AM,7/8/2017 12:00:00 AM,ZD-21925,Zuschuss Donatelli,Consumer,United States,Jacksonville,Florida,32216.0,South,OFF-PA-10004965,Office Supplies,Paper,Xerox 1921,15.98,2,0.2,4.99
9991,5898,CA-2017-167682,4/3/2017 12:00:00 AM,4/9/2017 12:00:00 AM,ZD-21925,Zuschuss Donatelli,Consumer,United States,Richmond,Indiana,47374.0,Central,FUR-FU-10003799,Furniture,Furnishings,"Seth Thomas 13 1/2"" Wall Clock",71.12,4,0.0,22.05
9992,5899,CA-2017-167682,4/3/2017 12:00:00 AM,4/9/2017 12:00:00 AM,ZD-21925,Zuschuss Donatelli,Consumer,United States,Richmond,Indiana,47374.0,Central,TEC-PH-10000673,Technology,Phones,Plantronics Voyager Pro HD - Bluetooth Headset,259.96,4,0.0,124.78


In [27]:
tabla_super = tabla_super.drop('Ship Date',axis=1)
tabla_super

Unnamed: 0,Row ID,Order ID,Order Date,Customer ID,Customer Name,Segment,Country,City,State,Postal Code,Region,Product ID,Category,Sub-Category,Product Name,Sales,Quantity,Discount,Profit
0,1160,CA-2018-147039,6/29/2018 12:00:00 AM,AA-10315,Alex Avila,Consumer,United States,Minneapolis,Minnesota,55407.0,Central,OFF-AP-10000576,Office Supplies,Appliances,"Belkin 325VA UPS Surge Protector, 6'",362.94,3,0.0,90.74
1,1161,CA-2018-147039,6/29/2018 12:00:00 AM,AA-10315,Alex Avila,Consumer,United States,Minneapolis,Minnesota,55407.0,Central,OFF-BI-10004654,Office Supplies,Binders,Avery Binding System Hidden Tab Executive Styl...,11.54,2,0.0,5.77
2,1300,CA-2016-121391,10/4/2016 12:00:00 AM,AA-10315,Alex Avila,Consumer,United States,San Francisco,California,94109.0,West,OFF-ST-10001590,Office Supplies,Storage,Tenex Personal Project File with Scoop Front D...,26.96,2,0.0,7.01
3,2230,CA-2015-128055,3/31/2015 12:00:00 AM,AA-10315,Alex Avila,Consumer,United States,San Francisco,California,94122.0,West,OFF-BI-10004390,Office Supplies,Binders,GBC DocuBind 200 Manual Binding Machine,673.57,2,0.2,252.59
4,2231,CA-2015-128055,3/31/2015 12:00:00 AM,AA-10315,Alex Avila,Consumer,United States,San Francisco,California,94122.0,West,OFF-AP-10002765,Office Supplies,Appliances,Fellowes Advanced Computer Series Surge Protec...,52.98,2,0.0,14.83
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9989,3815,CA-2017-152471,7/8/2017 12:00:00 AM,ZD-21925,Zuschuss Donatelli,Consumer,United States,Jacksonville,Florida,32216.0,South,TEC-PH-10002824,Technology,Phones,Jabra SPEAK 410 Multidevice Speakerphone,823.96,5,0.2,51.50
9990,3816,CA-2017-152471,7/8/2017 12:00:00 AM,ZD-21925,Zuschuss Donatelli,Consumer,United States,Jacksonville,Florida,32216.0,South,OFF-PA-10004965,Office Supplies,Paper,Xerox 1921,15.98,2,0.2,4.99
9991,5898,CA-2017-167682,4/3/2017 12:00:00 AM,ZD-21925,Zuschuss Donatelli,Consumer,United States,Richmond,Indiana,47374.0,Central,FUR-FU-10003799,Furniture,Furnishings,"Seth Thomas 13 1/2"" Wall Clock",71.12,4,0.0,22.05
9992,5899,CA-2017-167682,4/3/2017 12:00:00 AM,ZD-21925,Zuschuss Donatelli,Consumer,United States,Richmond,Indiana,47374.0,Central,TEC-PH-10000673,Technology,Phones,Plantronics Voyager Pro HD - Bluetooth Headset,259.96,4,0.0,124.78


In [28]:
tabla_clientes

Unnamed: 0,Id_cliente,Nombre,Segmento
0,AA-10315,Alex Avila,Consumer
11,AA-10375,Allen Armold,Consumer
26,AA-10480,Andrew Allen,Consumer
38,AA-10645,Anna Andreadi,Consumer
56,AB-10015,Aaron Bergman,Consumer
...,...,...,...
9906,XP-21865,Xylona Preis,Consumer
9934,YC-21895,Yoseph Carroll,Corporate
9942,YS-21880,Yana Sorensen,Corporate
9954,ZC-21910,Zuschuss Carroll,Consumer


In [33]:
join_table = tabla_super.merge(tabla_clientes, 
                                                                                               left_on='Customer ID',
                                                                                               right_on='Id_cliente',
                                                                                               how='inner')
join_table = join_table.drop(columns=['Customer ID','Customer Name', 'Segment','Nombre','Segmento'])
join_table

Unnamed: 0,Row ID,Order ID,Order Date,Country,City,State,Postal Code,Region,Product ID,Category,Sub-Category,Product Name,Sales,Quantity,Discount,Profit,Id_cliente
0,1160,CA-2018-147039,6/29/2018 12:00:00 AM,United States,Minneapolis,Minnesota,55407.0,Central,OFF-AP-10000576,Office Supplies,Appliances,"Belkin 325VA UPS Surge Protector, 6'",362.94,3,0.0,90.74,AA-10315
1,1161,CA-2018-147039,6/29/2018 12:00:00 AM,United States,Minneapolis,Minnesota,55407.0,Central,OFF-BI-10004654,Office Supplies,Binders,Avery Binding System Hidden Tab Executive Styl...,11.54,2,0.0,5.77,AA-10315
2,1300,CA-2016-121391,10/4/2016 12:00:00 AM,United States,San Francisco,California,94109.0,West,OFF-ST-10001590,Office Supplies,Storage,Tenex Personal Project File with Scoop Front D...,26.96,2,0.0,7.01,AA-10315
3,2230,CA-2015-128055,3/31/2015 12:00:00 AM,United States,San Francisco,California,94122.0,West,OFF-BI-10004390,Office Supplies,Binders,GBC DocuBind 200 Manual Binding Machine,673.57,2,0.2,252.59,AA-10315
4,2231,CA-2015-128055,3/31/2015 12:00:00 AM,United States,San Francisco,California,94122.0,West,OFF-AP-10002765,Office Supplies,Appliances,Fellowes Advanced Computer Series Surge Protec...,52.98,2,0.0,14.83,AA-10315
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9989,3815,CA-2017-152471,7/8/2017 12:00:00 AM,United States,Jacksonville,Florida,32216.0,South,TEC-PH-10002824,Technology,Phones,Jabra SPEAK 410 Multidevice Speakerphone,823.96,5,0.2,51.50,ZD-21925
9990,3816,CA-2017-152471,7/8/2017 12:00:00 AM,United States,Jacksonville,Florida,32216.0,South,OFF-PA-10004965,Office Supplies,Paper,Xerox 1921,15.98,2,0.2,4.99,ZD-21925
9991,5898,CA-2017-167682,4/3/2017 12:00:00 AM,United States,Richmond,Indiana,47374.0,Central,FUR-FU-10003799,Furniture,Furnishings,"Seth Thomas 13 1/2"" Wall Clock",71.12,4,0.0,22.05,ZD-21925
9992,5899,CA-2017-167682,4/3/2017 12:00:00 AM,United States,Richmond,Indiana,47374.0,Central,TEC-PH-10000673,Technology,Phones,Plantronics Voyager Pro HD - Bluetooth Headset,259.96,4,0.0,124.78,ZD-21925


In [35]:
join_table = join_table.merge(tabla_ubicacion, 
                                                                                               left_on='Postal Code',
                                                                                               right_on='Postal Code',
                                                                                               how='inner')

join_table


Unnamed: 0,Row ID,Order ID,Order Date,Country_x,City_x,State_x,Postal Code,Region,Product ID,Category,Sub-Category,Product Name,Sales,Quantity,Discount,Profit,Id_cliente,Country_y,State_y,City_y
0,1160,CA-2018-147039,6/29/2018 12:00:00 AM,United States,Minneapolis,Minnesota,55407.0,Central,OFF-AP-10000576,Office Supplies,Appliances,"Belkin 325VA UPS Surge Protector, 6'",362.94,3,0.0,90.74,AA-10315,United States,Minnesota,Minneapolis
1,1161,CA-2018-147039,6/29/2018 12:00:00 AM,United States,Minneapolis,Minnesota,55407.0,Central,OFF-BI-10004654,Office Supplies,Binders,Avery Binding System Hidden Tab Executive Styl...,11.54,2,0.0,5.77,AA-10315,United States,Minnesota,Minneapolis
2,1897,CA-2018-141789,10/3/2018 12:00:00 AM,United States,Minneapolis,Minnesota,55407.0,Central,OFF-BI-10001359,Office Supplies,Binders,GBC DocuBind TL300 Electric Binding System,1793.98,2,0.0,843.17,AC-10450,United States,Minnesota,Minneapolis
3,9218,US-2018-118157,11/14/2018 12:00:00 AM,United States,Minneapolis,Minnesota,55407.0,Central,OFF-EN-10004459,Office Supplies,Envelopes,Security-Tint Envelopes,15.28,2,0.0,7.49,AW-10930,United States,Minnesota,Minneapolis
4,6885,CA-2016-120677,5/31/2016 12:00:00 AM,United States,Minneapolis,Minnesota,55407.0,Central,FUR-CH-10002320,Furniture,Chairs,Hon Pagoda Stacking Chairs,2567.84,8,0.0,770.35,BD-11320,United States,Minnesota,Minneapolis
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10028,5413,CA-2018-166856,9/18/2018 12:00:00 AM,United States,Clovis,New Mexico,88101.0,West,TEC-AC-10004992,Technology,Accessories,Kingston Digital DataTraveler 64GB USB 2.0,101.34,3,0.0,8.11,TS-21505,United States,New Mexico,Clovis
10029,3216,CA-2018-150189,7/8/2018 12:00:00 AM,United States,San Mateo,California,94403.0,West,OFF-LA-10002762,Office Supplies,Labels,Avery 485,75.18,6,0.0,35.33,VG-21790,United States,California,San Mateo
10030,4341,US-2015-129609,3/22/2015 12:00:00 AM,United States,Portage,Indiana,46368.0,Central,OFF-AR-10003478,Office Supplies,Art,Avery Hi-Liter EverBold Pen Style Fluorescent ...,16.28,2,0.0,6.51,VM-21835,United States,Indiana,Portage
10031,9835,CA-2017-126627,10/10/2017 12:00:00 AM,United States,La Porte,Texas,77571.0,Central,FUR-FU-10004963,Furniture,Furnishings,"Eldon 400 Class Desk Accessories, Black Carbon",14.00,4,0.6,-6.30,WB-21850,United States,Texas,La Porte


In [36]:
join_table = join_table.drop(columns=['Country_x','Country_y','City_x','City_y','State_x','State_y','Region'])
join_table

Unnamed: 0,Row ID,Order ID,Order Date,Postal Code,Product ID,Category,Sub-Category,Product Name,Sales,Quantity,Discount,Profit,Id_cliente
0,1160,CA-2018-147039,6/29/2018 12:00:00 AM,55407.0,OFF-AP-10000576,Office Supplies,Appliances,"Belkin 325VA UPS Surge Protector, 6'",362.94,3,0.0,90.74,AA-10315
1,1161,CA-2018-147039,6/29/2018 12:00:00 AM,55407.0,OFF-BI-10004654,Office Supplies,Binders,Avery Binding System Hidden Tab Executive Styl...,11.54,2,0.0,5.77,AA-10315
2,1897,CA-2018-141789,10/3/2018 12:00:00 AM,55407.0,OFF-BI-10001359,Office Supplies,Binders,GBC DocuBind TL300 Electric Binding System,1793.98,2,0.0,843.17,AC-10450
3,9218,US-2018-118157,11/14/2018 12:00:00 AM,55407.0,OFF-EN-10004459,Office Supplies,Envelopes,Security-Tint Envelopes,15.28,2,0.0,7.49,AW-10930
4,6885,CA-2016-120677,5/31/2016 12:00:00 AM,55407.0,FUR-CH-10002320,Furniture,Chairs,Hon Pagoda Stacking Chairs,2567.84,8,0.0,770.35,BD-11320
...,...,...,...,...,...,...,...,...,...,...,...,...,...
10028,5413,CA-2018-166856,9/18/2018 12:00:00 AM,88101.0,TEC-AC-10004992,Technology,Accessories,Kingston Digital DataTraveler 64GB USB 2.0,101.34,3,0.0,8.11,TS-21505
10029,3216,CA-2018-150189,7/8/2018 12:00:00 AM,94403.0,OFF-LA-10002762,Office Supplies,Labels,Avery 485,75.18,6,0.0,35.33,VG-21790
10030,4341,US-2015-129609,3/22/2015 12:00:00 AM,46368.0,OFF-AR-10003478,Office Supplies,Art,Avery Hi-Liter EverBold Pen Style Fluorescent ...,16.28,2,0.0,6.51,VM-21835
10031,9835,CA-2017-126627,10/10/2017 12:00:00 AM,77571.0,FUR-FU-10004963,Furniture,Furnishings,"Eldon 400 Class Desk Accessories, Black Carbon",14.00,4,0.6,-6.30,WB-21850


In [None]:
join_table = join_table.merge(tabla_producto, 
                                                                                               left_on='Postal Code',
                                                                                               right_on='Postal Code',
                                                                                               how='inner')

join_table

In [37]:
tabla_producto

Unnamed: 0,Product ID,Category,Sub-Category,Product Name
0,OFF-AP-10000576,Office Supplies,Appliances,"Belkin 325VA UPS Surge Protector, 6'"
1,OFF-BI-10004654,Office Supplies,Binders,Avery Binding System Hidden Tab Executive Styl...
2,OFF-ST-10001590,Office Supplies,Storage,Tenex Personal Project File with Scoop Front D...
3,OFF-BI-10004390,Office Supplies,Binders,GBC DocuBind 200 Manual Binding Machine
4,OFF-AP-10002765,Office Supplies,Appliances,Fellowes Advanced Computer Series Surge Protec...
...,...,...,...,...
9744,OFF-AP-10001124,Office Supplies,Appliances,Belkin 8 Outlet SurgeMaster II Gold Surge Prot...
9795,OFF-ST-10001414,Office Supplies,Storage,Decoflex Hanging Personal Folder File
9818,TEC-PH-10001468,Technology,Phones,Panasonic Business Telephones KX-T7736
9881,TEC-MA-10003246,Technology,Machines,Hewlett-Packard Deskjet D4360 Printer


In [38]:
join_table

Unnamed: 0,Row ID,Order ID,Order Date,Postal Code,Product ID,Category,Sub-Category,Product Name,Sales,Quantity,Discount,Profit,Id_cliente
0,1160,CA-2018-147039,6/29/2018 12:00:00 AM,55407.0,OFF-AP-10000576,Office Supplies,Appliances,"Belkin 325VA UPS Surge Protector, 6'",362.94,3,0.0,90.74,AA-10315
1,1161,CA-2018-147039,6/29/2018 12:00:00 AM,55407.0,OFF-BI-10004654,Office Supplies,Binders,Avery Binding System Hidden Tab Executive Styl...,11.54,2,0.0,5.77,AA-10315
2,1897,CA-2018-141789,10/3/2018 12:00:00 AM,55407.0,OFF-BI-10001359,Office Supplies,Binders,GBC DocuBind TL300 Electric Binding System,1793.98,2,0.0,843.17,AC-10450
3,9218,US-2018-118157,11/14/2018 12:00:00 AM,55407.0,OFF-EN-10004459,Office Supplies,Envelopes,Security-Tint Envelopes,15.28,2,0.0,7.49,AW-10930
4,6885,CA-2016-120677,5/31/2016 12:00:00 AM,55407.0,FUR-CH-10002320,Furniture,Chairs,Hon Pagoda Stacking Chairs,2567.84,8,0.0,770.35,BD-11320
...,...,...,...,...,...,...,...,...,...,...,...,...,...
10028,5413,CA-2018-166856,9/18/2018 12:00:00 AM,88101.0,TEC-AC-10004992,Technology,Accessories,Kingston Digital DataTraveler 64GB USB 2.0,101.34,3,0.0,8.11,TS-21505
10029,3216,CA-2018-150189,7/8/2018 12:00:00 AM,94403.0,OFF-LA-10002762,Office Supplies,Labels,Avery 485,75.18,6,0.0,35.33,VG-21790
10030,4341,US-2015-129609,3/22/2015 12:00:00 AM,46368.0,OFF-AR-10003478,Office Supplies,Art,Avery Hi-Liter EverBold Pen Style Fluorescent ...,16.28,2,0.0,6.51,VM-21835
10031,9835,CA-2017-126627,10/10/2017 12:00:00 AM,77571.0,FUR-FU-10004963,Furniture,Furnishings,"Eldon 400 Class Desk Accessories, Black Carbon",14.00,4,0.6,-6.30,WB-21850


In [None]:
join_table = join_table.merge(tabla_clientes, 
                                                                                               left_on='Postal Code',
                                                                                               right_on='Postal Code',
                                                                                               how='inner')

join_table