In [1]:
from pathlib import Path

import pandas as pd

# Input CSVs live in DATA_DIR; the combined output is written to RESULT_DIR.
DATA_DIR = Path('../data')
RESULT_DIR = DATA_DIR / 'result'

# Load the CSV files
flights_df = pd.read_csv(DATA_DIR / 'flights.csv')
hotels_df = pd.read_csv(DATA_DIR / 'hotels.csv')
users_df = pd.read_csv(DATA_DIR / 'users.csv')

# Check the columns of each DataFrame
print("Columns in flights_df:", flights_df.columns)
print("Columns in hotels_df:", hotels_df.columns)
print("Columns in users_df:", users_df.columns)

# Check the first records of each DataFrame
print("First few rows of flights_df:\n", flights_df.head())
print("First few rows of hotels_df:\n", hotels_df.head())
print("First few rows of users_df:\n", users_df.head())

# Rename the user key on each side up front so both copies survive the merge
# under explicit names instead of relying on the automatic suffixes.
flights_df_renamed = flights_df.rename(columns={'userCode': 'userCode_flight'})
hotels_df_renamed = hotels_df.rename(columns={'userCode': 'userCode_hotel'})

# Step 1: join flights and hotels on travelCode.
# `how` defaults to an inner join, so rows whose travelCode is missing on
# either side are dropped. A travelCode that repeats in flights_df gets the
# matching hotel columns repeated on each of its rows.
# Remaining shared columns get the `_flight` / `_hotel` suffixes.
flights_hotels_df = pd.merge(
    flights_df_renamed,
    hotels_df_renamed,
    on='travelCode',
    suffixes=('_flight', '_hotel'),
)

# Check the resulting DataFrame
print("First few rows of flights_hotels_df:\n", flights_hotels_df.head())

# Step 2: join the result with users (flight user code -> users.code).
# NOTE(review): if both sides carry a `name` column (hotels and users each
# list one), pandas applies its default suffixes and emits `name_x` /
# `name_y`. Left unchanged so the saved CSV keeps its current column names;
# consider explicit suffixes if downstream consumers can be updated.
combined_df = pd.merge(
    flights_hotels_df,
    users_df,
    left_on='userCode_flight',
    right_on='code',
)

# Check the final combined DataFrame
print("First few rows of combined_df:\n", combined_df.head())

# Save the combined result as CSV. to_csv does not create missing parent
# directories, so make sure the target directory exists first.
RESULT_DIR.mkdir(parents=True, exist_ok=True)
combined_df.to_csv(RESULT_DIR / 'combined_data.csv', index=False)


Columns in flights_df: Index(['travelCode', 'userCode', 'from', 'to', 'flightType', 'price', 'time',
       'distance', 'agency', 'date'],
      dtype='object')
Columns in hotels_df: Index(['travelCode', 'userCode', 'name', 'place', 'days', 'price', 'total',
       'date'],
      dtype='object')
Columns in users_df: Index(['code', 'company', 'name', 'gender', 'age'], dtype='object')
First few rows of flights_df:
    travelCode  userCode                from                  to  flightType   
0           0         0         Recife (PE)  Florianopolis (SC)  firstClass  \
1           0         0  Florianopolis (SC)         Recife (PE)  firstClass   
2           1         0       Brasilia (DF)  Florianopolis (SC)  firstClass   
3           1         0  Florianopolis (SC)       Brasilia (DF)  firstClass   
4           2         0        Aracaju (SE)       Salvador (BH)  firstClass   

     price  time  distance       agency        date  
0  1434.38  1.76    676.53  FlyingDrops  09/26/2019  


In [3]:

# Print a concise summary of the combined DataFrame (columns, dtypes,
# non-null counts, memory usage).
# NOTE(review): the output saved under this cell looks like a table of summary
# statistics (count/mean/std/...), which `.info()` does not produce; it
# appears stale. Re-run the cell to refresh it.
combined_df.info()


Unnamed: 0,travelCode,userCode_flight,price_flight,time,distance,userCode_hotel,days,price_hotel,total,code,age
count,81104.0,81104.0,81104.0,81104.0,81104.0,81104.0,81104.0,81104.0,81104.0,81104.0,81104.0
mean,67911.794461,666.963726,957.725281,1.424116,548.097771,666.963726,2.499679,214.439554,536.229513,666.963726,42.790615
std,39407.956381,391.134383,362.212893,0.543071,209.053667,391.134383,1.119319,76.741832,319.329514,391.134383,12.942576
min,0.0,0.0,301.51,0.44,168.22,0.0,1.0,60.39,60.39,0.0,21.0
25%,33696.75,323.0,672.66,1.04,401.66,323.0,1.0,165.99,247.62,323.0,32.0
50%,67831.0,658.0,899.6,1.46,562.14,658.0,2.0,242.88,495.24,658.0,42.0
75%,102211.25,1013.0,1222.24,1.76,676.53,1013.0,4.0,263.41,742.86,1013.0,54.0
max,135942.0,1339.0,1754.17,2.44,937.77,1339.0,4.0,313.02,1252.08,1339.0,65.0
