# FINAL DATA
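This notebook merges the original data with the data that was added later. Rows with a repeated node identifier (`codnode`) are dropped, only nodes flagged as valid are kept, and the remaining nodes are reindexed with consecutive identifiers starting at 1. The results are written to `nodeData.csv` and `demandData.csv`.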

## Get final node data

In [21]:
import pandas as pd

# Both business-name files are ';'-separated and latin1-encoded.
csv_options = dict(sep=';', encoding='latin1')

# 'df' holds the original business rows, 'df_new' the rows added later.
df = pd.read_csv('BusinessName.csv', **csv_options)
df_new = pd.read_csv('newBusinessName.csv', **csv_options)

# Report the row count followed by the contents of each table, in load order.
for frame in (df, df_new):
    print(len(frame))
    print(frame)


200
     codnode   latitude  longitude  ZIPcod  IdDeliveryZone  \
0    1021947  40.271729  -3.699045   28906             135   
1     162355  40.271983  -3.699120   28906             135   
2     587503  40.272769  -3.697199   28906             135   
3     613641  40.281410  -3.786190   28944               9   
4     136198  40.298509  -3.925754   28939             115   
..       ...        ...        ...     ...             ...   
195   159552  40.546879  -3.622070   28703             242   
196   931181  40.567200  -3.609220   28702              68   
197   141175  40.606670  -3.717610   28760             290   
198   607068  40.636101  -4.009720   28400             289   
199   125882  40.640179  -4.010300   28400             289   

                                               Address  Valid  \
0                  Nassica, M-50, 28320 Getafe, España   True   
1                  Nassica, M-50, 28320 Getafe, España   True   
2                  Nassica, M-50, 28320 Getafe, España  

In [22]:
# Outer merge without an explicit key: pandas joins on every column the two
# frames have in common, so the result is the union of both tables.
df_combined = df.merge(df_new, how='outer')
print('All ', len(df_combined))

# Keep only the rows whose 'Valid' flag is exactly True.
is_valid = df_combined['Valid'].eq(True)
df_combined = df_combined.loc[is_valid]
print('All valid ', len(df_combined))

# One row per node identifier; the first occurrence of each 'codnode' is kept.
df_combined = df_combined.drop_duplicates(subset=['codnode'], keep='first')
print('Todos los válidos SIN REPETIDOS ', len(df_combined))

print(df_combined)

Todos  291
Todos los válidos  234
Todos los válidos SIN REPETIDOS  233
     codnode   latitude  longitude  ZIPcod  IdDeliveryZone  \
0    1021947  40.271729  -3.699045   28906             135   
1     162355  40.271983  -3.699120   28906             135   
2     587503  40.272769  -3.697199   28906             135   
3     613641  40.281410  -3.786190   28944               9   
4     136198  40.298509  -3.925754   28939             115   
..       ...        ...        ...     ...             ...   
286   149157  40.427990  -3.686270   28001              47   
287   995939  40.501080  -3.892140   28232              89   
288   609821  40.504250  -3.707070   28049             238   
289   154178  40.508629  -3.667180   28050             136   
290   158205  40.305180  -3.729910   28901             146   

                                               Address  Valid  \
0                  Nassica, M-50, 28320 Getafe, España   True   
1                  Nassica, M-50, 28320 Getafe, España

In [54]:
# Build the final node table:
#   1. load the original and the later-added node files and merge them,
#   2. drop repeated node identifiers and keep only nodes that are valid in
#      the business data (df_combined, built in a previous cell),
#   3. attach address / business name and reindex nodes as 1..N,
#   4. write the result to 'nodeData.csv'.

# Original node file: the coordinate columns are read as text using a decimal
# comma, so swap ',' for '.' (literal replacement) and convert to float.
df = pd.read_csv('DatosNodos.csv', sep=';')
for coord in ('latitude', 'longitude'):
    df[coord] = df[coord].str.replace(',', '.', regex=False).astype(float)

# Later-added node file; merged as read, with no coordinate conversion.
df_new = pd.read_csv('newDatosNodos.csv', sep=';')

# No explicit key: pandas merges on every column shared by both frames.
datosNodos = pd.merge(df, df_new, how='outer')
print(datosNodos)
print('All data ', len(datosNodos))

# Keep a single row per original node identifier (first occurrence wins).
datosNodos = datosNodos.drop_duplicates(subset=['codnode'])
print('All SIN REPETIDOS ', len(datosNodos))
print(datosNodos)

# Restrict to the nodes that survived the validity filter on the business data.
# The filtered 'datosNodos' is reused by the demand cell further down.
valid_codnode = df_combined['codnode'].to_list()
datosNodos = datosNodos[datosNodos['codnode'].isin(valid_codnode)]
print('Todos los válidos  ', len(datosNodos))

# Attach address and business name. Both sides were de-duplicated on
# 'codnode' above, so the join must be one-to-one; 'validate' makes pandas
# raise instead of silently multiplying rows (which would break the new ids).
datosNodosFinal = pd.merge(
    datosNodos,
    df_combined[['codnode', 'Address', 'Business']],
    on='codnode',
    how='inner',
    validate='one_to_one',
).drop(columns=['IdDeliveryZone'])

# New consecutive node identifiers 1..N, in the row order of the merged table.
datosNodosFinal['new_codnode'] = range(1, len(datosNodosFinal) + 1)

# Snapshot that still carries both the old and the new identifier; the demand
# cell uses it to translate old 'codnode' values into the new ones.
datosNodosFinal_old = datosNodosFinal.copy()

# Replace the original identifier with the new one (no in-place mutation).
datosNodosFinal = (
    datosNodosFinal
    .drop(columns=['codnode'])
    .rename(columns={'new_codnode': 'codnode'})
)

# Move 'codnode' to the first column, keeping the other columns in order.
columnas = ['codnode'] + [col for col in datosNodosFinal.columns if col != 'codnode']
datosNodosFinal = datosNodosFinal[columnas]

datosNodosFinal.to_csv('nodeData.csv', sep=';', encoding='latin-1', index=False)

     codnode   latitude  longitude  ZIPcod  IdDeliveryZone
0     121342  40.345699  -3.826030   28921              83
1    1018226  40.379940  -3.696700   28041             128
2     996618  40.418892  -3.689340   28001             199
3     173838  40.504395  -3.702736   28049             238
4     129378  40.321831  -3.714220   28903              66
..       ...        ...        ...     ...             ...
295   149157  40.427990  -3.686270   28001              47
296   995939  40.501080  -3.892140   28232              89
297   609821  40.504250  -3.707070   28049             238
298   154178  40.508629  -3.667180   28050             136
299   158205  40.305180  -3.729910   28901             146

[300 rows x 5 columns]
Total de datos  300
Todos SIN REPETIDOS  299
     codnode   latitude  longitude  ZIPcod  IdDeliveryZone
0     121342  40.345699  -3.826030   28921              83
1    1018226  40.379940  -3.696700   28041             128
2     996618  40.418892  -3.689340   28001    

## Get final demand data

In [55]:
# Display the node snapshot that keeps both the original 'codnode' and the reindexed 'new_codnode'.
datosNodosFinal_old

Unnamed: 0,codnode,latitude,longitude,ZIPcod,Address,Business,new_codnode
0,121342,40.345699,-3.82603,28921,"Dia, Calle Espada, 28921 Alcorcón, España",Dia,1
1,1018226,40.379940,-3.69670,28041,"Avenida de Córdoba, 33, 28026 Madrid, España",TH Asesores,2
2,996618,40.418892,-3.68934,28001,"Calle de Valenzuela, 7, 28014 Madrid, España",Barracuda Mx,3
3,129378,40.321831,-3.71422,28903,"Avenida de Rigoberta Menchú, 28093 Getafe, España",MAPFRE,4
4,157936,40.427849,-3.68861,28001,"Calle de Ayala, 6, 28001 Madrid, España",Restaurante Ten Con Ten,5
...,...,...,...,...,...,...,...
228,149157,40.427990,-3.68627,28001,"Joyería Perez, Calle de Ayala, 28001 Madrid, E...",Joyería Perez,229
229,995939,40.501080,-3.89214,28232,"Europolis Sport Center, Calle Oporto, 28230 La...",Europolis Sport Center,230
230,609821,40.504250,-3.70707,28049,"El Peregrino, Avenida del Monasterio de El Esc...",El Peregrino,231
231,154178,40.508629,-3.66718,28050,"La Vaca Picada, Paseo de la Tierra de Melide, ...",La Vaca Picada,232


In [56]:
import pandas as pd

# Build the final demand table:
#   1. load the raw demand file and normalise the 'Pallets' and 'Date' columns,
#   2. keep one record per (node, date) and only nodes present in the filtered
#      node table (datosNodos, built in a previous cell),
#   3. translate the original node identifier into the reindexed one,
#   4. write the result to 'demandData.csv'.
datosDemanda = pd.read_csv('DatosDemandaAll.csv', sep=';')

# 'Pallets' values may be written with a decimal comma: go through their string
# form, swap ',' for '.' (literal replacement) and convert to float.
datosDemanda['Pallets'] = (
    datosDemanda['Pallets']
    .astype(str)
    .str.replace(',', '.', regex=False)
    .astype(float)
)

# Dates are parsed as day/month/year.
datosDemanda['Date'] = pd.to_datetime(datosDemanda['Date'], format='%d/%m/%Y')

# Drop the 'DateTime' column and keep the first record per (codnode, Date).
datosDemanda = (
    datosDemanda
    .drop(columns=['DateTime'])
    .drop_duplicates(subset=['codnode', 'Date'])
)

# Keep only demand rows whose node is in the filtered node table.
valid_codnode = datosNodos['codnode'].to_list()
datosDemanda = datosDemanda[datosDemanda['codnode'].isin(valid_codnode)]

# Swap the original identifier for the reindexed one. Each old 'codnode' must
# map to exactly one new id, so 'validate' makes pandas raise if the mapping
# table ever contains a repeated key instead of silently duplicating demand.
datosDemanda = (
    pd.merge(
        datosDemanda,
        datosNodosFinal_old[['codnode', 'new_codnode']],
        on='codnode',
        how='inner',
        validate='many_to_one',
    )
    .drop(columns=['codnode'])
    .rename(columns={'new_codnode': 'codnode'})
)

# Move 'codnode' to the first column, keeping the other columns in order.
columnas = ['codnode'] + [col for col in datosDemanda.columns if col != 'codnode']
datosDemanda = datosDemanda[columnas]

# Number of demand records per (new) node identifier.
conteo_por_codnode = datosDemanda['codnode'].value_counts()
print(conteo_por_codnode)

datosDemanda.to_csv('demandData.csv', sep=';', encoding='latin-1', index=False)

codnode
61     296
99     267
18     252
122    248
77     248
      ... 
106    132
165    129
94     127
207    121
12     117
Name: count, Length: 232, dtype: int64
