In [None]:
import dask.dataframe as dd
import pandas as pd

## Horizontal concatenation

In [None]:
# One-column pandas frame of city names (default integer index), then
# wrapped as a Dask DataFrame split into two partitions.
df = pd.DataFrame(["Medellín", "Rio", "Bogotá", "Buenos Aires"], columns=["cities"])
cities_ddf = dd.from_pandas(df, npartitions=2)

In [None]:
# Materialize the Dask DataFrame as pandas and print it to confirm its contents.
print(cities_ddf.compute())

         cities
0      Medellín
1           Rio
2        Bogotá
3  Buenos Aires


In [None]:
# One-column pandas frame of population figures (default integer index),
# then wrapped as a two-partition Dask DataFrame.
df = pd.DataFrame([2.6, 6.7, 7.2, 15.2], columns=["population"])
populations_ddf = dd.from_pandas(df, npartitions=2)

In [None]:
# Materialize the population frame as pandas and print it to confirm its contents.
print(populations_ddf.compute())

   population
0         2.6
1         6.7
2         7.2
3        15.2


In [None]:
# Place the two frames side by side (axis=1); rows are paired by index label.
concat_ddf = dd.concat([cities_ddf, populations_ddf], axis=1)

In [None]:
# Both inputs carry the same 0-3 index, so each city lands next to its population.
print(concat_ddf.compute())

         cities  population
0      Medellín         2.6
1           Rio         6.7
2        Bogotá         7.2
3  Buenos Aires        15.2


### Mismatched indices

In [None]:
# Countries paired with the labels (2, 4, 6, 8) that a later cell turns into
# the index. Written as one (country, index) record per row.
df = pd.DataFrame(
    [("Colombia", 2), ("Brasil", 4), ("Colombia", 6), ("Argentina", 8)],
    columns=["country", "index"],
)
countries_ddf = dd.from_pandas(df, npartitions=2)

In [None]:
# Inspect the frame before re-indexing: "index" is still an ordinary column here.
print(countries_ddf.compute())

     country  index
0   Colombia      2
1     Brasil      4
2   Colombia      6
3  Argentina      8


In [None]:
# Promote the "index" column to the frame's index, rebinding the same variable name.
countries_ddf = countries_ddf.set_index("index")

In [None]:
# Confirm the labels 2, 4, 6, 8 now form the index instead of a data column.
print(countries_ddf.compute())

         country
index           
2       Colombia
4         Brasil
6       Colombia
8      Argentina


In [None]:
# Concatenate two frames whose index labels differ (0-3 vs. 2, 4, 6, 8).
# This rebinds concat_ddf, replacing the cities/populations result from earlier.
concat_ddf = dd.concat([cities_ddf, countries_ddf], axis=1)

In [None]:
# Only label 2 exists in both inputs; every other row gets NaN for the missing side.
print(concat_ddf.compute())

         cities    country
0      Medellín        NaN
1           Rio        NaN
2        Bogotá   Colombia
3  Buenos Aires        NaN
4           NaN     Brasil
6           NaN   Colombia
8           NaN  Argentina


### Resetting the index doesn't work with concat

In [None]:
# Demonstrates the failure: after reset_index() the countries frame has
# unknown divisions (see the error text), so dd.concat refuses to align it
# with cities_ddf along axis=1. The exception is caught and printed so that
# "Restart Kernel -> Run All" still reaches the cells that follow; on failure
# concat_ddf simply keeps its previous value.
try:
    concat_ddf = dd.concat([cities_ddf, countries_ddf.reset_index(drop=True)], axis=1)
except ValueError as err:
    print(f"ValueError: {err}")

ValueError: Unable to concatenate DataFrame with unknown division specifying axis=1

### `assign` workaround

In [None]:
# Attach the country values to the cities frame as a new "label" column,
# after dropping the index on both sides.
# NOTE(review): this presumably pairs rows partition by partition, so it
# assumes both frames have the same partition count and the same number of
# rows per partition (both were built with npartitions=2) — confirm before
# reusing on other data.
assign_ddf = cities_ddf.reset_index(drop=True).assign(
    label=countries_ddf.reset_index(drop=True)["country"]
)

In [None]:
# Print the combined result. The displayed index repeats 0, 1 instead of
# running 0-3 — presumably because the index was reset within each of the
# two partitions rather than globally.
print(assign_ddf.compute())

         cities      label
0      Medellín   Colombia
1           Rio     Brasil
0        Bogotá   Colombia
1  Buenos Aires  Argentina
