### Catenating Datasets in Pandas

#### Vertical Catenation (axis=0)
```python
# Helper function to create DataFrames
def makedf(cols, ind):
    """Build a small DataFrame whose cells read "<column label><row label>".

    cols: iterable of column labels (a string yields one label per character).
    ind:  row labels, also used as the index of the result.
    """
    cells = {}
    for col in cols:
        # Each cell is the column label glued to the row label, e.g. "A0".
        cells[col] = [f"{col!s}{row!s}" for row in ind]
    return pd.DataFrame(cells, index=ind)

# Four small frames: a and b share columns, c shares a's index,
# and d has columns B and C on b's index
a, b = makedf("AB", [0, 1]), makedf("AB", [2, 3])
c, d = makedf("CD", [0, 1]), makedf("BC", [2, 3])

# Stack b's rows beneath a's rows (axis=0 is the default direction)
result1 = pd.concat([a, b], axis=0)
```
Result:
```
    A   B
0  A0  B0
1  A1  B1
2  A2  B2
3  A3  B3
```

#### Duplicate Indices
```python
# Stacking a on itself keeps each copy's row labels, so 0 and 1 appear twice
result2 = pd.concat([a, a], axis=0)
# Two ways to avoid the repeated labels:
# 1) discard the old labels and number the rows afresh
result3 = pd.concat([a, a], axis=0, ignore_index=True)
# 2) tag each input with a key, which yields a two-level (hierarchical) index
result4 = pd.concat([a, a], axis=0, keys=["first", "second"])
```
Result (`result2` with duplicate indices):
```
    A   B
0  A0  B0
1  A1  B1
0  A0  B0
1  A1  B1
```
Result (`result3` with renumbered indices):
```
    A   B
0  A0  B0
1  A1  B1
2  A0  B0
3  A1  B1
```
Result (`result4` with hierarchical indexing):
```
         A   B
first  0  A0  B0
       1  A1  B1
second 0  A0  B0
       1  A1  B1
```

#### Horizontal Catenation (axis=1)
```python
# Put c's columns to the right of a's; axis="columns" is the named form of axis=1
result5 = pd.concat([a, c], axis="columns")
```
Result:
```
    A   B   C   D
0  A0  B0  C0  D0
1  A1  B1  C1  D1
```

#### Handling Different Columns and Indices
```python
# join="outer" (the default) keeps every column found in either frame;
# cells with no source value become NaN
result6 = pd.concat([a, d], join="outer", sort=False)
# join="inner" keeps only the columns present in both frames
result7 = pd.concat([a, d], join="inner")
```
Result (`result6` with outer join and NaNs):
```
     A    B    C
0   A0   B0  NaN
1   A1   B1  NaN
2  NaN   B2   C2
3  NaN   B3   C3
```
Result (`result7` with inner join):
```
    B
0  B0
1  B1
2  B2
3  B3
```

In [39]:
"""
Exercise 5.1 (split date continues)
Write function split_date_continues that does

read the bicycle data set
clean the data set of columns/rows that contain only missing values

drops the Päivämäärä column and replaces it with its split components as before

Use the concat function to do this.

The function should return a DataFrame with 25 columns (the first five related to the date and the remaining 20 concerning the measurement location).

Hint: You may use your solution or the model solution from exercise 16 of the previous set as a starting point.
"""

"""
days = dict(zip("ma ti ke to pe la su".split(), "Mon Tue Wed Thu Fri Sat Sun".split()))
months = dict(zip("tammi helmi maalis huhti touko kesä heinä elo syys loka marras joulu".split(), range(1,13)))
def split_date():
    df = pd.read_csv("src/Helsingin_pyorailijamaarat.csv", sep=";")
    df = df.dropna(axis=0, how="all").dropna(axis=1, how="all")
    d = df["Päivämäärä"].str.split(expand=True)
    d.columns = ["Weekday", "Day", "Month", "Year", "Hour"]
 
    hourmin = d["Hour"].str.split(":", expand=True)
    d["Hour"] = hourmin.iloc[:,0]
 
    d["Weekday"] = d["Weekday"].map(days)
    d["Month"] = d["Month"].map(months)
    
    d = d.astype({"Weekday": object, "Day": int, "Month": int, "Year": int, "Hour": int})
    return d"""

import pandas as pd
import numpy as np

# Finnish weekday abbreviation -> English three-letter weekday name.
days = {
    "ma": "Mon",
    "ti": "Tue",
    "ke": "Wed",
    "to": "Thu",
    "pe": "Fri",
    "la": "Sat",
    "su": "Sun",
}
# Finnish month-name stem -> month number 1..12.
months = {
    name: number
    for number, name in enumerate(
        "tammi helmi maalis huhti touko kesä heinä elo syys loka marras joulu".split(),
        start=1,
    )
}


def split_date(df):
    """Split the "Päivämäärä" text column of *df* into five date columns.

    Each value is split on whitespace into exactly five fields, which become
    the columns Weekday, Day, Month, Year and Hour. Weekday and Month are
    translated through the ``days`` and ``months`` tables, and Hour keeps only
    the text before the first ":". Weekday is returned as object; the other
    four columns are converted to int.

    Returns a new DataFrame with the same index as *df*.
    """
    parts = df["Päivämäärä"].str.split(expand=True)
    parts.columns = ["Weekday", "Day", "Month", "Year", "Hour"]

    parts = parts.assign(
        Weekday=parts["Weekday"].map(days),
        Month=parts["Month"].map(months),
        # "HH:MM" -> "HH": keep the piece before the first colon.
        Hour=parts["Hour"].str.split(":", expand=True)[0],
    )

    target_types = {
        "Weekday": object,
        "Day": int,
        "Month": int,
        "Year": int,
        "Hour": int,
    }
    return parts.astype(target_types)


def split_date_continues(
    filename="part05-e01_split_date_continues/src/Helsingin_pyorailijamaarat.csv",
):
    """Load the bicycle data and replace the Päivämäärä column with its parts.

    Parameters
    ----------
    filename : str
        Path of the semicolon-separated data file. The default is the path
        this function used to hard-code, so calls without arguments behave
        as before.

    Returns
    -------
    pandas.DataFrame
        The columns returned by ``split_date`` followed by the remaining
        columns of the cleaned data.
    """
    df = pd.read_csv(filename, sep=";")

    # Discard rows, then columns, that contain nothing but missing values.
    df = df.dropna(axis=0, how="all").dropna(axis=1, how="all")

    # Split the date text while the column is still present, then remove it.
    date_parts = split_date(df)
    counts = df.drop(columns="Päivämäärä")

    # Date columns first, followed by the remaining data columns.
    return pd.concat([date_parts, counts], axis=1)

    
 

def main():
    """Print the shape, column names and first rows of the prepared data."""
    df = split_date_continues()
    print("Shape:", df.shape)
    print("Column names:\n", df.columns)
    print(df.head())


# Run the demo only when this file is executed directly, so that importing it
# does not trigger the file read and the printing.
if __name__ == "__main__":
    main()

Shape: (37128, 25)
Column names:
 Index(['Weekday', 'Day', 'Month', 'Year', 'Hour', 'Auroransilta',
       'Eteläesplanadi', 'Huopalahti (asema)', 'Kaisaniemi/Eläintarhanlahti',
       'Kaivokatu', 'Kulosaaren silta et.', 'Kulosaaren silta po. ',
       'Kuusisaarentie', 'Käpylä, Pohjoisbaana',
       'Lauttasaaren silta eteläpuoli', 'Merikannontie',
       'Munkkiniemen silta eteläpuoli', 'Munkkiniemi silta pohjoispuoli',
       'Heperian puisto/Ooppera', 'Pitkäsilta itäpuoli',
       'Pitkäsilta länsipuoli', 'Lauttasaaren silta pohjoispuoli',
       'Ratapihantie', 'Viikintie', 'Baana'],
      dtype='object')
  Weekday  Day  Month  Year  Hour  Auroransilta  Eteläesplanadi  \
0     Wed    1      1  2014     0           NaN             7.0   
1     Wed    1      1  2014     1           NaN             5.0   
2     Wed    1      1  2014     2           NaN             2.0   
3     Wed    1      1  2014     3           NaN             5.0   
4     Wed    1      1  2014     4           Na