# Formatting the given transportation data

In [1]:
import pandas as pd

## Importing Data

In [None]:
# Load the Feb19 export: fields are separated by ';' and the file is decoded
# as ISO-8859-1.
df1 = pd.read_csv(
    filepath_or_buffer='FplAbweichung_Feb19_1_gesamt.csv',
    sep=';',
    encoding="ISO-8859-1",
)

In [None]:
# Load the Jan19 export with the same parsing options as the Feb19 file:
# ';' as field separator, ISO-8859-1 decoding.
df2 = pd.read_csv(
    filepath_or_buffer='FplAbweichung_Jan19_gesamt.csv',
    sep=';',
    encoding="ISO-8859-1",
)

In [None]:
# List the column labels of the Feb19 frame so they can be compared with the
# January frame's labels.
feb_header = df1.columns
print(feb_header)

Index(['Trip Code', 'Trip', 'Duty', 'Line', 'Pattern', 'Direction', 'Vehicle',
       'Block', 'Date', 'Sched. dep. trip', 'Duration', 'First stop',
       'Stop no (first)', 'Last stop', 'Stop no (last)', 'Stop', 'Stop no',
       'Sched. arr', 'Actual arr', 'Sched. dep', 'Actual dep', 'Dwell time',
       'On time', 'Dep.first', 'Arr.other', 'Dep.other', 'Arr.last',
       'Outliers'],
      dtype='object')


In [None]:
# List the column labels of the Jan19 frame so they can be compared with the
# February frame's labels.
jan_header = df2.columns
print(jan_header)

Index(['Fahrt', 'Linie', 'Route', 'Richtung', 'WUM', 'Datum', 'Soll Ab Fahrt',
       'Dauer', 'Erste Hst', 'Hst-Nr (erste)', 'Letzte Hst', 'Hst-Nr (letzte)',
       'Hst', 'Hst-Nr', 'Soll An', 'Ist An', 'Soll Ab', 'Ist Ab', 'Stand Hst',
       'Pünktlich', 'Ab.erste', 'An.sonst', 'Ab.sonst', 'An.letzte',
       'Ausreißer'],
      dtype='object')


### January Data Formatting 

The column names of the January data are in German, whereas the other data sets use English column names. To keep the data sets consistent, we rename the January columns to English.

In [None]:
# Restrict the January frame to the eleven columns that are carried forward;
# every other column of df2 is left out of df2new.
kept_columns = [
    'Linie', 'Route', 'Richtung', 'Datum',
    'Hst', 'Hst-Nr',
    'Soll An', 'Ist An',
    'Soll Ab', 'Ist Ab',
    'Stand Hst',
]
df2new = df2[kept_columns]

In [None]:
# Preview the first five rows of the selected January columns.
df2new.head(n=5)

Unnamed: 0,Linie,Route,Richtung,Datum,Hst,Hst-Nr,Soll An,Ist An,Soll Ab,Ist Ab,Stand Hst
0,21,21/205 Grete-Henry-Straße -> Auf der Lieth,2,01.01.2019,Grete-Henry-Straße,GHS11,09:48:00,09:48:37,09:48:00,09:48:37,00:00:00
1,21,21/205 Grete-Henry-Straße -> Auf der Lieth,2,01.01.2019,Vor dem Walde,VDW11,09:49:00,09:49:40,09:49:00,09:49:40,00:00:00
2,21,21/205 Grete-Henry-Straße -> Auf der Lieth,2,01.01.2019,Schöneberger Straße,SBS13,09:50:00,09:50:51,09:50:00,09:50:51,00:00:00
3,21,21/205 Grete-Henry-Straße -> Auf der Lieth,2,01.01.2019,Gehrenring,GRR11,09:51:00,09:51:33,09:51:00,09:51:33,00:00:00
4,21,21/205 Grete-Henry-Straße -> Auf der Lieth,2,01.01.2019,Adolf-Sievert-Straße,ASS11,09:52:00,09:52:43,09:52:00,09:52:43,00:00:00


#### Changing Column Names

In [None]:
# Mapping from the German column labels of the January export to the English
# labels used by the February export. Built from ordered (German, English)
# pairs; the resulting dict has the same keys, values and order.
newCol = dict([
    ('Linie', 'Line'),
    ('Route', 'Pattern'),
    ('Richtung', 'Direction'),
    ('Datum', 'Date'),
    ('Hst', 'Stop'),
    ('Hst-Nr', 'Stop no'),
    ('Soll An', 'Sched. arr'),
    ('Ist An', 'Actual arr'),
    ('Soll Ab', 'Sched. dep'),
    ('Ist Ab', 'Actual dep'),
    ('Stand Hst', 'Dwell time'),
])

In [None]:
# Translate the German column labels to English using the newCol mapping.
# The result is reassigned rather than renamed in place: df2new was selected
# out of df2, so an in-place rename can raise pandas' SettingWithCopyWarning.
# rename() without inplace returns a new frame and leaves the frame that the
# earlier preview cell displayed untouched.
df2new = df2new.rename(columns=newCol)

In [5]:
# Preview the first five rows again to check the column labels after the rename.
df2new.head(n=5)

Unnamed: 0,Line,Pattern,Direction,Date,Stop,Stop no,Sched. arr,Actual arr,Sched. dep,Actual dep,Dwell time
0,21,21/205 Grete-Henry-Straße -> Auf der Lieth,2,01.01.2019,Grete-Henry-Straße,GHS11,09:48:00,09:48:37,09:48:00,09:48:37,00:00:00
1,21,21/205 Grete-Henry-Straße -> Auf der Lieth,2,01.01.2019,Vor dem Walde,VDW11,09:49:00,09:49:40,09:49:00,09:49:40,00:00:00
2,21,21/205 Grete-Henry-Straße -> Auf der Lieth,2,01.01.2019,Schöneberger Straße,SBS13,09:50:00,09:50:51,09:50:00,09:50:51,00:00:00
3,21,21/205 Grete-Henry-Straße -> Auf der Lieth,2,01.01.2019,Gehrenring,GRR11,09:51:00,09:51:33,09:51:00,09:51:33,00:00:00
4,21,21/205 Grete-Henry-Straße -> Auf der Lieth,2,01.01.2019,Adolf-Sievert-Straße,ASS11,09:52:00,09:52:43,09:52:00,09:52:43,00:00:00


In [None]:
# Write the January frame to CSV; index=False keeps the row index out of the file.
df2new.to_csv(path_or_buf='january_data.csv', index=False)