# train type


In [1]:
import numpy as np
from math import radians, sin, cos, sqrt, atan2
from datetime import datetime

In [2]:
import pandas as pd

# Load the dataset
df = pd.read_csv('DBtrainrides.csv')  # Update with your actual file path




In [3]:
# Function to categorize 'line' entries
def categorize_line(line):
    if pd.isnull(line) or line.strip() == '':
        return 'No Prefix'
    # Extract the alphabetic prefix from the line
    prefix = ''.join(filter(str.isalpha, line))
    if prefix == '':
        return 'No Prefix'
    elif prefix in ['RE', 'RB']:
        return 'RE/RB Prefix'
    else:
        return 'Other Prefix'

In [4]:
# Apply the function to create a new column 'line_category'
df['line_category'] = df['line'].apply(categorize_line)

# Split the data into three DataFrames
df_re_rb = df[df['line_category'] == 'RE/RB Prefix']
df_no_prefix = df[df['line_category'] == 'No Prefix']
df_other_prefix = df[df['line_category'] == 'Other Prefix']

# Display the number of entries in each category
print("Number of entries with RE/RB prefix:", len(df_re_rb))
print("Number of entries with no prefix:", len(df_no_prefix))
print("Number of entries with other prefixes:", len(df_other_prefix))

Number of entries with RE/RB prefix: 180382
Number of entries with no prefix: 1803758
Number of entries with other prefixes: 77217


In [6]:
df

Unnamed: 0,ID,line,path,eva_nr,category,station,state,city,zip,long,...,arrival_plan,departure_plan,arrival_change,departure_change,arrival_delay_m,departure_delay_m,info,arrival_delay_check,departure_delay_check,line_category
0,1573967790757085557-2407072312-14,20,Stolberg(Rheinl)Hbf Gl.44|Eschweiler-St.Jöris|...,8000001,2,Aachen Hbf,Nordrhein-Westfalen,Aachen,52064,6.091499,...,2024-07-08 00:00:00,2024-07-08 00:01:00,2024-07-08 00:03:00,2024-07-08 00:04:00,3,3,,on_time,on_time,No Prefix
1,349781417030375472-2407080017-1,18,,8000001,2,Aachen Hbf,Nordrhein-Westfalen,Aachen,52064,6.091499,...,,2024-07-08 00:17:00,,,0,0,,on_time,on_time,No Prefix
2,7157250219775883918-2407072120-25,1,Hamm(Westf)Hbf|Kamen|Kamen-Methler|Dortmund-Ku...,8000406,4,Aachen-Rothe Erde,Nordrhein-Westfalen,Aachen,52066,6.116475,...,2024-07-08 00:03:00,2024-07-08 00:04:00,2024-07-08 00:03:00,2024-07-08 00:04:00,0,0,,on_time,on_time,No Prefix
3,349781417030375472-2407080017-2,18,Aachen Hbf,8000404,5,Aachen West,Nordrhein-Westfalen,Aachen,52072,6.070715,...,2024-07-08 00:20:00,2024-07-08 00:21:00,,,0,0,,on_time,on_time,No Prefix
4,1983158592123451570-2407080010-3,33,Herzogenrath|Kohlscheid,8000404,5,Aachen West,Nordrhein-Westfalen,Aachen,52072,6.070715,...,2024-07-08 00:20:00,2024-07-08 00:21:00,2024-07-08 00:20:00,2024-07-08 00:21:00,0,0,,on_time,on_time,No Prefix
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2061352,-3877986638624297828-2407142237-4,S9,Bottrop Hbf|Bottrop-Boy|Gladbeck West,8002795,5,Herten (Westf),Nordrhein-Westfalen,Herten,45699,7.139053,...,2024-07-14 23:17:00,2024-07-14 23:17:00,,,0,0,,on_time,on_time,Other Prefix
2061353,3370285438001482281-2407142234-7,8,Lübeck-Travemünde Strand|Lübeck-Travemünde Haf...,8003775,5,Lübeck-Moisling,Schleswig-Holstein,Lübeck,23560,10.629500,...,2024-07-14 23:10:00,2024-07-14 23:11:00,2024-07-14 23:11:00,2024-07-14 23:12:00,1,1,Information,on_time,on_time,No Prefix
2061354,-8774053210575864323-2407142305-3,80,Bad Oldesloe|Reinfeld(Holst),8003775,5,Lübeck-Moisling,Schleswig-Holstein,Lübeck,23560,10.629500,...,2024-07-14 23:17:00,2024-07-14 23:18:00,2024-07-14 23:17:00,2024-07-14 23:18:00,0,0,Information,on_time,on_time,No Prefix
2061355,-1537118689903044118-2407142354-1,11,,8001580,4,Düsseldorf Flughafen Terminal,Nordrhein-Westfalen,Düsseldorf,40474,6.766979,...,,2024-07-14 23:54:00,,,0,0,Information. (Quelle: zuginfo.nrw),on_time,on_time,No Prefix


In [5]:
# Optionally, display unique prefixes in 'line' column for further analysis
df['line_prefix'] = df['line'].str.extract(r'^([A-Za-z]+)', expand=False)
unique_prefixes = df['line_prefix'].dropna().unique()

print("\nUnique prefixes in 'line' column:")
print(unique_prefixes)


Unique prefixes in 'line' column:
['RB' 'RE' 'S' 'RS' 'EV' 'EM' 'C' 'A' 'U' 'L' 'Biker' 'Rb' 'N' 'MEX' 'X'
 'T' 'FEX' 'WX' 'EX']


In [6]:
#df_other_prefix.head(10)

In [7]:
#print(df[df['line'].str.startswith('Biker', na=False)])

In [6]:
def parse_id(id_str):
    if pd.isnull(id_str):
        return None, None, None
    parts = id_str.split('-')
    if len(parts) == 3:
        route_id = parts[0]
        departure_time_str = parts[1]
        station_number = parts[2]
    elif len(parts) == 4 and parts[0] == '':
        # This is when route_id starts with a minus sign
        route_id = '-' + parts[1]
        departure_time_str = parts[2]
        station_number = parts[3]
    else:
        # ID does not conform to expected pattern
        return None, None, None
    return route_id, departure_time_str, station_number

In [7]:
def parse_departure_time(departure_time_str):
    if not isinstance(departure_time_str, str) or len(departure_time_str) != 10:
        return None
    try:
        year = int('20' + departure_time_str[0:2])  # Assuming years are 2020+
        month = int(departure_time_str[2:4])
        day = int(departure_time_str[4:6])
        hour = int(departure_time_str[6:8])
        minute = int(departure_time_str[8:10])
        dt = datetime(year, month, day, hour, minute)
    except ValueError:
        dt = None
    return dt

In [8]:
# Parse the 'ID' column
df[['route_id', 'departure_time_str', 'station_number']] = df['ID'].apply(
    lambda x: pd.Series(parse_id(x))
)

# Convert 'departure_time_str' to datetime
df['departure_time'] = df['departure_time_str'].apply(parse_departure_time)

# Convert 'station_number' to numeric
df['station_number'] = pd.to_numeric(df['station_number'], errors='coerce')

# Ensure 'long' and 'lat' are numeric
df['long'] = pd.to_numeric(df['long'], errors='coerce')
df['lat'] = pd.to_numeric(df['lat'], errors='coerce')

# Remove entries with missing coordinates
df = df.dropna(subset=['long', 'lat'])

# Sort the DataFrame
df = df.sort_values(by=['route_id', 'departure_time', 'station_number'])


In [12]:
# Function to compute the Haversine distance between two coordinate points
def haversine_distance(lat1, lon1, lat2, lon2):
    # Convert latitude and longitude from degrees to radians
    R = 6371  # Earth radius in kilometers
    phi1 = radians(lat1)
    phi2 = radians(lat2)
    delta_phi = radians(lat2 - lat1)
    delta_lambda = radians(lon2 - lon1)
    # Compute haversine formula
    a = sin(delta_phi / 2.0) ** 2 + \
        cos(phi1) * cos(phi2) * sin(delta_lambda / 2.0) ** 2
    c = 2 * atan2(sqrt(a), sqrt(1 - a))
    distance = R * c
    return distance  # Distance in kilometers

In [13]:
def compute_average_distance(group):
    group = group.sort_values('station_number')
    # Compute distances between consecutive stops
    latitudes = group['lat'].values
    longitudes = group['long'].values
    distances = []
    for i in range(len(group) - 1):
        lat1, lon1 = latitudes[i], longitudes[i]
        lat2, lon2 = latitudes[i + 1], longitudes[i + 1]
        distance = haversine_distance(lat1, lon1, lat2, lon2)
        distances.append(distance)
    # Calculate average distance
    if distances:
        avg_distance = np.mean(distances)
    else:
        avg_distance = 0  # Only one stop in the journey
    group['avg_distance_between_stops'] = avg_distance
    return group

In [14]:
# Apply the function to each journey
df = df.groupby(['route_id', 'departure_time']).apply(compute_average_distance)

# Now, df has a new column 'avg_distance_between_stops'
# Let's examine some examples
print(df[['route_id', 'departure_time', 'avg_distance_between_stops']].drop_duplicates().head())

# Optionally, visualize the distribution of average distances
import matplotlib.pyplot as plt

avg_distances = df[['route_id', 'departure_time', 'avg_distance_between_stops']].drop_duplicates()['avg_distance_between_stops']

plt.figure(figsize=(10, 6))
plt.hist(avg_distances, bins=100)
plt.xlabel('Average Distance Between Stops (km)')
plt.ylabel('Number of Journeys')
plt.title('Distribution of Average Distances Between Stops')
plt.show()

KeyboardInterrupt: 

In [None]:
# Analyze the distribution to decide on a threshold
# For this example, let's assume a threshold of 2 km
threshold = 3  # Adjust based on your data analysis

# Create a new column 'train_type' based on the average distance
def classify_train_type(avg_distance):
    if avg_distance <= threshold:
        return 'Tram'
    else:
        return 'Regional Train'

# Apply the classification
df['train_type'] = df['avg_distance_between_stops'].apply(classify_train_type)

# Verify the classification
print(df[['route_id', 'departure_time', 'avg_distance_between_stops', 'train_type']].drop_duplicates().head())


                                                              route_id  \
route_id             departure_time                                      
-1001326572688500578 2024-07-08 20:41:00 269718   -1001326572688500578   
                     2024-07-09 20:41:00 578410   -1001326572688500578   
                     2024-07-10 20:41:00 889314   -1001326572688500578   
                     2024-07-11 20:41:00 1199426  -1001326572688500578   
                     2024-07-12 20:41:00 1506702  -1001326572688500578   

                                                      departure_time  \
route_id             departure_time                                    
-1001326572688500578 2024-07-08 20:41:00 269718  2024-07-08 20:41:00   
                     2024-07-09 20:41:00 578410  2024-07-09 20:41:00   
                     2024-07-10 20:41:00 889314  2024-07-10 20:41:00   
                     2024-07-11 20:41:00 1199426 2024-07-11 20:41:00   
                     2024-07-12 20:41:00 1506702 

In [None]:
# If 'line_category' exists, use it
# Else, use the average distance classification

def final_classification(row):
    if row['line_category'] in ['Regional Train', 'Tram']:
        return row['line_category']
    else:
        return row['train_type']

df['final_train_type'] = df.apply(final_classification, axis=1)


In [None]:
df.head(10)


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,ID,line,path,eva_nr,category,station,state,city,zip,long,...,departure_delay_check,line_category,line_prefix,route_id,departure_time_str,station_number,departure_time,avg_distance_between_stops,train_type,final_train_type
route_id,departure_time,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
-1001326572688500578,2024-07-08 20:41:00,269718,-1001326572688500578-2407082041-1,14,,8011113,1,Berlin Südkreuz,Berlin,Berlin,12101,13.365319,...,on_time,No Prefix,,-1001326572688500578,2407082041,1,2024-07-08 20:41:00,4.048603,Regional Train,Regional Train
-1001326572688500578,2024-07-08 20:41:00,269968,-1001326572688500578-2407082041-2,14,Berlin Südkreuz,8011118,2,Berlin Potsdamer Platz,Berlin,Berlin,10785,13.375988,...,on_time,No Prefix,,-1001326572688500578,2407082041,2,2024-07-08 20:41:00,4.048603,Regional Train,Regional Train
-1001326572688500578,2024-07-08 20:41:00,265931,-1001326572688500578-2407082041-4,14,Berlin Südkreuz|Berlin Potsdamer Platz|Berlin Hbf,8011167,4,Jungfernheide,Berlin,Berlin,10589,13.299437,...,on_time,No Prefix,,-1001326572688500578,2407082041,4,2024-07-08 20:41:00,4.048603,Regional Train,Regional Train
-1001326572688500578,2024-07-08 20:41:00,275119,-1001326572688500578-2407082041-5,14,Berlin Südkreuz|Berlin Potsdamer Platz|Berlin ...,8010404,2,Berlin-Spandau,Berlin,Berlin,13597,13.196898,...,on_time,No Prefix,,-1001326572688500578,2407082041,5,2024-07-08 20:41:00,4.048603,Regional Train,Regional Train
-1001326572688500578,2024-07-08 20:41:00,273997,-1001326572688500578-2407082041-6,14,Berlin Südkreuz|Berlin Potsdamer Platz|Berlin ...,8080040,5,Albrechtshof,Berlin,Berlin,13591,13.128917,...,on_time,No Prefix,,-1001326572688500578,2407082041,6,2024-07-08 20:41:00,4.048603,Regional Train,Regional Train
-1001326572688500578,2024-07-08 20:41:00,286172,-1001326572688500578-2407082041-7,14,Berlin Südkreuz|Berlin Potsdamer Platz|Berlin ...,8081586,5,Seegefeld,Brandenburg,Falkensee,14612,13.11681,...,delay,No Prefix,,-1001326572688500578,2407082041,7,2024-07-08 20:41:00,4.048603,Regional Train,Regional Train
-1001326572688500578,2024-07-08 20:41:00,277154,-1001326572688500578-2407082041-8,14,Berlin Südkreuz|Berlin Potsdamer Platz|Berlin ...,8013479,5,Falkensee,Brandenburg,Falkensee,14612,13.089636,...,on_time,No Prefix,,-1001326572688500578,2407082041,8,2024-07-08 20:41:00,4.048603,Regional Train,Regional Train
-1001326572688500578,2024-07-09 20:41:00,578410,-1001326572688500578-2407092041-1,14,,8011113,1,Berlin Südkreuz,Berlin,Berlin,12101,13.365319,...,on_time,No Prefix,,-1001326572688500578,2407092041,1,2024-07-09 20:41:00,4.048603,Regional Train,Regional Train
-1001326572688500578,2024-07-09 20:41:00,578664,-1001326572688500578-2407092041-2,14,Berlin Südkreuz,8011118,2,Berlin Potsdamer Platz,Berlin,Berlin,10785,13.375988,...,on_time,No Prefix,,-1001326572688500578,2407092041,2,2024-07-09 20:41:00,4.048603,Regional Train,Regional Train
-1001326572688500578,2024-07-09 20:41:00,574598,-1001326572688500578-2407092041-4,14,Berlin Südkreuz|Berlin Potsdamer Platz|Berlin Hbf,8011167,4,Jungfernheide,Berlin,Berlin,10589,13.299437,...,on_time,No Prefix,,-1001326572688500578,2407092041,4,2024-07-09 20:41:00,4.048603,Regional Train,Regional Train


In [None]:
unique_train_types = df['final_train_type'].unique()
print("Unique train types:", unique_train_types)

Unique train types: ['Regional Train' 'Tram']


In [None]:
# Assuming 'df' is your DataFrame that includes the 'final_train_type' column

# Create a DataFrame for Regional Trains
df_regional_train = df[df['final_train_type'] == 'Regional Train'].copy()

# Create a DataFrame for Trams
df_tram = df[df['final_train_type'] == 'Tram'].copy()

In [None]:
# Display the number of entries in each DataFrame
print("Number of entries classified as Regional Train:", len(df_regional_train))
print("Number of entries classified as Tram:", len(df_tram))



Number of entries classified as Regional Train: 971717
Number of entries classified as Tram: 1089640


In [None]:
# Examine the first few rows of each DataFrame
print("\nFirst few rows of Regional Train DataFrame:")
df_regional_train.head(50)




First few rows of Regional Train DataFrame:


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,ID,line,path,eva_nr,category,station,state,city,zip,long,...,departure_delay_check,line_category,line_prefix,route_id,departure_time_str,station_number,departure_time,avg_distance_between_stops,train_type,final_train_type
route_id,departure_time,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
-1001326572688500578,2024-07-08 20:41:00,269718,-1001326572688500578-2407082041-1,14,,8011113,1,Berlin Südkreuz,Berlin,Berlin,12101,13.365319,...,on_time,No Prefix,,-1001326572688500578,2407082041,1,2024-07-08 20:41:00,4.048603,Regional Train,Regional Train
-1001326572688500578,2024-07-08 20:41:00,269968,-1001326572688500578-2407082041-2,14,Berlin Südkreuz,8011118,2,Berlin Potsdamer Platz,Berlin,Berlin,10785,13.375988,...,on_time,No Prefix,,-1001326572688500578,2407082041,2,2024-07-08 20:41:00,4.048603,Regional Train,Regional Train
-1001326572688500578,2024-07-08 20:41:00,265931,-1001326572688500578-2407082041-4,14,Berlin Südkreuz|Berlin Potsdamer Platz|Berlin Hbf,8011167,4,Jungfernheide,Berlin,Berlin,10589,13.299437,...,on_time,No Prefix,,-1001326572688500578,2407082041,4,2024-07-08 20:41:00,4.048603,Regional Train,Regional Train
-1001326572688500578,2024-07-08 20:41:00,275119,-1001326572688500578-2407082041-5,14,Berlin Südkreuz|Berlin Potsdamer Platz|Berlin ...,8010404,2,Berlin-Spandau,Berlin,Berlin,13597,13.196898,...,on_time,No Prefix,,-1001326572688500578,2407082041,5,2024-07-08 20:41:00,4.048603,Regional Train,Regional Train
-1001326572688500578,2024-07-08 20:41:00,273997,-1001326572688500578-2407082041-6,14,Berlin Südkreuz|Berlin Potsdamer Platz|Berlin ...,8080040,5,Albrechtshof,Berlin,Berlin,13591,13.128917,...,on_time,No Prefix,,-1001326572688500578,2407082041,6,2024-07-08 20:41:00,4.048603,Regional Train,Regional Train
-1001326572688500578,2024-07-08 20:41:00,286172,-1001326572688500578-2407082041-7,14,Berlin Südkreuz|Berlin Potsdamer Platz|Berlin ...,8081586,5,Seegefeld,Brandenburg,Falkensee,14612,13.11681,...,delay,No Prefix,,-1001326572688500578,2407082041,7,2024-07-08 20:41:00,4.048603,Regional Train,Regional Train
-1001326572688500578,2024-07-08 20:41:00,277154,-1001326572688500578-2407082041-8,14,Berlin Südkreuz|Berlin Potsdamer Platz|Berlin ...,8013479,5,Falkensee,Brandenburg,Falkensee,14612,13.089636,...,on_time,No Prefix,,-1001326572688500578,2407082041,8,2024-07-08 20:41:00,4.048603,Regional Train,Regional Train
-1001326572688500578,2024-07-09 20:41:00,578410,-1001326572688500578-2407092041-1,14,,8011113,1,Berlin Südkreuz,Berlin,Berlin,12101,13.365319,...,on_time,No Prefix,,-1001326572688500578,2407092041,1,2024-07-09 20:41:00,4.048603,Regional Train,Regional Train
-1001326572688500578,2024-07-09 20:41:00,578664,-1001326572688500578-2407092041-2,14,Berlin Südkreuz,8011118,2,Berlin Potsdamer Platz,Berlin,Berlin,10785,13.375988,...,on_time,No Prefix,,-1001326572688500578,2407092041,2,2024-07-09 20:41:00,4.048603,Regional Train,Regional Train
-1001326572688500578,2024-07-09 20:41:00,574598,-1001326572688500578-2407092041-4,14,Berlin Südkreuz|Berlin Potsdamer Platz|Berlin Hbf,8011167,4,Jungfernheide,Berlin,Berlin,10589,13.299437,...,on_time,No Prefix,,-1001326572688500578,2407092041,4,2024-07-09 20:41:00,4.048603,Regional Train,Regional Train


In [None]:
print("\nFirst few rows of Tram DataFrame:")
df_tram.head(50)


First few rows of Tram DataFrame:


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,ID,line,path,eva_nr,category,station,state,city,zip,long,...,departure_delay_check,line_category,line_prefix,route_id,departure_time_str,station_number,departure_time,avg_distance_between_stops,train_type,final_train_type
route_id,departure_time,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
-1002143947516173508,2024-07-08 17:36:00,209585,-1002143947516173508-2407081736-1,2,,8000669,4,Aumühle,Schleswig-Holstein,Aumühle,21521,10.314672,...,on_time,No Prefix,,-1002143947516173508,2407081736,1,2024-07-08 17:36:00,2.551192,Tram,Tram
-1002143947516173508,2024-07-08 17:36:00,225590,-1002143947516173508-2407081736-2,2,Aumühle,8006541,5,Wohltorf,Schleswig-Holstein,Wohltorf,21521,10.278184,...,on_time,No Prefix,,-1002143947516173508,2407081736,2,2024-07-08 17:36:00,2.551192,Tram,Tram
-1002143947516173508,2024-07-08 17:36:00,222391,-1002143947516173508-2407081736-3,2,Aumühle|Wohltorf,8005016,5,Reinbek,Schleswig-Holstein,Reinbek,21465,10.253335,...,on_time,No Prefix,,-1002143947516173508,2407081736,3,2024-07-08 17:36:00,2.551192,Tram,Tram
-1002143947516173508,2024-07-08 17:36:00,215656,-1002143947516173508-2407081736-4,2,Aumühle|Wohltorf|Reinbek,8002554,3,Hamburg-Bergedorf,Hamburg,Hamburg,21029,10.206171,...,on_time,No Prefix,,-1002143947516173508,2407081736,4,2024-07-08 17:36:00,2.551192,Tram,Tram
-1002143947516173508,2024-07-08 17:36:00,220546,-1002143947516173508-2407081736-5,2,Aumühle|Wohltorf|Reinbek|Hamburg-Bergedorf,8004241,4,Nettelnburg,Hamburg,Hamburg,21035,10.181408,...,on_time,No Prefix,,-1002143947516173508,2407081736,5,2024-07-08 17:36:00,2.551192,Tram,Tram
-1002143947516173508,2024-07-08 17:36:00,226475,-1002143947516173508-2407081736-6,2,Aumühle|Wohltorf|Reinbek|Hamburg-Bergedorf|Ham...,8000779,4,Allermöhe,Hamburg,Hamburg,21035,10.158735,...,on_time,No Prefix,,-1002143947516173508,2407081736,6,2024-07-08 17:36:00,2.551192,Tram,Tram
-1002143947516173508,2024-07-08 17:36:00,219273,-1002143947516173508-2407081736-7,2,Aumühle|Wohltorf|Reinbek|Hamburg-Bergedorf|Ham...,8004048,4,Mittlerer Landweg,Hamburg,Hamburg,21033,10.131122,...,on_time,No Prefix,,-1002143947516173508,2407081736,7,2024-07-08 17:36:00,2.551192,Tram,Tram
-1002143947516173508,2024-07-08 17:36:00,211165,-1002143947516173508-2407081736-8,2,Aumühle|Wohltorf|Reinbek|Hamburg-Bergedorf|Ham...,8000971,5,Billwerder-Moorfleet,Hamburg,Hamburg,22113,10.096658,...,on_time,No Prefix,,-1002143947516173508,2407081736,8,2024-07-08 17:36:00,2.551192,Tram,Tram
-1002143947516173508,2024-07-08 17:36:00,224299,-1002143947516173508-2407081736-9,2,Aumühle|Wohltorf|Reinbek|Hamburg-Bergedorf|Ham...,8005870,5,Tiefstack,Hamburg,Hamburg,20539,10.065591,...,on_time,No Prefix,,-1002143947516173508,2407081736,9,2024-07-08 17:36:00,2.551192,Tram,Tram
-1002143947516173508,2024-07-08 17:36:00,233141,-1002143947516173508-2407081736-13,2,Aumühle|Wohltorf|Reinbek|Hamburg-Bergedorf|Ham...,8002548,2,Hamburg Dammtor,Hamburg,Hamburg,20354,9.989566,...,on_time,No Prefix,,-1002143947516173508,2407081736,13,2024-07-08 17:36:00,2.551192,Tram,Tram


In [None]:
df_regional_train.to_csv('regional_trains.csv', index=False)
df_tram.to_csv('trams.csv', index=False)
