In [2]:
import pandas as pd

# Load CSVs
df_trains = pd.read_csv('Indian Railways/trains.csv')
df_stations = pd.read_csv('Indian Railways/stations.csv')
df_bookings = pd.read_csv('Indian Railways/bookings.csv')
df_tickets = pd.read_csv('Indian Railways/tickets.csv')
df_passengers = pd.read_csv('Indian Railways/passengers.csv')

In [3]:
# 1. Select * from trains
print("All trains:")
print(df_trains)



All trains:
    train_id              train_name train_type  total_coaches  seat_capacity
0          1  Sampark Kranti Express    Express             10            581
1          2         Gatiman Express    Express             12            404
2          3  Sampark Kranti Express      Local             18            389
3          4        Humsafar Express  Superfast             10            330
4          5        Shatabdi Express    Express             13            817
5          6        Humsafar Express    Express             18            503
6          7    Jan Shatabdi Express      Local             18            729
7          8         Gatiman Express  Superfast             19            584
8          9        Rajdhani Express    Express             16            648
9         10           Tejas Express    Express             13            644
10        11        Shatabdi Express    Express             16            399
11        12      Garib Rath Express  Superfast     

In [4]:
# 2. Total number of stations
print("\nTotal number of stations:")
print(len(df_stations))


Total number of stations:
20


In [5]:
# 3. Trains running on route New Delhi to Mumbai Central
# Get station ids for source and destination
source_id = df_stations.loc[df_stations['station_name'] == 'New Delhi', 'station_id'].values[0]
dest_id = df_stations.loc[df_stations['station_name'] == 'Mumbai Central', 'station_id'].values[0]

# Filter bookings on route
route_bookings = df_bookings[
    (df_bookings['source_station_id'] == source_id) &
    (df_bookings['destination_station_id'] == dest_id)
]

# Join with trains to get train info, use drop_duplicates for distinct
route_trains = df_trains[df_trains['train_id'].isin(route_bookings['train_id'])]
print("\nTrains running from New Delhi to Mumbai Central:")
print(route_trains[['train_name', 'train_type', 'train_id']].drop_duplicates())




Trains running from New Delhi to Mumbai Central:
                train_name train_type  train_id
0   Sampark Kranti Express    Express         1
1          Gatiman Express    Express         2
2   Sampark Kranti Express      Local         3
3         Humsafar Express  Superfast         4
4         Shatabdi Express    Express         5
5         Humsafar Express    Express         6
6     Jan Shatabdi Express      Local         7
8         Rajdhani Express    Express         9
9            Tejas Express    Express        10
12        Rajdhani Express      Local        13
15        Humsafar Express    Express        16
16  Sampark Kranti Express    Express        17
18        Maharaja Express      Local        19
19      Garib Rath Express  Superfast        20
21        Shatabdi Express      Local        22
23        Maharaja Express  Superfast        24
28  Sampark Kranti Express  Superfast        29
40        Shatabdi Express  Superfast        41
43           Tejas Express      Local 

In [6]:
# 4. Bookings with ticket price > 450
bookings_tickets = pd.merge(df_bookings, df_tickets, on='booking_id')
high_price_bookings = bookings_tickets[bookings_tickets['fare_amount'] > 450]
high_price_bookings_sorted = high_price_bookings.sort_values('fare_amount', ascending=False)
print("\nBookings with ticket price > 450:")
print(high_price_bookings_sorted[['booking_id', 'passenger_id', 'train_id', 'journey_date', 'fare_amount']])


Bookings with ticket price > 450:
       booking_id  passenger_id  train_id journey_date  fare_amount
17412        8726           384        35   2025-06-20       499.95
2668         1351          3088        15   2025-08-28       499.92
4975         2532          3821        26   2025-07-26       499.90
11448        5762          4127        45   2025-01-12       499.89
1907          981          2050         7   2025-08-12       499.86
...           ...           ...       ...          ...          ...
19046        9539          3084        13   2025-07-22       450.16
2634         1338          3885        33   2025-07-24       450.11
10455        5244          2322        29   2025-06-15       450.10
1107          547          3984         8   2025-01-13       450.02
14506        7318           684         5   2025-04-09       450.01

[2238 rows x 5 columns]


In [7]:
# 5. Total tickets booked for train_id = 23
count_tickets_23 = len(df_bookings[df_bookings['train_id'] == 23])
print("\nTotal tickets booked for train_id=23:", count_tickets_23)




Total tickets booked for train_id=23: 180


In [8]:
# 6. Distinct train types
print("\nDistinct train types:")
print(df_trains['train_type'].unique())




Distinct train types:
['Express' 'Local' 'Superfast']


In [9]:
# 7. Average ticket price for all bookings
avg_ticket_price = df_tickets['fare_amount'].mean()
print("\nAverage ticket price:", avg_ticket_price)




Average ticket price: 274.28716399999996


In [10]:
# 8. Trains departing from Mumbai Central
source_id_mumbai = df_stations.loc[df_stations['station_name'] == 'Mumbai Central', 'station_id'].values[0]
bookings_from_mumbai = df_bookings[df_bookings['source_station_id'] == source_id_mumbai]
trains_from_mumbai = df_trains[df_trains['train_id'].isin(bookings_from_mumbai['train_id'])]
# Add station name column
trains_from_mumbai = trains_from_mumbai.copy()
trains_from_mumbai['station_name'] = 'Mumbai Central'
print("\nTrains departing from Mumbai Central:")
print(trains_from_mumbai[['train_id', 'train_name', 'train_type', 'station_name']].drop_duplicates())




Trains departing from Mumbai Central:
    train_id              train_name train_type    station_name
0          1  Sampark Kranti Express    Express  Mumbai Central
1          2         Gatiman Express    Express  Mumbai Central
2          3  Sampark Kranti Express      Local  Mumbai Central
3          4        Humsafar Express  Superfast  Mumbai Central
4          5        Shatabdi Express    Express  Mumbai Central
5          6        Humsafar Express    Express  Mumbai Central
6          7    Jan Shatabdi Express      Local  Mumbai Central
7          8         Gatiman Express  Superfast  Mumbai Central
8          9        Rajdhani Express    Express  Mumbai Central
9         10           Tejas Express    Express  Mumbai Central
10        11        Shatabdi Express    Express  Mumbai Central
11        12      Garib Rath Express  Superfast  Mumbai Central
12        13        Rajdhani Express      Local  Mumbai Central
13        14        Shatabdi Express  Superfast  Mumbai Central
1

In [11]:
# 9. Total passengers who traveled in last month
# Assuming 'journey_date' is datetime string compatible
df_bookings['journey_date'] = pd.to_datetime(df_bookings['journey_date'])
last_month = pd.Timestamp.today() - pd.DateOffset(months=1)
recent_passengers = df_bookings[(df_bookings['journey_date'] < pd.Timestamp.today()) & (df_bookings['journey_date'] >= last_month)]
print("\nTotal passengers traveled in last month:", len(recent_passengers))



Total passengers traveled in last month: 1163


In [12]:

# 10. Average ticket price per train type
merged = pd.merge(df_bookings, df_tickets, on='booking_id')
merged = pd.merge(merged, df_trains[['train_id', 'train_type']], on='train_id')
avg_price_by_type = merged.groupby('train_type')['fare_amount'].mean().sort_values(ascending=False)
print("\nAverage ticket price per train type:")
print(avg_price_by_type)


Average ticket price per train type:
train_type
Local        276.744580
Superfast    273.893324
Express      271.385180
Name: fare_amount, dtype: float64
