In [1]:
import pandas as pd
import random
from faker import Faker

# Reproducibility setup: one shared seed value is applied to both Faker and
# the standard-library random module.
SEED = 42

fake = Faker("en_IN")  # Indian locale, so generated names are Indian
Faker.seed(SEED)
random.seed(SEED)

# Indian Railway Stations
# Each entry is a (station name, state) pair.
indian_stations = [
    ("New Delhi", "Delhi"),
    ("Mumbai Central", "Maharashtra"),
    ("Chennai Central", "Tamil Nadu"),
    ("Howrah Junction", "West Bengal"),
    ("Bangalore City", "Karnataka"),
    ("Secunderabad", "Telangana"),
    ("Ahmedabad", "Gujarat"),
    ("Pune Junction", "Maharashtra"),
    ("Kolkata Shalimar", "West Bengal"),
    ("Lucknow Charbagh", "Uttar Pradesh"),
    ("Patna Junction", "Bihar"),
    ("Jaipur Junction", "Rajasthan"),
    ("Bhopal Junction", "Madhya Pradesh"),
    ("Thiruvananthapuram", "Kerala"),
    ("Guwahati", "Assam"),
    ("Visakhapatnam", "Andhra Pradesh"),
    ("Nagpur", "Maharashtra"),
    ("Coimbatore", "Tamil Nadu"),
    ("Varanasi Junction", "Uttar Pradesh"),
    ("Madurai Junction", "Tamil Nadu"),
]

# Indian Train Names
# Pool of names that generated trains are drawn from.
indian_trains = [
    "Rajdhani Express",
    "Shatabdi Express",
    "Duronto Express",
    "Gatiman Express",
    "Tejas Express",
    "Garib Rath Express",
    "Vande Bharat Express",
    "Maharaja Express",
    "Deccan Queen",
    "Humsafar Express",
    "Sampark Kranti Express",
    "Jan Shatabdi Express",
]

# Generating Data
# Number of rows to generate for each table.
num_stations = len(indian_stations)  # one row per entry in indian_stations
num_trains = 50
num_passengers = 5_000
num_bookings = 10_000
num_tickets = 20_000

# 1️⃣ Generate Trains Data
# One tuple per train: a sequential id starting at 1, a random name from
# indian_trains, a random type, a coach count in 10..20 and a seat count in
# 300..1000. The random draws happen in that field order.
train_types = ["Express", "Superfast", "Local"]
train_data = [
    (
        train_id,
        random.choice(indian_trains),
        random.choice(train_types),
        random.randint(10, 20),
        random.randint(300, 1000),
    )
    for train_id in range(1, num_trains + 1)
]

# 2️⃣ Generate Stations Data
# Number the stations from 1 in list order, giving (id, name, state) tuples.
station_data = [
    (station_id, station_name, state_name)
    for station_id, (station_name, state_name) in enumerate(indian_stations, start=1)
]

# 3️⃣ Generate Passengers Data with Indian Names
def _make_passenger(passenger_id):
    """Build one passenger tuple: (id, name, age, gender, phone number)."""
    # The phone number is drawn first. It is an integer between 7000000000 and
    # 9999999999, so its string form always has exactly 10 digits.
    phone = str(random.randint(7000000000, 9999999999))
    full_name = fake.name()
    age = random.randint(10, 80)
    gender = random.choice(["Male", "Female", "Other"])
    return (passenger_id, full_name, age, gender, phone)


passenger_data = [_make_passenger(pid) for pid in range(1, num_passengers + 1)]

# 4️⃣ Generate Bookings Data
# One tuple per booking: (booking id, passenger id, train id, journey date,
# source station id, destination station id, booking status). Passenger, train
# and station ids are drawn from the 1-based id ranges of their tables.
station_ids = range(1, num_stations + 1)
booking_data = []
for booking_id in range(1, num_bookings + 1):
    passenger_id = random.randint(1, num_passengers)
    train_id = random.randint(1, num_trains)
    # NOTE(review): date_this_year() is relative to the day the cell is run, so
    # journey dates can differ between runs even with fixed seeds — confirm
    # whether a fixed date range is wanted instead.
    journey_date = fake.date_this_year()
    # Draw two *distinct* station ids so that no booking starts and ends at the
    # same station. random.sample raises ValueError if fewer than two stations
    # exist, which is the right outcome: no valid journey can be generated.
    source_id, destination_id = random.sample(station_ids, 2)
    status = random.choice(["Confirmed", "Waitlisted", "Cancelled"])
    booking_data.append((booking_id, passenger_id, train_id, journey_date,
                         source_id, destination_id, status))

# 5️⃣ Generate Tickets Data
# One tuple per ticket: a sequential id starting at 1, a random booking id, a
# seat label, a coach label, a class type and a fare rounded to 2 decimals.
# The random draws are made in the same order as the tuple fields.
ticket_data = []
for ticket_id in range(1, num_tickets + 1):
    booking_ref = random.randint(1, num_bookings)
    seat_first = random.randint(1, 50)
    seat_second = random.randint(1, 6)
    seat_label = f"S{seat_first}-{seat_second}"
    coach_index = random.randint(1, 10)
    coach_label = f"C{coach_index}"
    travel_class = random.choice(["Sleeper", "AC", "General"])
    fare = round(random.uniform(50, 500), 2)
    ticket_data.append(
        (ticket_id, booking_ref, seat_label, coach_label, travel_class, fare)
    )

# Convert to DataFrames
# Column names for each table, listed in the same order as the fields of the
# row tuples they label.
train_columns = ["train_id", "train_name", "train_type", "total_coaches", "seat_capacity"]
station_columns = ["station_id", "station_name", "state"]
passenger_columns = ["passenger_id", "full_name", "age", "gender", "phone_number"]
booking_columns = [
    "booking_id",
    "passenger_id",
    "train_id",
    "journey_date",
    "source_station_id",
    "destination_station_id",
    "booking_status",
]
ticket_columns = [
    "ticket_id",
    "booking_id",
    "seat_number",
    "coach_number",
    "class_type",
    "fare_amount",
]

df_trains = pd.DataFrame(train_data, columns=train_columns)
df_stations = pd.DataFrame(station_data, columns=station_columns)
df_passengers = pd.DataFrame(passenger_data, columns=passenger_columns)
df_bookings = pd.DataFrame(booking_data, columns=booking_columns)
df_tickets = pd.DataFrame(ticket_data, columns=ticket_columns)

# Save as CSV
# Each table is written to its own file (relative path, so it lands in the
# current working directory) without the DataFrame index column.
csv_outputs = [
    ("trains.csv", df_trains),
    ("stations.csv", df_stations),
    ("passengers.csv", df_passengers),
    ("bookings.csv", df_bookings),
    ("tickets.csv", df_tickets),
]
for csv_name, frame in csv_outputs:
    frame.to_csv(csv_name, index=False)

print("✅ Dataset successfully created with Indian railway data!")

✅ Dataset successfully created with Indian railway data!


In [2]:
from IPython.display import display, Markdown

# Markdown source for a "Contact Details" section. The two trailing spaces at
# the end of each line inside the string are Markdown hard line breaks, so
# they must be kept.
# NOTE(review): this embeds a personal email address and phone number in the
# notebook source and its saved output — confirm that is intended before sharing.
contact_details = """
## 📞 Contact Details  
- 🔗 **LinkedIn:** [Vinay Kumar Panika](https://www.linkedin.com/in/vinaykumarpanika)  
- 💻 **GitHub:** [Vinaypanika](https://github.com/Vinaypanika)  
- 📧 **Email:** vinaypanika@gmail.com  
- 📱 **Phone:** +91-7415552944  
"""

# Render the Markdown text as rich output for this cell.
display(Markdown(contact_details))


## 📞 Contact Details  
- 🔗 **LinkedIn:** [Vinay Kumar Panika](https://www.linkedin.com/in/vinaykumarpanika)  
- 💻 **GitHub:** [Vinaypanika](https://github.com/Vinaypanika)  
- 📧 **Email:** vinaypanika@gmail.com  
- 📱 **Phone:** +91-7415552944  
