In [27]:
#importing necessary python libraries
import pandas as pd
import folium
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.io as pio
pio.templates.default = "plotly_white"

metro_data = pd.read_csv("Delhi-Metro-Network.csv")

print(metro_data.head())

   Station ID         Station Name  Distance from Start (km)          Line  \
0           1             Jhil Mil                      10.3      Red line   
1           2  Welcome [Conn: Red]                      46.8     Pink line   
2           3          DLF Phase 3                      10.0   Rapid Metro   
3           4           Okhla NSIC                      23.8  Magenta line   
4           5           Dwarka Mor                      10.2     Blue line   

  Opening Date Station Layout   Latitude  Longitude  
0     4/6/2008       Elevated  28.675790  77.312390  
1   10/31/2018       Elevated  28.671800  77.277560  
2   11/14/2013       Elevated  28.493600  77.093500  
3   12/25/2017       Elevated  28.554483  77.264849  
4   12/30/2005       Elevated  28.619320  77.033260  


In [28]:
#checking for missing values
missing_values = metro_data.isnull().sum()

data_types = metro_data.dtypes

missing_values

Station ID                  0
Station Name                0
Distance from Start (km)    0
Line                        0
Opening Date                0
Station Layout              0
Latitude                    0
Longitude                   0
dtype: int64

In [29]:
#Convert opening date to datetime format
metro_data['Opening Date'] = pd.to_datetime(metro_data['Opening Date'])

In [30]:
#Geospatial Analysis
#defining a color scheme for the metro lines

line_colors = {
    'Red line': 'red',
    'Blue line': 'blue',
    'Yellow line': 'beige',
    'Green line': 'green',
    'Violet line': 'purple',
    'Pink line': 'pink',
    'Magenta line': 'darkred',
    'Orange line': 'orange',
    'Rapid Metro': 'cadetblue',
    'Aqua line': 'black',
    'Green line branch': 'lightgreen',
    'Blue line branch': 'lightblue',
    'Gray line': 'lightgray'
    }

delhi_map_with_line_tooltip = folium.Map(location =[28.7041, 77.1025], zoom_start = 10)

#adding colored markers for each metro station with line name in tooltip
for index, row in metro_data.iterrows():
  line = row['Line']
  color = line_colors.get(line, 'black') #default color is black if line not found in dictionary
  folium.Marker(
      location = [row['Latitude'], row['Longitude']],
      popup = f"{row['Station Name']}",
      tooltip = f"{row['Station Name']}, {line}",
      icon = folium.Icon(color=color)
  ).add_to(delhi_map_with_line_tooltip)

#displaying the updated map
delhi_map_with_line_tooltip

In [31]:
#temporal analysis
metro_data['Opening Date'] = metro_data['Opening Date'].dt.year

#counting the number of stations opened each year
stations_per_year = metro_data['Opening Date'].value_counts().sort_index()

stations_per_year_df = stations_per_year.reset_index()
stations_per_year_df.columns = ['Year', 'Number of Stations']

fig = px.bar(stations_per_year_df, x = 'Year', y = 'Number of Stations',
             title = 'Number of metro stations opened each year in Delhi',
             labels = {'Year':'year', 'Number of Stations': 'Number of Stations'})
fig.update_layout(xaxis_tickangle = 45, xaxis = dict(tickmode = 'linear'),
                  yaxis = dict(title = 'Number of Stations Opened'),
                  xaxis_title = 'Year')

fig.show()


In [32]:
#Line analysis

stations_per_line = metro_data['Line'].value_counts()

#calculating the total distance of each metro line (max distance from start)
total_distance_per_line = metro_data.groupby('Line')['Distance from Start (km)'].max()

avg_distance_per_line = total_distance_per_line / (stations_per_line - 1)

line_analysis = pd.DataFrame({
    'Line' : stations_per_line.index,
    'Number of stations' : stations_per_line.values,
    'Average Distance between stations (km)' : avg_distance_per_line
})

#sorting the Dataframe by the number of stations
line_analysis = line_analysis.sort_values(by='Number of stations', ascending=False)

line_analysis.reset_index(drop = True, inplace = True)
print(line_analysis)

                 Line  Number of stations  \
0           Blue line                  49   
1           Pink line                  38   
2         Yellow line                  37   
3         Voilet line                  34   
4            Red line                  29   
5        Magenta line                  25   
6           Aqua line                  21   
7          Green line                  21   
8         Rapid Metro                  11   
9    Blue line branch                   8   
10        Orange line                   6   
11          Gray line                   3   
12  Green line branch                   3   

    Average Distance between stations (km)  
0                                 1.355000  
1                                 1.097917  
2                                 1.157143  
3                                 1.950000  
4                                 1.240000  
5                                 1.050000  
6                                 1.379167  
7        

In [38]:
#creating subplots
fig = make_subplots(rows=1, cols=2, subplot_titles=('Number of Stations per Metro Line',
                                                    'Average distance between Stations per Metro line'),
                    horizontal_spacing = 0.2)

#plot for Number of stations per line
fig.add_trace(
    go.Bar(y=line_analysis['Line'], x=line_analysis['Number of stations'],
           orientation='h', name='Number of stations', marker_color= 'crimson'),
    row=1, col=1
)

#plot for Average Distance between stations
fig.add_trace(
    go.Bar(y=line_analysis['Line'], x=line_analysis['Average Distance between stations (km)'],
           orientation='h', name='Average distance (km)', marker_color= 'navy'),
    row=1, col=2
)

#update xaxis properties
fig.update_xaxes(title_text = 'Number of stations', row=1, col=1)
fig.update_xaxes(title_text = 'Average Distance between stations (km)', row=1, col=2)

#update layout
fig.update_layout(height = 600, width = 900, title_text='Metro Line Analysis', template = 'plotly_white')

fig.show()

In [49]:
layout_counts = metro_data['Station Layout'].value_counts()

#creating the bar plot using plotly
fig = px.bar(x= layout_counts.index, y= layout_counts.values,
             labels = {'x': 'Station Layout', 'y': 'Number of stations'},
             title = 'Distribution of Delhi Metro Station Layout',
             color = layout_counts.index,
             color_continuous_scale='pastel')

#updating layout for better presentation
fig.update_layout(xaxis_title='Station Layout',
                  yaxis_title = 'Number of stations',
                  coloraxis_showscale = False,
                  template = 'plotly_white')

#update layout
fig.update_layout(height = 500, width = 900)

fig.show()