In [26]:
import pandas as pd
import folium
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.io as pio
pio.templates.default = "plotly_white"


In [29]:
missing_values=data.isnull().sum()
missing_values

Station ID                  0
Station Name                0
Distance from Start (km)    0
Line                        0
Opening Date                0
Station Layout              0
Latitude                    0
Longitude                   0
dtype: int64

In [27]:
data= pd.read_csv("Delhi-Metro-Network.csv")

In [28]:
data.head()

Unnamed: 0,Station ID,Station Name,Distance from Start (km),Line,Opening Date,Station Layout,Latitude,Longitude
0,1,Jhil Mil,10.3,Red line,2008-04-06,Elevated,28.67579,77.31239
1,2,Welcome [Conn: Red],46.8,Pink line,2018-10-31,Elevated,28.6718,77.27756
2,3,DLF Phase 3,10.0,Rapid Metro,2013-11-14,Elevated,28.4936,77.0935
3,4,Okhla NSIC,23.8,Magenta line,2017-12-25,Elevated,28.554483,77.264849
4,5,Dwarka Mor,10.2,Blue line,2005-12-30,Elevated,28.61932,77.03326


In [30]:
data_types=data.dtypes
data_types

Station ID                    int64
Station Name                 object
Distance from Start (km)    float64
Line                         object
Opening Date                 object
Station Layout               object
Latitude                    float64
Longitude                   float64
dtype: object

In [31]:
data['Opening Date'] = pd.to_datetime(data['Opening Date'])
data['Opening Date']

0     2008-04-06
1     2018-10-31
2     2013-11-14
3     2017-12-25
4     2005-12-30
         ...    
280   2015-06-09
281   2006-01-04
282   2009-12-11
283   2019-01-25
284   2009-12-11
Name: Opening Date, Length: 285, dtype: datetime64[ns]

# Geospatial Analysis

Visualizing the locations of the metro stations on a mao which gives us an insight into the geographical distribution of the station across the city by plotting each station with a marker using latitudes and longtitudes.
This helps to analyze the station densitu and geographic spread.

In [32]:
line_colors = {
    'Red line': 'red',
    'Blue line': 'blue',
    'Yellow line': 'beige',
    'Green line': 'green',
    'Voilet line': 'purple',
    'Pink line': 'pink',
    'Magenta line': 'darkred',
    'Orange line': 'orange',
    'Rapid Metro': 'cadetblue',
    'Aqua line': 'black',
    'Green line branch': 'lightgreen',
    'Blue line branch': 'lightblue',
    'Gray line': 'lightgray'
}

map_station_plots = folium.Map(location=[28.7041, 77.1025], zoom_start=11)

# adding colored markers for each metro station with line name in tooltip
for index, row in data.iterrows():
    line = row['Line']
    color = line_colors.get(line, 'black')  # Default color is black if line not found in the dictionary
    folium.Marker(
        location=[row['Latitude'], row['Longitude']],
        popup=f"{row['Station Name']}",
        tooltip=f"{row['Station Name']}, {line}",
        icon=folium.Icon(color=color)
    ).add_to(map_station_plots)

# Displaying the updated map
map_station_plots

Each markere represents a metro station. Hover over a marker shows the station name and the metro line. This visualization provides us a basic insight about the spread of the metro stations and the metro line across the city.

# Temporal Analysis

Analyzing the growth of the Delhi Metro network over time by looking at the number of stations opened each year, which provides insights into the pace of metro network expansion and its devlopment phases.

In [33]:
data['Opening Year'] = data['Opening Date'].dt.year

# counting the number of stations opened each year
stations_per_year = data['Opening Year'].value_counts().sort_index()
stations_per_year

Opening Year
2002     6
2003     4
2004    11
2005    28
2006     9
2008     3
2009    17
2010    54
2011    13
2013     5
2014     3
2015    13
2017    18
2018    64
2019    37
Name: count, dtype: int64

In [34]:
stations_per_year_df = stations_per_year.reset_index()
stations_per_year_df.columns = ['Year', 'Number of Stations']

stations_per_year_df

Unnamed: 0,Year,Number of Stations
0,2002,6
1,2003,4
2,2004,11
3,2005,28
4,2006,9
5,2008,3
6,2009,17
7,2010,54
8,2011,13
9,2013,5


In [35]:
fig = px.bar(stations_per_year_df, x='Year', y='Number of Stations',
             title="Number of Metro Stations Opened Each Year in Delhi",
             labels={'Year': 'Year', 'Number of Stations': 'Number of Stations Opened'})

fig.show()

In [36]:
fig.update_layout(xaxis_tickangle=-45, xaxis=dict(tickmode='linear'),
                  yaxis=dict(title='Number of Stations Opened'),
                  xaxis_title="Year")

fig.show()

The Bar chart gives a visualization about the number of metro stations opened each year. This helps us understand the development of the metro network over the years. 
Key Obersvations:
1. The years 2010,2019 and 2019 shows a significant rise in the number of metro station which indicates a rapid network expansion.
2. Few years like: 2003, 2007 2012 and 2014 show very few or no devolopment in the network, which could be due to variety of factors like planning, funding, etc.

# Line Analysis

Analyzing metro lines in terms of the number of metro stations per line and the distance between them, which gives us insights into the characteristics of each metro line,such as which lines are more extensive or denser.

In [37]:
stations_per_line = data['Line'].value_counts()

# calculating the total distance of each metro line (max distance from start)
total_distance_per_line = data.groupby('Line')['Distance from Start (km)'].max()

avg_distance_per_line = total_distance_per_line / (stations_per_line - 1)

line_analysis = pd.DataFrame({
    'Line': stations_per_line.index,
    'Number of Stations': stations_per_line.values,
    'Average Distance Between Stations (km)': avg_distance_per_line
})

# sorting the DataFrame by the number of stations
line_analysis = line_analysis.sort_values(by='Number of Stations', ascending=False)

line_analysis.reset_index(drop=True, inplace=True)
print(line_analysis)

                 Line  Number of Stations  \
0           Blue line                  49   
1           Pink line                  38   
2         Yellow line                  37   
3         Voilet line                  34   
4            Red line                  29   
5        Magenta line                  25   
6           Aqua line                  21   
7          Green line                  21   
8         Rapid Metro                  11   
9    Blue line branch                   8   
10        Orange line                   6   
11          Gray line                   3   
12  Green line branch                   3   

    Average Distance Between Stations (km)  
0                                 1.355000  
1                                 1.097917  
2                                 1.157143  
3                                 1.950000  
4                                 1.240000  
5                                 1.050000  
6                                 1.379167  
7        

The table presents a detailed analysis of the Delhi Metro lines, including the number of stations on each line and the average distance between stations.

In [45]:
fig = make_subplots(rows=1, cols=2, subplot_titles=('Number of Stations Per Metro Line',
                                                    'Average Distance Between Stations Per Metro Line'),
                    horizontal_spacing=0.2)

# plot for Number of Stations per Line
fig.add_trace(
    go.Bar(y=line_analysis['Line'], x=line_analysis['Number of Stations'],
           orientation='h', name='Number of Stations', marker_color='crimson'),
    row=1, col=1
)

# plot for Average Distance Between Stations
fig.add_trace(
    go.Bar(y=line_analysis['Line'], x=line_analysis['Average Distance Between Stations (km)'],
           orientation='h', name='Average Distance (km)', marker_color='navy'),
    row=1, col=2
)
fig.update_yaxes(title_text="Metro Line", row=1, col=1)
fig.update_yaxes(title_text="", row=1, col=2)

# update layout
fig.update_layout(height=600, width=1200, title_text="Metro Line Analysis", template="plotly_white")

fig.show()

The visualization shows that Green line branch being the shortest line with total number of 3 metro stations and around 500 metres less than the Gray line, whereas even thought the Blue line has the maximum number of metro stations, i.e 49, the Green line has the maximum average distance between the metro stations, making it the longest metro line.

# Station Layout Analysis

Analyzing the distribution of the layout(Elevated, Ground Level, Underground) across the network and recognize any patterns.

In [42]:
layout_counts = data['Station Layout'].value_counts()

# creating the bar plot using Plotly
fig = px.bar(x=layout_counts.index, y=layout_counts.values,
             labels={'x': 'Station Layout', 'y': 'Number of Stations'},
             title='Distribution of Delhi Metro Station Layouts',
             color=layout_counts.index,
             color_continuous_scale='pastel')

# updating layout for better presentation
fig.update_layout(xaxis_title="Station Layout",
                  yaxis_title="Number of Stations",
                  coloraxis_showscale=False,
                  template="plotly_white")

fig.show()

The bar chart and the counts show the distribution of different station layouts in the Delhi Metro network.

Observations:
1. Elevated Stations: The majority of the stations are Elevated. It is a common design choice in urban areas to save space and reduce land acquisition issues.
2. Underground Stations: The Underground stations are fewer compared to elevated ones. These are likely in densely populated or central areas where above-ground construction is less feasible.
3. At-Grade Stations: There are only a few At-Grade (ground level) stations, suggesting they are less common in the network, possibly due to land and traffic considerations.