In [34]:
import pandas as pd
import folium 
import plotly.express as pex
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.io as pio
import warnings
warnings.filterwarnings("ignore")

In [35]:
pio.templates.default = "plotly_white"

In [36]:
data = pd.read_csv("Delhi-Metro-Network.csv")

In [37]:
data.head(10)

Unnamed: 0,Station ID,Station Name,Distance from Start (km),Line,Opening Date,Station Layout,Latitude,Longitude
0,1,Jhil Mil,10.3,Red line,2008-04-06,Elevated,28.67579,77.31239
1,2,Welcome [Conn: Red],46.8,Pink line,2018-10-31,Elevated,28.6718,77.27756
2,3,DLF Phase 3,10.0,Rapid Metro,2013-11-14,Elevated,28.4936,77.0935
3,4,Okhla NSIC,23.8,Magenta line,2017-12-25,Elevated,28.554483,77.264849
4,5,Dwarka Mor,10.2,Blue line,2005-12-30,Elevated,28.61932,77.03326
5,6,Dilli Haat INA [Conn: Yellow],24.9,Pink line,2018-06-08,Underground,28.574408,77.210241
6,7,Noida Sector 143,11.5,Aqua line,2019-01-25,Elevated,28.502663,77.426256
7,8,Moolchand,15.1,Voilet line,2010-03-10,Elevated,28.56417,77.23423
8,9,Chawri Bazar,15.3,Yellow line,2005-03-07,Underground,28.64931,77.22637
9,10,Maya Puri,12.8,Pink line,2018-03-14,Elevated,28.637179,77.129733


In [38]:
data.isnull().sum()

Station ID                  0
Station Name                0
Distance from Start (km)    0
Line                        0
Opening Date                0
Station Layout              0
Latitude                    0
Longitude                   0
dtype: int64

In [39]:
data.dtypes

Station ID                    int64
Station Name                 object
Distance from Start (km)    float64
Line                         object
Opening Date                 object
Station Layout               object
Latitude                    float64
Longitude                   float64
dtype: object

In [40]:
data.duplicated().sum()

0

In [41]:
data['Opening Date'] = pd.to_datetime(data['Opening Date'])

In [42]:
print(data['Opening Date'].dtypes)

datetime64[ns]


In [43]:
data['Line'].unique()

array(['Red line', 'Pink line', 'Rapid Metro', 'Magenta line',
       'Blue line', 'Aqua line', 'Voilet line', 'Yellow line',
       'Green line', 'Gray line', 'Orange line', 'Green line branch',
       'Blue line branch'], dtype=object)

## Geospatial Analysis (For visualisation of stations on Map)

In [44]:
line_colors = {
    'Red line' : 'red',
    'Pink line' : 'pink',
    'Rapid Metro' : 'cadetblue',
    'Magenta line' : 'darkred',
    'Blue line' : 'blue',
    'Aqua line' : 'black',
    'Voilet line' : 'purple',
    'Yellow line' : 'yellow',
    'Green line' : 'green',
    'Gray line' : 'lightgray', 
    'Orange line' : 'orange',
    'Green line branch' : 'lightgreen',
    'Blue line branch' : 'lightblue'
}

In [45]:
delhi_map_with_linetooltip = folium.Map(location=[28.7041, 77.1025], zoom_start=11)

In [46]:
for index, row in data.iterrows():
    line = row['Line']
    color = line_colors.get(line, 'black')

    folium.Marker(
        location= [row['Latitude'], row['Longitude']],
        popup= f"{row['Station Name']}",
        tooltip= f"{row['Station Name']}, {line}",
        icon= folium.Icon(color=color)
    ).add_to(delhi_map_with_linetooltip)

In [47]:
delhi_map_with_linetooltip

## Temporal Analysis

In [48]:
data['Opening Year'] = data['Opening Date'].dt.year

In [49]:
station_per_year = data['Opening Year'].value_counts().sort_index()

In [50]:
station_per_year_df = station_per_year.reset_index()

In [51]:
station_per_year_df.columns = ['Year', 'No. of Stations']

In [52]:
station_per_year_df

Unnamed: 0,Year,No. of Stations
0,2002,6
1,2003,4
2,2004,11
3,2005,28
4,2006,9
5,2008,3
6,2009,17
7,2010,54
8,2011,13
9,2013,5


In [53]:
fig = pex.bar(station_per_year_df, x= "Year", y = 'No. of Stations', title= "No. of Stations Opened per year" ,labels= {"Year" : "Year", "No. of Stations" : "No. Of Staions Opened"})

fig.update_layout(xaxis_tickangle = -45, xaxis = dict(tickmode='linear'), yaxis = dict(title = "No. of Stations Opened"), xaxis_title = 'Year')

fig.show()

In [54]:
stations_per_line = data['Line'].value_counts()

In [55]:
stations_per_line

Line
Blue line            49
Pink line            38
Yellow line          37
Voilet line          34
Red line             29
Magenta line         25
Aqua line            21
Green line           21
Rapid Metro          11
Blue line branch      8
Orange line           6
Gray line             3
Green line branch     3
Name: count, dtype: int64

In [56]:
total_distance_per_line = data.groupby('Line')['Distance from Start (km)'].max()

In [57]:
total_distance_per_line

Line
Aqua line            27.1
Blue line            52.7
Blue line branch      8.1
Gray line             3.9
Green line           24.8
Green line branch     2.1
Magenta line         33.1
Orange line          20.8
Pink line            52.6
Rapid Metro          10.0
Red line             32.7
Voilet line          43.5
Yellow line          45.7
Name: Distance from Start (km), dtype: float64

In [58]:
avg_distance_per_line = total_distance_per_line / (stations_per_line -1)

In [59]:
line_analysis = pd.DataFrame({
    "Line" : stations_per_line.index,
    "No. Of Stations" : stations_per_line.values,
    "Avg Distance Between Stations (km)" : avg_distance_per_line
})

In [60]:
line_analysis = line_analysis.sort_values(by= 'No. Of Stations', ascending= False)

In [61]:
line_analysis.reset_index(drop=True, inplace=True)

In [62]:
line_analysis

Unnamed: 0,Line,No. Of Stations,Avg Distance Between Stations (km)
0,Blue line,49,1.355
1,Pink line,38,1.097917
2,Yellow line,37,1.157143
3,Voilet line,34,1.95
4,Red line,29,1.24
5,Magenta line,25,1.05
6,Aqua line,21,1.379167
7,Green line,21,4.16
8,Rapid Metro,11,1.421622
9,Blue line branch,8,1.0


In [63]:
fig = make_subplots(rows=1, cols=2, subplot_titles=('No. Of Stations per Line', 'Avg Distance between stations per line'), horizontal_spacing=0.2)

fig.add_trace(
    go.Bar(
        y= line_analysis['Line'],
        x= line_analysis['No. Of Stations'],
        name= 'No. Of Stations',
        orientation='h',
        marker_color= 'crimson'
    ),
    row= 1,
    col= 1
)

fig.add_trace(
    go.Bar(
        y= line_analysis['Line'],
        x= line_analysis['Avg Distance Between Stations (km)'],
        name= 'Avg Distance (km)',
        orientation='h',
        marker_color= 'navy'
    ),
    row= 1,
    col= 2
)

fig.update_xaxes(title_text="No. of Stations", row=1, col=1)
fig.update_xaxes(title_text="Avg Distance Between Stations (km)", row=1, col=2)

fig.update_yaxes(title_text= 'Metro Line', row=1, col=1)
fig.update_yaxes(title_text= '', row=1, col=2)

fig.update_layout(height=600, width=1200, title_text="Metro Line Analysis", template="plotly_white")


fig.show()

In [64]:
layout_count = data['Station Layout'].value_counts()

In [65]:
layout_count

Station Layout
Elevated       214
Underground     68
At-Grade         3
Name: count, dtype: int64

In [66]:
fig = pex.bar(x = layout_count.index, y = layout_count.values, labels={'x' : 'Station Layout', 'y' : 'No.of Stations'}, 
              title='Distribution of Delhi Metro Station Layout', 
              color=layout_count.index, color_continuous_scale= 'pastel')

fig.update_layout(yaxis_title ="No. of Stations", 
                  xaxis_title = 'Station Layout',
                  coloraxis_showscale = False,
                  template = "plotly_white")

fig.show()