In [10]:
import pandas as pd
import numpy as np

from tqdm.notebook import tqdm

# Part 1

In [11]:
data_part_1 = pd.read_csv("in.csv")
data_part_1 = data_part_1[data_part_1["capital"] == "admin"].reset_index().drop(columns="index")

In [12]:
import pandas as pd
import plotly.express as px

fig = px.scatter_mapbox(data_part_1, 
                       lat=data_part_1['lat'], 
                       lon=data_part_1['lng'],
                       text = data_part_1['city'],
                       size='population',
                       color='population',
                       hover_name='city',
                       title='Population of State Capital Cities in India',
                       size_max=75,
                       center=dict(lat=22, lon=82),
                       zoom=3.8,
                       mapbox_style="carto-positron",
                       height=850,
                       color_discrete_map='viridis',
                       )

fig.show()

# part 2

In [13]:
data_part_2 = pd.read_csv("Flightschedule.csv")
data_part_2.columns = ["Origin","Destination","Flight Number","Departure Time","Arrival Time","Flight Time","Routing","Days Of operation","Connection Flight","Effective From","Effective To"]
data_part_2 = data_part_2.iloc[2:].reset_index(drop=True)[["Origin","Destination","Routing"]]
data_part_2 = data_part_2.drop_duplicates().reset_index(drop=True)

In [14]:
route_to_city = {
    'Non Stop':  None,
    'Via BOM' : 'Mumbai',
    'Via DEL' : 'Delhi',
    'Via CNN' : 'Kannur',
    'Via CCU' : 'Kolkata',
    'Via GOI' : 'Goa',
    'Via LKO' : 'Lucknow',
    'Via PNQ' : 'Pune',
    'Via HYD' : 'Hyderabad',
    'Via BLR' : 'Bengaluru',
    'Via IXC' : 'Chandigarh',
    'Via GAU' : 'Guwahati',
    'Via AMD' : 'Ahmedabad',
    'Via PAT' : 'Patna',
    'Via IXR' : 'Ranchi',
    'Via COK' : 'Kochi',
    'Via SXR' : 'Srinagar',
    'Via IXB' : 'Bagdogra',
    'Via IXL' : 'Leh',
    'Via JAI' : 'Jaipur',
    'Via NAG' : 'Nagpur',
    'Via VNS' : 'Varanasi',
    'Via IXJ' : 'Jammu',
    'Via GOX' : 'Goa-New Goa',
    'Via MAA' : 'Chennai',
}

def apply_routing(x):
    if x in route_to_city.keys():
        return route_to_city[x]
    else:
        return None

In [15]:
def shuffles(dataset):
    new_dataset = pd.DataFrame()
    routes = dataset["Routing"].apply(lambda x : apply_routing(x))

    
    count = 0
    for i in range(len(routes)):
        if routes[i] == None:
            new_dataset = pd.concat([new_dataset,dataset.iloc[i][["Origin","Destination"]]])
        else:
            new_dataset = pd.concat([new_dataset,pd.DataFrame({'Origin' : dataset.iloc[i]["Origin"], 'Destination' : routes[i]},index = [i+count])])
            new_dataset = pd.concat([new_dataset,pd.DataFrame({'Origin' : routes[i], 'Destination' : dataset.iloc[i]["Destination"]}   ,index = [i+count+1])])
            
            count += 1

    return new_dataset[["Origin","Destination"]].drop_duplicates().reset_index(drop = True)

preprocessed_data = shuffles(data_part_2)


In [16]:
from geopy.geocoders import Nominatim

geolocator = Nominatim(user_agent="MyApp")

cities = preprocessed_data["Origin"].unique().tolist() + preprocessed_data["Destination"].unique().tolist()

if np.nan in cities:
    cities.remove(np.nan)

city_coord = {}
for city in tqdm(cities):
    location = geolocator.geocode(city)

    city_coord[city] = [location.latitude,location.longitude]

print(city_coord)

  0%|          | 0/69 [00:00<?, ?it/s]

{'Abu Dhabi': [24.4538352, 54.3774014], 'Mumbai': [19.0785451, 72.878176], 'Delhi': [28.6517178, 77.2219388], 'Kannur': [11.8763836, 75.3737973], 'Ahmedabad': [23.0216238, 72.5797068], 'Kolkata': [22.5726723, 88.3638815], 'Goa': [15.3004543, 74.0855134], 'Lucknow': [26.8381, 80.9346001], 'Pune': [18.521428, 73.8544541], 'Hyderabad': [17.38878595, 78.46106473453146], 'Bengaluru': [12.9767936, 77.590082], 'Chandigarh': [30.72984395, 76.78414567016054], 'Amritsar': [31.6343083, 74.8736788], 'Bagdogra': [26.6988847, 88.3200303], 'Guwahati': [26.1805978, 91.753943], 'Bangkok': [13.7524938, 100.4935089], 'Patna': [25.6093239, 85.1235252], 'Ranchi': [23.3700501, 85.3250387], 'Kochi': [9.9674277, 76.2454436], 'Srinagar': [34.0747444, 74.8204443], 'Chennai': [13.0836939, 80.270186], 'Dammam': [26.4367824, 50.1039991], 'Dehra Dun': [30.3255646, 78.0436813], 'Leh': [34.1642029, 77.5848133], 'Jaipur': [26.9154576, 75.8189817], 'Nagpur': [21.1498134, 79.0820556], 'Varanasi': [25.3356491, 83.0076292

In [17]:
preprocessed_data

Unnamed: 0,Origin,Destination
0,Abu Dhabi,Mumbai
1,Mumbai,Ahmedabad
2,Abu Dhabi,Delhi
3,Delhi,Ahmedabad
4,Mumbai,Amritsar
...,...,...
163,Phuket,Bengaluru
164,Phuket,Delhi
165,Port Blair,Kolkata
166,Port Blair,Bengaluru


In [18]:
import plotly.express as px
import plotly.graph_objects as go

fig = go.Figure()

for i in range(len(preprocessed_data)):
    fig.add_trace(
        go.Scattergeo(
            lon = [city_coord[preprocessed_data['Origin'][i]][1], city_coord[preprocessed_data['Destination'][i]][1]],
            lat = [city_coord[preprocessed_data['Origin'][i]][0], city_coord[preprocessed_data['Destination'][i]][0]],
            mode = 'lines',
            line = dict(width = 1,color = 'red'),
            opacity = 0.2,
            hoverinfo = 'skip',
            showlegend = False,
        )
    )

fig.add_trace(go.Scattergeo(
    lon = [i[1] for i in city_coord.values()],
    lat = [i[0] for i in city_coord.values()],
    text = list(city_coord.keys()),
    mode = 'markers',
    hoverinfo = 'skip',
    showlegend = False,
    ))

fig.add_trace(go.Scattergeo(
    lon = [i[1] for i in city_coord.values()],
    lat = [i[0] for i in city_coord.values()],
    text = list(city_coord.keys()),
    textposition = 'top center',
    mode = 'text',
    ))

fig.update_layout(
    # autosize=True,
    title = 'Connection graph of Go Air domestic flights of India',
    height=800,
    geo=dict(
        center=dict(
            lat=20, 
            lon=80
        ),
        projection_scale=5,
    ),
)

fig.show()