In [31]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

%matplotlib notebook
%matplotlib notebook

In [32]:
# Read FLEET's routes in the network (original 1940 routes) from the 'Base' sheet.
old_routes = pd.read_excel('./Demand_tests_RouteNetwork.xlsx', sheet_name='Base')

# Treat A-B the same as B-A: build a direction-independent key that always puts
# the alphabetically larger airport code first (e.g. ABQ/ATL -> 'ATL-ABQ').
# Vectorized column operations are used instead of a row-by-row apply(axis=1).
first_is_larger = old_routes['route_id1'] > old_routes['route_id2']
forward_key = old_routes['route_id1'] + '-' + old_routes['route_id2']
reverse_key = old_routes['route_id2'] + '-' + old_routes['route_id1']
# Keep the forward key where route_id1 sorts after route_id2, otherwise flip it.
old_routes['ROUTE'] = forward_key.where(first_is_larger, reverse_key)

old_routes

Unnamed: 0,route_id1,route_id2,Dvector,ROUTE
0,ABQ,ATL,477,ATL-ABQ
1,ABQ,BWI,123,BWI-ABQ
2,ABQ,COS,14,COS-ABQ
3,ABQ,CVG,265,CVG-ABQ
4,ABQ,DAL,555,DAL-ABQ
...,...,...,...,...
1935,SMF,SNA,706,SNA-SMF
1936,SNA,STL,104,STL-SNA
1937,STL,TPA,295,TPA-STL
1938,STL,TUL,290,TUL-STL


In [33]:
# Read the new routes data; this run loads the 'v2' sheet of the workbook.
new_routes = pd.read_excel('./Demand_tests_RouteNetwork.xlsx', sheet_name='v2')

# Treat A-B the same as B-A: build a direction-independent key that always puts
# the alphabetically larger airport code first (e.g. ABQ/ATL -> 'ATL-ABQ').
# Vectorized column operations are used instead of a row-by-row apply(axis=1).
first_is_larger = new_routes['route_id1'] > new_routes['route_id2']
forward_key = new_routes['route_id1'] + '-' + new_routes['route_id2']
reverse_key = new_routes['route_id2'] + '-' + new_routes['route_id1']
# Keep the forward key where route_id1 sorts after route_id2, otherwise flip it.
new_routes['ROUTE'] = forward_key.where(first_is_larger, reverse_key)

new_routes

Unnamed: 0,route_id1,route_id2,Dvector,ROUTE
0,ABQ,ATL,479,ATL-ABQ
1,ABQ,BWI,123,BWI-ABQ
2,ABQ,COS,14,COS-ABQ
3,ABQ,CVG,265,CVG-ABQ
4,ABQ,DAL,555,DAL-ABQ
...,...,...,...,...
1960,SMF,SNA,706,SNA-SMF
1961,SNA,STL,104,STL-SNA
1962,STL,TPA,295,TPA-STL
1963,STL,TUL,290,TUL-STL


In [34]:
# Outer-join the two route tables on the direction-independent ROUTE key.
# Overlapping column names receive '_old' / '_new' suffixes, and the '_merge'
# indicator column records whether a route appears in both tables, only in the
# original one ('left_only'), or only in the new one ('right_only').
comparison_df = pd.merge(
    old_routes,
    new_routes,
    how='outer',
    on='ROUTE',
    suffixes=('_old', '_new'),
    indicator=True,
)
# Save the full comparison so it can be inspected outside the notebook.
comparison_df.to_csv('./compare_1940_v2.csv')
comparison_df

Unnamed: 0,route_id1_old,route_id2_old,Dvector_old,ROUTE,route_id1_new,route_id2_new,Dvector_new,_merge
0,ABQ,ATL,477.0,ATL-ABQ,ABQ,ATL,479.0,both
1,ABQ,BWI,123.0,BWI-ABQ,ABQ,BWI,123.0,both
2,ABQ,COS,14.0,COS-ABQ,ABQ,COS,14.0,both
3,ABQ,CVG,265.0,CVG-ABQ,ABQ,CVG,265.0,both
4,ABQ,DAL,555.0,DAL-ABQ,ABQ,DAL,555.0,both
...,...,...,...,...,...,...,...,...
1964,,,,TYS-LGA,LGA,TYS,32.0,right_only
1965,,,,OMA-MCI,MCI,OMA,12.0,right_only
1966,,,,TPA-MCO,MCO,TPA,21.0,right_only
1967,,,,SLC-OKC,OKC,SLC,196.0,right_only


In [35]:
# Summarize the merge indicator: routes shared by both datasets, routes found
# only in the original dataset, and routes found only in the new dataset.
merge_status = comparison_df['_merge']
same_routes = int((merge_status == 'both').sum())
diff_routes_old = int((merge_status == 'left_only').sum())
diff_routes_new = int((merge_status == 'right_only').sum())

# Adjacent f-strings are concatenated by the parser, so this prints three lines.
print(
    f'There are {same_routes} same routes.\n'
    f'There are {diff_routes_old} routes in the original dataset but not in the new dataset.\n'
    f'There are {diff_routes_new} routes in the new dataset but not in the original dataset.'
)

There are 1936 same routes.
There are 4 routes in the original dataset but not in the new dataset.
There are 29 routes in the new dataset but not in the original dataset.
