# Matrix Checks

Ensure that this has the iteration version information added to the file path and run all the way through.

Commentary is to be added before each output, *replacing the text in italics*.

Once run and commented, download as HTML so that it is not dynamic and can be shared with colleagues who do not have Python.

In [None]:
import os
import sys
import pandas as pd
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt

# Locate the local Travel-Market-Synthesiser checkout so that its modules can
# be imported from this notebook.
try:
    _user_dir = 'C:/Users/' + os.getlogin()
except OSError:
    # os.getlogin() relies on a controlling terminal and raises OSError when
    # the process has none; fall back to the current user's home directory.
    _user_dir = os.path.expanduser('~')
_TMS_PATH = _user_dir + '/Documents/GitHub/Travel-Market-Synthesiser'

# Re-running this cell should not keep appending the same entry.
if _TMS_PATH not in sys.path:
    sys.path.append(_TMS_PATH)
import matrix_processing as mp
import distribution_curve_graph as dcg

# Show floats with 3 places after the decimal point in DataFrame output.
# The full option key is used because the bare 'precision' shorthand is
# ambiguous on newer pandas (it also matches 'styler.format.precision') and
# raises an OptionError there.
pd.set_option('display.precision', 3)


In [None]:
%%capture
# Generate the distribution report tuples for the TMS outputs.
# NOTE: expect this call to take a few minutes.
# TODO: allow this to be pointed flexibly at different output sets.
TMS_dist_report = mp.distribution_report(
    file_drive='Y:/',
    model_name='Norms',
    iteration='iter4',
    model_segments=['p', 'm', 'ca'],
    distributions='Distribution Outputs/Compiled PA Matrices',
    matrix_format='long',
    report_tp='tp',
    internal_reports=True,
    write=True,
)

In [None]:
%%capture
# Generate the distribution report for the FE outputs, reading from the
# compiled fusion PA matrices.
FE_dist_report = mp.distribution_report(
    file_drive='Y:/',
    model_name='Norms',
    iteration='iter4',
    model_segments=['p', 'm', 'ca'],
    distributions='Fusion Outputs/Compiled Fusion PA Matrices',
    matrix_format='long',
    report_tp='tp',
    internal_reports=True,
    write=True,
)

In [None]:
# Wrap element 3 of each distribution report in a DataFrame.
# NOTE(review): presumably element 3 is the long-format demand table -
# confirm against mp.distribution_report.
TMS_dist_report_df = pd.DataFrame(TMS_dist_report[3])
FE_dist_report_df = pd.DataFrame(FE_dist_report[3])

# Store car availability ('ca') as a categorical. The whole column is
# assigned rather than going through .loc[:, 'ca']: on recent pandas a
# full-slice .loc assignment writes into the existing column in place and
# keeps its old dtype, so the conversion would silently not take effect.
TMS_dist_report_df['ca'] = TMS_dist_report_df['ca'].astype('category')
FE_dist_report_df['ca'] = FE_dist_report_df['ca'].astype('category')

## Check Matrix totals
Compare matrix totals and purpose splits with existing outputs and NoRMS. This should be done separately by mode.

*The target for comparison with MOIRA is 541k, however that is trips, whereas the FE demand here is tours. The large difference between TMS and FE outputs suggests that FE is having a greater impact than desirable.*

In [None]:
# Report the overall demand (sum of the 'dt' column) for each model,
# formatted with no decimal places.
tms_total = TMS_dist_report_df['dt'].sum()
fe_total = FE_dist_report_df['dt'].sum()
print("TMS total demand: {:.0f}".format(tms_total))
print("FE total demand:  {:.0f}".format(fe_total))

*Only rail-mode trips in these output matrices, therefore mode split unavailable*

In [None]:
# Sum demand by mode ('m') for TMS and FE, then show both totals side by side
# together with each mode's share of the model total.
# TODO(review): a comparison against NoRMS in int-ext format was intended
# here but is not implemented.
TMS_mode_df = TMS_dist_report_df.set_index('m')
# Series.sum(level=...) was removed in pandas 2.0; grouping on the index
# level is the replacement. sort=False keeps the first-appearance ordering
# that the level-based sum produced.
TMS_mode_sum = TMS_mode_df['dt'].groupby(level='m', sort=False).sum()

FE_mode_df = FE_dist_report_df.set_index('m')
FE_mode_sum = FE_mode_df['dt'].groupby(level='m', sort=False).sum()

# Both frames are indexed by 'm', so the merge key is the index level name.
mode_sum = pd.merge(TMS_mode_sum.to_frame(), FE_mode_sum.to_frame(),
                    on='m', suffixes=('_TMS', '_FE'))
mode_sum['prop_TMS'] = mode_sum['dt_TMS'] / mode_sum['dt_TMS'].sum()
mode_sum['prop_FE'] = mode_sum['dt_FE'] / mode_sum['dt_FE'].sum()
mode_sum

Compare rail trips by purpose with NTS purpose splits for rail trips. Note that the NTS proportions shown for comparison are all-week.

*Other purpose share in TMS is slightly above NTS. After FE the other share is very close to NTS. However business is 2.5 times higher for TMS than NTS, which increases to 3 times higher for FE. The commute purpose share is therefore low.*

In [None]:
# Sum mode "6" demand by purpose ('p') for TMS and FE and show each purpose's
# share of the model total.
# NOTE(review): 'm' is compared with the string "6" here, while
# dcg.build_dist_curves is given the integer 6 - confirm the dtype of 'm'.
TMS_purpose_df = TMS_dist_report_df[TMS_dist_report_df['m'] == "6"]
TMS_purpose_df = TMS_purpose_df.set_index('p')
# Series.sum(level=...) was removed in pandas 2.0; grouping on the index
# level is the replacement. sort=False keeps the first-appearance ordering
# that the level-based sum produced.
TMS_purpose_sum = TMS_purpose_df['dt'].groupby(level='p', sort=False).sum()

FE_purpose_df = FE_dist_report_df[FE_dist_report_df['m'] == "6"]
FE_purpose_df = FE_purpose_df.set_index('p')
FE_purpose_sum = FE_purpose_df['dt'].groupby(level='p', sort=False).sum()

# Both frames are indexed by 'p', so the merge key is the index level name.
purpose_sum = pd.merge(TMS_purpose_sum.to_frame(), FE_purpose_sum.to_frame(),
                       on='p', suffixes=('_TMS', '_FE'))

purpose_sum['prop_TMS'] = purpose_sum['dt_TMS'] / purpose_sum['dt_TMS'].sum()
purpose_sum['prop_FE'] = purpose_sum['dt_FE'] / purpose_sum['dt_FE'].sum()
purpose_sum

In [None]:
# NTS trip counts by purpose, used as the reference split.
NTS = dict(
    purpose=['Business', 'Commute', 'Other'],
    trips=[617, 4395, 7010],
)

# Tabulate the counts and add each purpose's share of the total.
NTS_df = pd.DataFrame(NTS).assign(
    prop=lambda frame: frame['trips'] / frame['trips'].sum()
)

NTS_df

Compare rail trips by car availability with NTS car availability splits for rail trips.

*NCA is slightly lower in TMS than in NTS, and this is reduced again in the FE outputs.*

In [None]:
# Sum mode "6" demand by car availability ('ca') for TMS and FE and show each
# category's share of the model total.
# NOTE(review): 'm' is compared with the string "6" here, while
# dcg.build_dist_curves is given the integer 6 - confirm the dtype of 'm'.
TMS_ca_df = TMS_dist_report_df[TMS_dist_report_df['m'] == "6"]
TMS_ca_df = TMS_ca_df.set_index('ca')
# Series.sum(level=...) was removed in pandas 2.0; grouping on the index
# level is the replacement. sort=False keeps the first-appearance ordering of
# the level-based sum, and observed=False pins the existing handling of a
# categorical 'ca' level (unobserved categories are kept).
TMS_ca_sum = TMS_ca_df['dt'].groupby(
    level='ca', sort=False, observed=False).sum()

FE_ca_df = FE_dist_report_df[FE_dist_report_df['m'] == "6"]
FE_ca_df = FE_ca_df.set_index('ca')
FE_ca_sum = FE_ca_df['dt'].groupby(
    level='ca', sort=False, observed=False).sum()

# Both frames are indexed by 'ca', so the merge key is the index level name.
ca_sum = pd.merge(TMS_ca_sum.to_frame(), FE_ca_sum.to_frame(),
                  on='ca', suffixes=('_TMS', '_FE'))

ca_sum['prop_TMS'] = ca_sum['dt_TMS'] / ca_sum['dt_TMS'].sum()
ca_sum['prop_FE'] = ca_sum['dt_FE'] / ca_sum['dt_FE'].sum()
ca_sum

In [None]:
# NTS trip counts by car availability, used as the reference split.
NTS = {
    'car availability': ['CA', 'NCA'],
    'trips': [10581, 3420],
}

# Tabulate the counts and add each category's share of the total.
NTS_df = pd.DataFrame(NTS)
nts_total_trips = NTS_df['trips'].sum()
NTS_df['prop'] = NTS_df['trips'].div(nts_total_trips)

NTS_df

## Cellwise Comparison
Comparison between TMS and FE outputs

*FE changes volumes of demand above 1 in both directions*

In [None]:
# Pair up TMS and FE demand for matching cells and plot their joint
# distribution as a 2D histogram with a log colour scale. Only demand values
# between 0.5 and 50 on each axis are binned.

# 'p' is the index of both inputs and the remaining keys are columns; the
# merge keys are given as a list, the form documented for `on`.
purpose_df = pd.merge(
    TMS_purpose_df,
    FE_purpose_df,
    on=['p', 'a_zone', 'ca', 'm', 'origin', 'p_zone', 'time'],
    suffixes=('_TMS', '_FE'),
)

plt.hist2d(purpose_df['dt_TMS'],
           purpose_df['dt_FE'],
           bins=50,
           range=[[0.5, 50], [0.5, 50]],
           cmap=plt.cm.BuPu,
           norm=mpl.colors.LogNorm())
plt.colorbar()
plt.title("TMS vs FE Rail Demand")
plt.xlabel("TMS")
plt.ylabel("FE")
# show must be called: a bare `plt.show` only references the function and
# echoes its repr as the cell output.
plt.show()

## Trip length distribution
The trip length distribution is aiming for a set of NTS targets with a different distribution for each segment. These do not necessarily aggregate to a smooth curve.

*Insert text on trip length distribution*

In [None]:
# Build the trip length distribution curves, restricted to mode 6.
# write=False presumably skips saving the outputs - confirm against
# dcg.build_dist_curves.
dcg.build_dist_curves(
    model_name='Norms',
    iteration='iter4',
    mode_subset=[6],
    write=False,
)

## Matrix values check
Check for extreme values and ensure that they do not represent a problem with the outputs

*There are no negative trips. However zones 1095 and 1096 having 0 demand to zone 1 should be investigated as these are fairly generic zones in Warrington that have previously had problems due to poor definition of the limit of internal zones*

In [None]:
# Five rows with the lowest TMS demand.
purpose_df.nsmallest(5, 'dt_TMS')

In [None]:
# Five rows with the lowest FE demand.
purpose_df.nsmallest(5, 'dt_FE')

In [None]:
# Five rows with the highest TMS demand.
purpose_df.nlargest(5, 'dt_TMS')

In [None]:
# Five rows with the highest FE demand.
purpose_df.nlargest(5, 'dt_FE')

## Summary

*Insert text summarising matrix comparison*