# Setup

## Import Libraries

In [1]:
import bibtexparser
import pandas as pd
import numpy as np

## Parse .bib File

In [2]:
with open('Transportation_Research_Part_C.bib') as bibtex_file:
    bib_database = bibtexparser.load(bibtex_file)

## Create Pandas Dataframe

In [3]:
P = pd.DataFrame(bib_database.entries)
P

Unnamed: 0,abstract,keywords,author,url,doi,issn,year,pages,volume,journal,title,ENTRYTYPE,ID,note,number
0,Multi-vehicle routing problem with soft time w...,"Reinforcement learning, Vehicle routing proble...",Ke Zhang and Fang He and Zhengchao Zhang and X...,https://www.sciencedirect.com/science/article/...,https://doi.org/10.1016/j.trc.2020.102861,0968-090X,2020,102861,121,Transportation Research Part C: Emerging Techn...,Multi-vehicle routing problems with soft time ...,article,ZHANG2020102861,,
1,The present work investigates the use of smart...,"Driver’s behavior, Critical driving patterns, ...",Eleni I. Vlahogianni and Emmanouil N. Barmpoun...,https://www.sciencedirect.com/science/article/...,https://doi.org/10.1016/j.trc.2017.03.014,0968-090X,2017,196-206,79,Transportation Research Part C: Emerging Techn...,Driving analytics using smartphones: Algorithm...,article,VLAHOGIANNI2017196,,
2,This paper employs deep learning in detecting ...,"Traffic accident detection, Tweet, Social medi...",Zhenhua Zhang and Qing He and Jing Gao and Min...,https://www.sciencedirect.com/science/article/...,https://doi.org/10.1016/j.trc.2017.11.027,0968-090X,2018,580-596,86,Transportation Research Part C: Emerging Techn...,A deep learning approach for detecting traffic...,article,ZHANG2018580,,
3,A large portion of passenger requests is repor...,"Mean field multi-agent reinforcement learning,...",Zhenyu Shou and Xuan Di,https://www.sciencedirect.com/science/article/...,https://doi.org/10.1016/j.trc.2020.102738,0968-090X,2020,102738,119,Transportation Research Part C: Emerging Techn...,Reward design for driver repositioning using m...,article,SHOU2020102738,,
4,The accurate short-term passenger flow predict...,"Deep learning, Sequence to sequence, Attention...",Siyu Hao and Der-Horng Lee and De Zhao,https://www.sciencedirect.com/science/article/...,https://doi.org/10.1016/j.trc.2019.08.005,0968-090X,2019,287-300,107,Transportation Research Part C: Emerging Techn...,Sequence to sequence learning with attention m...,article,HAO2019287,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
495,This paper addresses departure time and route ...,,Hani S. Mahmassani and Yu-Hsin Liu,https://www.sciencedirect.com/science/article/...,https://doi.org/10.1016/S0968-090X(99)00014-5,0968-090X,1999,91-107,7,Transportation Research Part C: Emerging Techn...,Dynamics of commuting decision behaviour under...,article,MAHMASSANI199991,,2
496,The increasing availability of opportunistic a...,"Trip travel time distribution, Markov chain, C...",Zhenliang Ma and Haris N. Koutsopoulos and Lui...,https://www.sciencedirect.com/science/article/...,https://doi.org/10.1016/j.trc.2016.11.008,0968-090X,2017,1-21,74,Transportation Research Part C: Emerging Techn...,Estimation of trip travel time distribution us...,article,MA20171,,
497,"As employers, suppliers, and transport provide...","GPS data, Activity-travel patterns, Business t...",Feng liu and Ziyou Gao and Davy Janssens and B...,https://www.sciencedirect.com/science/article/...,https://doi.org/10.1016/j.trc.2021.103136,0968-090X,2021,103136,128,Transportation Research Part C: Emerging Techn...,Identifying business activity-travel patterns ...,article,LIU2021103136,,
498,This paper proposes a framework for evaluating...,"Dynamic journey travel time distribution, Rela...",Agachai Sumalee and Tianlu Pan and Renxin Zhon...,https://www.sciencedirect.com/science/article/...,https://doi.org/10.1016/j.trc.2012.11.003,0968-090X,2013,263-285,35,Transportation Research Part C: Emerging Techn...,Dynamic stochastic journey time estimation and...,article,SUMALEE2013263,,


# Fields in .bib File

In [4]:
for row in P:
    print (row)

abstract
keywords
author
url
doi
issn
year
pages
volume
journal
title
ENTRYTYPE
ID
note
number


# Keywords
## Sort Keywords by Frequency

In [5]:
P['keywords'] = P['keywords'].fillna('None')
A = [ x.split(', ') for x in P['keywords'].tolist() ]
B = [item for sublist in A for item in sublist]
C = {x:B.count(x) for x in B}
D = dict(sorted(C.items(), key=lambda item: item[1], reverse=True))
D

{'Machine learning': 44,
 'Deep learning': 31,
 'None': 18,
 'Big data': 15,
 'Data mining': 14,
 'Clustering': 13,
 'Traffic flow': 11,
 'Reinforcement learning': 10,
 'Prediction': 10,
 'Traffic forecasting': 9,
 'Autonomous vehicles': 9,
 'Social media': 8,
 'Car-following': 8,
 'Air traffic management': 7,
 'Classification': 7,
 'GPS': 7,
 'Intelligent transportation systems': 7,
 'Traffic prediction': 7,
 'Neural network': 7,
 'Calibration': 7,
 'Driving behavior': 6,
 'Feature selection': 6,
 'Road safety': 6,
 'Travel behavior': 6,
 'Random forest': 6,
 'Safety': 6,
 'Agent-based model': 5,
 'Survey': 5,
 'Anomaly detection': 5,
 'Artificial neural network': 5,
 'Route choice': 5,
 'Traffic state estimation': 5,
 'Cluster analysis': 5,
 'Computer vision': 5,
 'Public transport': 5,
 'Missing data': 5,
 'Agent-based modeling': 5,
 'Travel time prediction': 5,
 'Intelligent transportation system': 5,
 'Driver behaviour': 5,
 'Neural networks': 5,
 'Dynamic traffic assignment': 5,


# Algorithms

## Create Dictionary of Algorithms

In [6]:
Algorithms = {
    'ANN:  Artificial Neural Network': ['Artificial Neural Network'],
    'Bayesian': ['Bayesian Logistics Regression', 'Bayes'],
    'Binomial Regression': ['Binomial Regression'],
    'Convex Hull Algorithm': ['Convex Hull'],
    'CNN:  Convolutional Neural Network': ['Convolutional Neural Network', 'CNN'],
    'CIF: Cumulative Incidence Function': ['Cumulative Incidence Function'],
    'Decision Jungle': ['Decision Jungle'],
    'Deep Learning': ['Deep Learning', 'deep-learning'],
    'Dimensionality Reduction': ['Dimensionality Reduction'],
    'Dynamic Bayesian Network': ['Dynamic Bayesian'],
    'Ensemble': ['Ensemble'],
    'Feature Extraction': ['Feature Extraction'],
    'Fuzzy Logic': ['Fuzzy Logic'],
    'Genetic Algorithm': ['Genetic Algorithm', 'Genetic Programming'],
    'IGA: Intelligent Genetic Algorithm': ['Intelligent Genetic Algorithm'],
    'Logistic Regression': ['Logistic Regression'],
    'LSTM: Long Short-Term Memory': ['Long Short-term Memory'],
    'Marginal Effect Analysis': ['Marginal Effect Analysis'],
    'MDU: Maximum Dissimilarity Undersampling': ['maximum dissimilarity undersampling'],
    'Mixed Methods': ['Mixed Methods'],
    'Neural Network': ['Neural Network'],
    'Random Forest':['Random Forest'],
    'RSF: Random Survival Forest': ['Random Survival Forest'],
    'Shapley': ['Shapley'],
    'Statistical Learning': ['Statistical learning'],
    'SMO: Synthetic Minority Oversampling': ['synthetic minority oversampling'],
    't-SNE': ['t-SNE'],
    'VIMP: Variable Importance': ['Variable Importance'],
    'XGBoost':['XGBoost', 'XGB'],

}
    

## Find Mentions of Algorithms in Abstracts or Keywords

In [7]:
for alg in Algorithms:
    P[alg] = P['abstract'].str.contains('|'.join(Algorithms[alg]), case=False) | P['keywords'].str.contains('|'.join(Algorithms[alg]), case=False)

## Count Mentions of Algorithms in Abstracts or Keywords

In [8]:
A = P[Algorithms.keys()].sum()
A.sort_values(ascending=False)

Neural Network                              93
Deep Learning                               56
Bayesian                                    44
Random Forest                               24
CNN:  Convolutional Neural Network          24
Ensemble                                    20
LSTM: Long Short-Term Memory                18
ANN:  Artificial Neural Network             17
Genetic Algorithm                           10
Logistic Regression                          8
Feature Extraction                           7
VIMP: Variable Importance                    3
XGBoost                                      3
Fuzzy Logic                                  3
Dynamic Bayesian Network                     3
Dimensionality Reduction                     1
RSF: Random Survival Forest                  1
SMO: Synthetic Minority Oversampling         1
t-SNE                                        1
Decision Jungle                              0
Marginal Effect Analysis                     0
MDU: Maximum 

# Analysis Tools

## Create Dictionary of Analysis Tools

In [9]:
Analysis_Tools = {
    'Sensitivity': ['Sensitivity'],
    'Area under Curve': ['Area under Curve'],
    'False Alarm Rate': ['False Alarm Rate'],
    'Accuracy': ['accuracy'],
    'Precision': ['macro-average precision'], 
    'Recall': ['macro-average recall'], 
    'Geometric Mean': ['geometric mean'],
    'Hyperparameters': ['Hyperparameter'],
    'Spearman': ['Spearman'],
    'Aggregated Gain': ['Aggregated Gain'],
    'Time Dependencies': ['Time dependencies'],
    'Temporal': ['Temporal'],
    'Kinematic': ['Kinematic'],
    'Visualization': ['Visualization'],
    'F1 Loss Function': ['F1'],
    'Connected Vehicles': ['Connected Vehicles'],
    'Imbalanced Data': ['Imbalanced Data'],
}

## Find Mentions of Analysis Tools in Abstracts or Keywords

In [10]:
for alg in Analysis_Tools:
    P[alg] = P['abstract'].str.contains('|'.join(Analysis_Tools[alg]), case=False) | P['keywords'].str.contains('|'.join(Analysis_Tools[alg]), case=False)

## Count Mentions of Analysis Tools in Abstracts or Keywordes

In [11]:
A = P[Analysis_Tools.keys()].sum()
A.sort_values(ascending=False)

Accuracy              125
Temporal               99
Sensitivity            19
Visualization           7
Kinematic               7
Connected Vehicles      6
False Alarm Rate        4
Hyperparameters         4
F1 Loss Function        3
Spearman                1
Imbalanced Data         1
Time Dependencies       0
Aggregated Gain         0
Area under Curve        0
Geometric Mean          0
Recall                  0
Precision               0
dtype: int64

# Datasets
## Create Dictionary of Datasets

In [12]:
Datasets = {
    'Second Highway Research Program (Data Set)': ['Second Highway Research Program', 'SHRP2'],
    'Virginia 100-car Database': ['Virginia', '100-car', '100 car'],
    'NGSIM Trajectory Data': ['NGSIM'],
    
}

## Find Mentions of Dataset in Abstract and Keywords

In [13]:
for x in Datasets:
    P[x] = P['abstract'].str.contains('|'.join(Datasets[x]), case=False) | P['keywords'].str.contains('|'.join(Datasets[x]), case=False)

## Count Mentions of Datasets in Abstracts and Keywords

In [14]:
A = P[Datasets.keys()].sum()
A.sort_values(ascending=False)

NGSIM Trajectory Data                         9
Virginia 100-car Database                     5
Second Highway Research Program (Data Set)    1
dtype: int64

# Authors

## Sort Authors by Frequency

In [15]:
P['author'] = P['author'].fillna('None')
A = [ x.split(' and ') for x in P['author'].tolist() ]
B = [item for sublist in A for item in sublist]
C = {x:B.count(x) for x in B}
D = dict(sorted(C.items(), key=lambda item: item[1], reverse=True))
D

{'Li Li': 9,
 'Yinhai Wang': 8,
 'Francisco C. Pereira': 7,
 'Meng Li': 6,
 'Qing He': 6,
 'Zhengbing He': 6,
 'Zhiyuan Liu': 6,
 'Xuesong Zhou': 6,
 'Satish V. Ukkusuri': 6,
 'Haris N. Koutsopoulos': 6,
 'Xi Lin': 5,
 'Xuegang (Jeff) Ban': 5,
 'Hai Yang': 5,
 'Lijun Sun': 5,
 'Jie Sun': 5,
 'Wei Ma': 5,
 'Jinhua Zhao': 5,
 'Constantinos Antoniou': 5,
 'Yunpeng Wang': 5,
 'Adel W. Sadek': 5,
 'Zhen (Sean) Qian': 5,
 'Zuduo Zheng': 5,
 'Jian Sun': 4,
 'Yang Liu': 4,
 'Jintao Ke': 4,
 'Xiqun (Michael) Chen': 4,
 'Bilal Farooq': 4,
 'Cynthia Chen': 4,
 'Mohamed Abdel-Aty': 4,
 'Xiaolei Ma': 4,
 'Lei Lin': 4,
 'Fang He': 3,
 'Zhengchao Zhang': 3,
 'Eleni I. Vlahogianni': 3,
 'Qiangqiang Guo': 3,
 'Nikola Marković': 3,
 'Jiwon Kim': 3,
 'Xi Zhu': 3,
 'Jun Chen': 3,
 'R. John Hansman': 3,
 'Lei Zhang': 3,
 'Zheng Zhu': 3,
 'Xiqun Chen': 3,
 'Xidong Pi': 3,
 'Sean Qian': 3,
 'Lingqiao Qin': 3,
 'Christian Eduardo {Verdonk Gallego}': 3,
 'Bin Ran': 3,
 'Manuel Angel {Amaro Carmona}': 3,
 'Jiep

## Who are these Authors?

### Mohamed Abdel-Aty
- U of Central Florida
- Editor in Chief Emeritus of the journal
- PhD from Davis

### Zhibin Li
- Southeast University, Nanjing

### Junhua Wang
- Tongji U, Shanghai

### Rongjie Yu
- Coauthor with Mohamed Abdel-Aty
- Tongji U, Shanghai

### Pan Liu
- Southeast University, Nanjing
- Coauthors:
    - Jie Bao (2)
    - Satish V. Ukkusuri
    - Xiao Qin 
    - Huaguo Zhou
    - Yanyong Guo 
    - Zhibin Li (2)
    - Yao Wu
    - Wei Wang (2)
    - Chengcheng Xu (2)

### Asad J. Khattak
- U of Tennessee



# Institutions
## Sort Institutions by Frequency

In [16]:
x = 'institution'
P[x] = P[x].fillna('None')
A = [ x.split(', ') for x in P[x].tolist() ]
B = [item for sublist in A for item in sublist]
C = {x:B.count(x) for x in B}
D = dict(sorted(C.items(), key=lambda item: item[1], reverse=True))
D

KeyError: 'institution'

# Interesting Articles

In [None]:
P['annotation'] = P['annotation'].fillna('None')
Interesting = P[P['annotation'].str.contains('Interesting', case=False)]
Interesting

# Not Machine Learning

In [None]:
A = P[P['annotation'].str.contains('Not ML', case=False)]
A