# Setup

## Import Libraries

In [1]:
from collections import Counter

import bibtexparser
import numpy as np
import pandas as pd

## Parse .bib File

In [2]:
# Read the exported BibTeX records into a bibtexparser database.
# The encoding is passed explicitly because the entries contain non-ASCII
# characters (accented author names, typographic quotes); without it open()
# falls back to the platform's locale encoding, which is not UTF-8 everywhere.
# NOTE(review): assumes the .bib export is UTF-8 encoded - confirm.
with open('Accident_Analysis_and_Prevention.bib', encoding='utf-8') as bibtex_file:
    bib_database = bibtexparser.load(bibtex_file)

## Create Pandas Dataframe

In [3]:
# Turn the parsed entries into the working table that every later cell reads
# and extends (the rendered output shows one row per entry and one column per
# BibTeX field).
P = pd.DataFrame(data=bib_database.entries)
# Bare expression so the notebook renders the frame.
P

Unnamed: 0,annotation,abstract,keywords,author,url,doi,issn,year,pages,volume,journal,title,ENTRYTYPE,ID,addendum,institution,note,number
0,Stuff Here,"Purpose\nSafety climate, which is defined as w...","Safety climate, Bayesian network analysis, Lea...",Yueng-hsiang Huang and Yimin He and Jin Lee an...,https://www.sciencedirect.com/science/article/...,https://doi.org/10.1016/j.aap.2020.105850,0001-4575,2021,105850,150,Accident Analysis \& Prevention,Key drivers of trucking safety climate from th...,article,HUANG2021105850,,,,
1,,Hotspot identification (HSID) is one of the mo...,"Network screening, Hotspot identification meth...",Xiaoyu Guo and Lingtao Wu and Dominique Lord,https://www.sciencedirect.com/science/article/...,https://doi.org/10.1016/j.aap.2020.105684,0001-4575,2020,105684,145,Accident Analysis \& Prevention,Generalized criteria for evaluating hotspot id...,article,GUO2020105684,,,,
2,"Highway-Rail Grade Crossing, found two correla...",This paper proposes a machine learning approac...,"Accident prediction, Railroad grade crossing, ...",Amin Keramati and Pan Lu and Amirfarrokh Irani...,https://www.sciencedirect.com/science/article/...,https://doi.org/10.1016/j.aap.2020.105683,0001-4575,2020,105683,144,Accident Analysis \& Prevention,A crash severity analysis at highway-rail grad...,article,KERAMATI2020105683,,North Dakota State U,,
3,,Current automated driving technology cannot co...,"Automated driving, expert driver, prospective ...",Hilkka Grahn and Tuomo Kujala and Johanna Silv...,https://www.sciencedirect.com/science/article/...,https://doi.org/10.1016/j.aap.2020.105717,0001-4575,2020,105717,146,Accident Analysis \& Prevention,Expert Drivers’ Prospective Thinking-Aloud to ...,article,GRAHN2020105717,,,,
4,,Although the fatal rate of passenger vehicle-i...,"Severity analysis, Truck crashes, Intersection...",Li Song and Wei Fan,https://www.sciencedirect.com/science/article/...,https://doi.org/10.1016/j.aap.2020.105638,0001-4575,2020,105638,144,Accident Analysis \& Prevention,Combined latent class and partial proportional...,article,SONG2020105638,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
328,,"This paper, presented in two parts, assesses t...",,Urban Kjellén,https://www.sciencedirect.com/science/article/...,https://doi.org/10.1016/0001-4575(84)90023-X,0001-4575,1984,289-306,16,Accident Analysis \& Prevention,The deviation concept in occupational accident...,article,KJELLEN1984289,,,,4
329,,The present study examines workers' responses ...,,Toivo Niskanen,https://www.sciencedirect.com/science/article/...,https://doi.org/10.1016/0001-4575(94)90066-3,0001-4575,1994,27-39,26,Accident Analysis \& Prevention,Assessing the safety environment in work organ...,article,NISKANEN199427,,,,1
330,,The problem of road casualties in the United S...,,Anthony N. Kontaratos,https://www.sciencedirect.com/science/article/...,https://doi.org/10.1016/0001-4575(74)90002-5,0001-4575,1974,223-241,6,Accident Analysis \& Prevention,A systems analysis of the problem of road casu...,article,KONTARATOS1974223,,,,3
331,,,,,https://www.sciencedirect.com/science/article/...,https://doi.org/10.1016/0001-4575(75)90009-3,0001-4575,1975,155-158,7,Accident Analysis \& Prevention,Recent publications,article,1975155,,,,2


# Fields in .bib File

In [4]:
# Iterating a DataFrame yields its column labels, not its rows, so walk the
# labels explicitly: each one is a field that occurs in the .bib file.
for field_name in P.columns:
    print(field_name)

annotation
abstract
keywords
author
url
doi
issn
year
pages
volume
journal
title
ENTRYTYPE
ID
addendum
institution
note
number


# Keywords
## Sort Keywords by Frequency

In [5]:
# Tally how often each keyword occurs across all entries.
# Entries without keywords are written back into P as the literal string
# 'None' (the search cells further down read P['keywords'] too), so they
# show up in the tally as one 'None' bucket.
P['keywords'] = P['keywords'].fillna('None')

# Each entry stores its keywords as a single ', '-separated string.
# Counter makes one pass over the data instead of calling list.count()
# once per keyword occurrence.
keyword_counts = Counter(
    keyword
    for keyword_string in P['keywords']
    for keyword in keyword_string.split(', ')
)

# most_common() sorts by descending count; equal counts keep first-seen order.
# NOTE(review): the tally is case- and spelling-sensitive - the output lists
# 'Machine learning' and 'Machine Learning' as separate keywords.
D = dict(keyword_counts.most_common())
D

{'None': 39,
 'Machine learning': 32,
 'Road safety': 14,
 'Safety': 13,
 'Traffic safety': 9,
 'Deep learning': 9,
 'Automated driving': 7,
 'Crash severity': 7,
 'Data mining': 7,
 'Support vector machine': 7,
 'Connected vehicles': 6,
 'Driving simulator': 6,
 'Fatigue': 6,
 'Driver behavior': 6,
 'Classification': 5,
 'Injury severity': 5,
 'Crash prediction': 5,
 'Drowsiness': 5,
 'Naturalistic driving': 5,
 'Accident analysis': 5,
 'Negative binomial model': 5,
 'Naturalistic driving study': 5,
 'Random forest': 5,
 'Motorcycle': 5,
 'Ergonomics': 5,
 'Driver distraction': 5,
 'Driving behavior': 4,
 'Text mining': 4,
 'Distracted driving': 4,
 'Built environment': 4,
 'Sleep': 4,
 'Traffic conflicts': 4,
 'Neural network': 4,
 'Risk perception': 4,
 'Driver behaviour': 4,
 'Crash': 4,
 'Risk': 4,
 'Accident causation': 4,
 'Police records': 4,
 'Narrative text': 4,
 'Driving': 3,
 'Naturalistic driving data': 3,
 'Spatial analysis': 3,
 'Machine Learning': 3,
 'Decision tree': 3

# Algorithms

## Create Dictionary of Algorithms

In [6]:
# Search terms per algorithm family.
#   key   -> label of the boolean column added to P and shown in the counts
#   value -> terms that are OR-ed into one pattern and matched
#            case-insensitively against each abstract / keyword string
# Matching is by substring, so a broad term also hits the narrower ones:
# 'Bayes' already covers 'Bayesian Logistics Regression' and
# 'Dynamic Bayesian', and 'Neural Network' also covers the ANN and CNN terms.
Algorithms = {
    'Bayesian': [
        'Bayesian Logistics Regression',
        'Bayes',
    ],
    'XGBoost': [
        'XGBoost',
        'XGB',
    ],
    'LSTM: Long Short-Term Memory': [
        'Long Short-term Memory',
    ],
    'Decision Jungle': [
        'Decision Jungle',
    ],
    'Random Forest': [
        'Random Forest',
    ],
    'RSF: Random Survival Forest': [
        'Random Survival Forest',
    ],
    'Ensemble': [
        'Ensemble',
    ],
    'Deep Learning': [
        'Deep Learning',
        'deep-learning',
    ],
    'ANN:  Artificial Neural Network': [
        'Artificial Neural Network',
    ],
    'IGA: Intelligent Genetic Algorithm': [
        'Intelligent Genetic Algorithm',
    ],
    'SMO: Synthetic Minority Oversampling': [
        'synthetic minority oversampling',
    ],
    'MDU: Maximum Dissimilarity Undersampling': [
        'maximum dissimilarity undersampling',
    ],
    'Statistical Learning': [
        'Statistical learning',
    ],
    'CIF: Cumulative Incidence Function': [
        'Cumulative Incidence Function',
    ],
    'VIMP: Variable Importance': [
        'Variable Importance',
    ],
    'Marginal Effect Analysis': [
        'Marginal Effect Analysis',
    ],
    'Mixed Methods': [
        'Mixed Methods',
    ],
    'Logistic Regression': [
        'Logistic Regression',
    ],
    'Dynamic Bayesian Network': [
        'Dynamic Bayesian',
    ],
    'CNN:  Convolutional Neural Network': [
        'Convolutional Neural Network',
        'CNN',
    ],
    'Neural Network': [
        'Neural Network',
    ],
    'Feature Extraction': [
        'Feature Extraction',
    ],
    'Shapley': [
        'Shapley',
    ],
    'Convex Hull Algorithm': [
        'Convex Hull',
    ],
    'Dimensionality Reduction': [
        'Dimensionality Reduction',
    ],
    't-SNE': [
        't-SNE',
    ],
}
    

## Find Mentions of Algorithms in Abstracts or Keywords

In [7]:
# Add one boolean column per algorithm family to P: True when any of the
# family's search terms occurs in the entry's abstract or keywords.
for algorithm, search_terms in Algorithms.items():
    # The terms are OR-ed into a single pattern, built once per family;
    # str.contains treats it as a regular expression.
    pattern = '|'.join(search_terms)
    # na=False: an entry with a missing abstract/keywords counts as "no
    # mention" instead of feeding NaN into the | below ('abstract' is never
    # NaN-filled, and at least one entry in the table has no abstract).
    in_abstract = P['abstract'].str.contains(pattern, case=False, na=False)
    in_keywords = P['keywords'].str.contains(pattern, case=False, na=False)
    P[algorithm] = in_abstract | in_keywords

## Count Mentions of Algorithms in Abstracts or Keywords

In [8]:
# Summing a boolean flag column counts the entries that mention the family.
algorithm_columns = list(Algorithms)
A = P[algorithm_columns].sum()
# Most frequently mentioned families first.
A.sort_values(ascending=False)

Bayesian                                    45
Neural Network                              33
Random Forest                               28
Logistic Regression                         19
Deep Learning                               16
ANN:  Artificial Neural Network              9
XGBoost                                      9
LSTM: Long Short-Term Memory                 8
CNN:  Convolutional Neural Network           8
Ensemble                                     5
Feature Extraction                           5
SMO: Synthetic Minority Oversampling         4
Statistical Learning                         4
VIMP: Variable Importance                    4
Dynamic Bayesian Network                     3
Shapley                                      2
Dimensionality Reduction                     1
Convex Hull Algorithm                        1
CIF: Cumulative Incidence Function           1
Mixed Methods                                1
Marginal Effect Analysis                     1
MDU: Maximum 

# Analysis Tools

## Create Dictionary of Analysis Tools

In [9]:
# Search terms per analysis tool / metric / topic.
#   key   -> label of the boolean column added to P and shown in the counts
#   value -> terms that are OR-ed into one pattern and matched
#            case-insensitively against each abstract / keyword string
# Matching is by substring, so short terms such as 'F1' or 'Temporal' also
# hit longer words that merely contain them.
Analysis_Tools = {
    'Sensitivity': [
        'Sensitivity',
    ],
    'Area under Curve': [
        'Area under Curve',
    ],
    'False Alarm Rate': [
        'False Alarm Rate',
    ],
    'Accuracy': [
        'accuracy',
    ],
    'Precision': [
        'macro-average precision',
    ],
    'Recall': [
        'macro-average recall',
    ],
    'Geometric Mean': [
        'geometric mean',
    ],
    'Hyperparameters': [
        'Hyperparameter',
    ],
    'Spearman': [
        'Spearman',
    ],
    'Aggregated Gain': [
        'Aggregated Gain',
    ],
    'Time Dependencies': [
        'Time dependencies',
    ],
    'Temporal': [
        'Temporal',
    ],
    'Kinematic': [
        'Kinematic',
    ],
    'Visualization': [
        'Visualization',
    ],
    'Second Highway Research Program (Data Set)': [
        'Second Highway Research Program',
        'SHRP2',
    ],
    'F1 Loss Function': [
        'F1',
    ],
    'Connected Vehicles': [
        'Connected Vehicles',
    ],
}

## Find Mentions of Analysis Tools in Abstracts or Keywords

In [10]:
# Add one boolean column per analysis tool to P: True when any of the tool's
# search terms occurs in the entry's abstract or keywords.
for tool, search_terms in Analysis_Tools.items():
    # The terms are OR-ed into a single pattern, built once per tool;
    # str.contains treats it as a regular expression.
    pattern = '|'.join(search_terms)
    # na=False: an entry with a missing abstract/keywords counts as "no
    # mention" instead of feeding NaN into the | below ('abstract' is never
    # NaN-filled, and at least one entry in the table has no abstract).
    in_abstract = P['abstract'].str.contains(pattern, case=False, na=False)
    in_keywords = P['keywords'].str.contains(pattern, case=False, na=False)
    P[tool] = in_abstract | in_keywords

## Count Mentions of Analysis Tools in Abstracts or Keywords

In [11]:
# Summing a boolean flag column counts the entries that mention the tool.
analysis_tool_columns = list(Analysis_Tools)
A = P[analysis_tool_columns].sum()
# Most frequently mentioned tools first.
A.sort_values(ascending=False)

Accuracy                                      72
Sensitivity                                   29
Temporal                                      23
Kinematic                                     10
Second Highway Research Program (Data Set)     8
Connected Vehicles                             8
False Alarm Rate                               6
Visualization                                  4
F1 Loss Function                               4
Geometric Mean                                 2
Hyperparameters                                2
Aggregated Gain                                2
Area under Curve                               1
Time Dependencies                              1
Recall                                         1
Precision                                      1
Spearman                                       1
dtype: int64

# Authors

## Sort Authors by Frequency

In [12]:
# Tally how many entries each author appears on.
# Entries without an author field are written back into P as the literal
# string 'None', so they show up in the tally as a 'None' pseudo-author.
P['author'] = P['author'].fillna('None')

# BibTeX joins co-authors with ' and '; split each entry into single names.
# Counter makes one pass over the data instead of calling list.count()
# once per name occurrence.
author_counts = Counter(
    author
    for author_string in P['author']
    for author in author_string.split(' and ')
)

# most_common() sorts by descending count; equal counts keep first-seen order.
D = dict(author_counts.most_common())
D

{'Mohamed Abdel-Aty': 14,
 'Zhibin Li': 7,
 'Junhua Wang': 6,
 'Rongjie Yu': 6,
 'Pan Liu': 6,
 'Asad J. Khattak': 5,
 'Ting Fu': 5,
 'Mohammed Quddus': 5,
 'Jinghui Yuan': 5,
 'Mark King': 5,
 'Chengcheng Xu': 5,
 'Dominique Lord': 4,
 'Helai Huang': 4,
 'Qing Cai': 4,
 'Oscar Oviedo-Trespalacios': 4,
 'None': 4,
 'George Yannis': 3,
 'X. Jessie Yang': 3,
 'Mohamed M. Ahmed': 3,
 'Jie He': 3,
 'Alfonso Montella': 3,
 'Xiaomeng Li': 3,
 'Andry Rakotonirainy': 3,
 'Pei Li': 3,
 'Simon Washington': 3,
 'Zulqarnain H. Khattak': 3,
 'Michael D. Fontaine': 3,
 'Yiik Diew Wong': 3,
 'Xiupeng Shi': 3,
 'Kirolos Haleem': 3,
 'William J. Horrey': 3,
 'Matthias Schlögl': 3,
 'Frederik Naujoks': 3,
 'Priyanka Alluri': 3,
 'Richard Forsyth': 3,
 'Richard Wright': 3,
 'Wei Wang': 3,
 'Amirfarrokh Iranitalab': 2,
 'Eleni I. Vlahogianni': 2,
 'Mahama Yahaya': 2,
 'Runhua Guo': 2,
 'Xinguo Jiang': 2,
 'Kamal Bashir': 2,
 'Shiwei Xu': 2,
 'Behram Wali': 2,
 'Tarek Sayed': 2,
 'Jaeyoung Lee': 2,
 'Tiany

## Who are these Authors?

### Mohamed Abdel-Aty
- U of Central Florida
- Editor in Chief Emeritus of the journal
- PhD from Davis

### Zhibin Li
- Southeast University, Nanjing

### Junhua Wang
- Tongji U, Shanghai

### Rongjie Yu
- Coauthor with Mohamed Abdel-Aty
- Tongji U, Shanghai

### Pan Liu
- Southeast University, Nanjing
- Coauthors:
    - Jie Bao (2)
    - Satish V. Ukkusuri
    - Xiao Qin 
    - Huaguo Zhou
    - Yanyong Guo 
    - Zhibin Li (2)
    - Yao Wu
    - Wei Wang (2)
    - Chengcheng Xu (2)

### Asad J. Khattak
- U of Tennessee



# Institutions
## Sort Institutions by Frequency

In [17]:
# Tally how often each institution was recorded in the 'institution' field.
# Entries without that field are written back into P as the literal string
# 'None', which is why 'None' dominates the tally.
P['institution'] = P['institution'].fillna('None')

# Multiple institutions on one entry are separated with ' and '.
# NOTE(review): the same split also cuts names that contain ' and ' - the
# output lists 'U of Natural Resources' and 'Life Sciences, Vienna' as two
# institutions, which looks like one name broken in half. Confirm and, if
# so, switch the field to a separator that cannot occur inside a name.
institution_counts = Counter(
    institution
    for institution_string in P['institution']
    for institution in institution_string.split(' and ')
)

# most_common() sorts by descending count; equal counts keep first-seen order.
D = dict(institution_counts.most_common())
D

{'None': 318,
 'Tsinghua U': 2,
 'Louisiana State U': 2,
 'North Dakota State U': 1,
 'Southwest Jiaotong U': 1,
 'Tongji U, Shanghai': 1,
 'U of Central Florida': 1,
 'Nanyang Technological U': 1,
 'Northwestern U': 1,
 'Shahid Bahonar U': 1,
 'Southeast U, Nanjing': 1,
 'Queensland U of Technology': 1,
 'U of Natural Resources': 1,
 'Life Sciences, Vienna': 1,
 'Texas A\\&M U': 1}

# Interesting Articles

In [13]:
# Replace missing annotations with the string 'None' so that the text search
# below always operates on strings.
P['annotation'] = P['annotation'].fillna('None')

# Case-insensitive search for the 'Interesting' tag in the annotation text.
is_interesting = P['annotation'].str.contains('Interesting', case=False)
Interesting = P.loc[is_interesting]
Interesting

Unnamed: 0,annotation,abstract,keywords,author,url,doi,issn,year,pages,volume,...,Hyperparameters,Spearman,Aggregated Gain,Time Dependencies,Temporal,Kinematic,Visualization,Second Highway Research Program (Data Set),F1 Loss Function,Connected Vehicles
12,Interesting. Numerical database of simulation...,Accurate real-time prediction of occupant inju...,"Motor vehicle crashes, Occupant protection, In...",Qingfan Wang and Shun Gan and Wentao Chen and ...,https://www.sciencedirect.com/science/article/...,https://doi.org/10.1016/j.aap.2021.106149,0001-4575,2021,106149,156,...,False,False,False,False,False,True,True,False,False,False
71,Interesting,Highway work zones are most vulnerable roadway...,"Traffic collision/accident severity, Deep lear...",Md Adilur Rahim and Hany M. Hassan,https://www.sciencedirect.com/science/article/...,https://doi.org/10.1016/j.aap.2021.106090,0001-4575,2021,106090,154,...,False,False,False,False,False,False,False,False,True,False
142,Random Forest for feature generation. Only 72...,This study designs and evaluates a contextual ...,"Drowsiness, Detection, Dynamic Bayesian Networ...",Anthony D. McDonald and John D. Lee and Chris ...,https://www.sciencedirect.com/science/article/...,https://doi.org/10.1016/j.aap.2018.01.005,0001-4575,2018,25-37,113,...,False,False,False,True,True,False,False,False,False,False
188,Interesting. Thorough analysis.,"In the United States, there are approximately ...","Highway-Rail Grade Crossing Consolidation, Clo...",Samira Soleimani and Saleh R. Mousa and Julius...,https://www.sciencedirect.com/science/article/...,https://doi.org/10.1016/j.aap.2019.04.002,0001-4575,2019,65-77,128,...,False,False,True,False,False,False,False,False,False,False


# Not Machine Learning

In [16]:
# Entries whose annotation carries the 'Not ML' tag (case-insensitive).
# na=False makes a missing annotation count as "no tag", so this filter does
# not depend on an earlier cell having replaced the NaNs first; a boolean
# mask that still contains NaN cannot be used to index a DataFrame.
A = P[P['annotation'].str.contains('Not ML', case=False, na=False)]
A

Unnamed: 0,annotation,abstract,keywords,author,url,doi,issn,year,pages,volume,...,Hyperparameters,Spearman,Aggregated Gain,Time Dependencies,Temporal,Kinematic,Visualization,Second Highway Research Program (Data Set),F1 Loss Function,Connected Vehicles
34,Not ML,"Given the severe traffic safety issue, tremend...","Driving capability assessment, Longitudinal dr...",Rongjie Yu and Xiaojie Long and Mohammed Quddu...,https://www.sciencedirect.com/science/article/...,https://doi.org/10.1016/j.aap.2020.105779,0001-4575,2020,105779,147,...,False,False,False,False,False,False,False,False,False,False
81,Not ML,The autonomous vehicle is regarded as a promis...,"Autonomous vehicles, Driving strategy, Risk ap...",Can Zhao and Li Li and Xin Pei and Zhiheng Li ...,https://www.sciencedirect.com/science/article/...,https://doi.org/10.1016/j.aap.2020.105937,0001-4575,2021,105937,150,...,False,False,False,False,False,False,False,False,False,False
101,Not ML,As part of the emerging world of intelligent t...,"Driver behavior, Vehicle trajectories, Connect...",Zihan Hong and Ying Chen and Yang Wu,https://www.sciencedirect.com/science/article/...,https://doi.org/10.1016/j.aap.2020.105460,0001-4575,2020,105460,139,...,False,False,False,False,False,False,False,False,False,True
116,Not ML,Traditional statistical crash prediction model...,"Bivariate extreme value theory, Video-based ve...",Chen Wang and Chengcheng Xu and Yulu Dai,https://www.sciencedirect.com/science/article/...,https://doi.org/10.1016/j.aap.2018.12.013,0001-4575,2019,365-373,123,...,False,False,False,False,False,False,False,False,False,False
130,Not ML,Mobile phone distracted driving is a major ris...,"Cell phone, Ergonomics, Human-machine interact...",Oscar Oviedo-Trespalacios and Verity Truelove ...,https://www.sciencedirect.com/science/article/...,https://doi.org/10.1016/j.aap.2019.105412,0001-4575,2020,105412,137,...,False,False,False,False,False,False,False,False,False,False
