# eCARLA PGM Model 

This notebook builds a probabilistic graphical model (PGM) for integration within the CARLA environment, trained on traffic data extracted for Brooklyn.

In [2]:
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt

plt.style.use('fivethirtyeight')

import warnings
warnings.filterwarnings('ignore')
%matplotlib inline

import networkx as nx
from pgmpy.estimators import ParameterEstimator, MaximumLikelihoodEstimator, BayesianEstimator
from pgmpy.models import BayesianModel

In [4]:
# Load the processed Brooklyn 2019 table. Duplicate rows are removed while the
# 'events' and 'day' columns are still present (so they take part in the
# duplicate check); the index is then renumbered and those two columns dropped.
df = (
    pd.read_csv("brooklyn_2019_processed.csv")
    .drop_duplicates()
    .reset_index(drop=True)
    .drop(columns=['events', 'day'])
)
df

Unnamed: 0,time,pedestrians,traffic,speed,rain,fog,clouds
0,afternoon,high,medium,medium,no,light,heavy
1,early evening,low,medium,medium,no,no,no
2,early morning,low,medium,high,moderate,heavy,heavy
3,evening,low,medium,high,no,moderate,moderate
4,late evening,low,medium,high,no,no,heavy
...,...,...,...,...,...,...,...
1995,early evening,low,low,medium,no,light,no
1996,early morning,low,low,high,light,heavy,no
1997,late morning,high,low,medium,no,heavy,no
1998,morning,medium,low,medium,no,heavy,no


In [5]:
# Network structure written as parent -> children. The flattened list below
# keeps the parents, and the children under each parent, in exactly this order.
children_by_parent = {
    'time': ['pedestrians', 'traffic', 'speed'],
    'pedestrians': ['speed'],
    'traffic': ['speed', 'pedestrians'],
    'rain': ['pedestrians', 'traffic', 'speed', 'fog'],
    'fog': ['traffic', 'speed'],
    'clouds': ['rain', 'fog'],
}

# Directed (parent, child) pairs, in the form passed to the model constructor.
edges = [
    (parent, child)
    for parent, children in children_by_parent.items()
    for child in children
]

In [6]:
# Build the network from the edge list and pair it with the data so the raw
# state counts can be inspected.
model = BayesianModel(edges)
pe = ParameterEstimator(model, df)

# 'time' has no parents in `edges`, so its counts are unconditional;
# 'fog' has parents 'rain' and 'clouds', so its counts are broken down by
# each (clouds, rain) combination.
for variable in ('time', 'fog'):
    print("\n", pe.state_counts(variable))


                time
afternoon       278
early evening   287
early morning   223
evening         256
late evening    227
late morning    269
morning         237
night           223

 clouds   heavy                        light                       moderate  \
rain     heavy  light moderate     no heavy light moderate     no    heavy   
fog                                                                          
heavy      1.0  135.0     10.0   94.0   0.0  22.0      1.0   25.0      1.0   
light      0.0   36.0      0.0   45.0   0.0   6.0      0.0   12.0      0.0   
moderate   1.0   38.0      4.0   35.0   0.0   5.0      0.0   10.0      0.0   
no         0.0   62.0      4.0  280.0   0.0  22.0      1.0  152.0      0.0   

clouds                            no                        
rain     light moderate     no heavy light moderate     no  
fog                                                         
heavy     21.0      0.0   27.0   0.0  44.0      1.0   99.0  
light      6.0      1.0  

In [7]:
# Maximum-likelihood CPD estimates: 'time' is unconditional (no parents),
# 'fog' is conditional on its parents 'rain' and 'clouds'.
mle = MaximumLikelihoodEstimator(model, df)
for variable in ('time', 'fog'):
    print(mle.estimate_cpd(variable))

+---------------------+--------+
| time(afternoon)     | 0.139  |
+---------------------+--------+
| time(early evening) | 0.1435 |
+---------------------+--------+
| time(early morning) | 0.1115 |
+---------------------+--------+
| time(evening)       | 0.128  |
+---------------------+--------+
| time(late evening)  | 0.1135 |
+---------------------+--------+
| time(late morning)  | 0.1345 |
+---------------------+--------+
| time(morning)       | 0.1185 |
+---------------------+--------+
| time(night)         | 0.1115 |
+---------------------+--------+
+---------------+---------------+---------------------+-----+----------------+---------------------+
| clouds        | clouds(heavy) | clouds(heavy)       | ... | clouds(no)     | clouds(no)          |
+---------------+---------------+---------------------+-----+----------------+---------------------+
| rain          | rain(heavy)   | rain(light)         | ... | rain(moderate) | rain(no)            |
+---------------+---------------+--

In [16]:
# Maximum-likelihood CPD of 'fog' on its own (same estimate as printed in the
# cell above).
fog_cpd_mle = mle.estimate_cpd('fog')
print(fog_cpd_mle)

+---------------+-----+---------------------+
| rain          | ... | rain(no)            |
+---------------+-----+---------------------+
| fog(heavy)    | ... | 0.16420911528150134 |
+---------------+-----+---------------------+
| fog(light)    | ... | 0.09584450402144772 |
+---------------+-----+---------------------+
| fog(moderate) | ... | 0.07439678284182305 |
+---------------+-----+---------------------+
| fog(no)       | ... | 0.6655495978552279  |
+---------------+-----+---------------------+


In [17]:
# Fit every CPD of the network with maximum-likelihood estimation.
model.fit(data=df, estimator=MaximumLikelihoodEstimator)

In [23]:
# Bayesian estimate of the 'fog' CPD using a BDeu prior with an equivalent
# sample size of 10, for comparison with the maximum-likelihood estimate.
est = BayesianEstimator(model, df)
fog_cpd_bdeu = est.estimate_cpd(
    'fog',
    prior_type='BDeu',
    equivalent_sample_size=10,
)

print(fog_cpd_bdeu)

+---------------+-----+---------------------+
| rain          | ... | rain(no)            |
+---------------+-----+---------------------+
| fog(heavy)    | ... | 0.1643526262964202  |
+---------------+-----+---------------------+
| fog(light)    | ... | 0.09610237537638006 |
+---------------+-----+---------------------+
| fog(moderate) | ... | 0.07469053195048511 |
+---------------+-----+---------------------+
| fog(no)       | ... | 0.6648544663767146  |
+---------------+-----+---------------------+


In [9]:
# Do not truncate the contents of wide columns when pandas renders a frame.
pd.options.display.max_colwidth = None

In [14]:
# `display` and `HTML` are imported from the public `IPython.display` module;
# importing them from `IPython.core.display` is deprecated.
from IPython.display import display, HTML

# disable text wrapping in output cell
display(HTML("<style>div.output_area pre {white-space: pre;}</style>"))

# Clear any CPDs already attached to the model (e.g. from the earlier
# maximum-likelihood fit) so it holds only the estimates produced below.
model.cpds = []

# Re-fit all CPDs with a Bayesian estimator using a BDeu prior
# (equivalent sample size 10).
model.fit(data=df,
          estimator=BayesianEstimator,
          prior_type="BDeu",
          equivalent_sample_size=10,
          complete_samples_only=False)

print(f'Check model: {model.check_model()}\n')

# Print each CPD as a formatted table followed by its raw value array.
# NOTE: `cpd` remains bound to the last CPD after the loop finishes.
for cpd in model.get_cpds():
    print(f'CPT of {cpd.variable}:')
    print(cpd, '\n')
    print(cpd.values)

Check model: True

CPT of time:
+---------------------+----------+
| time(afternoon)     | 0.13893  |
+---------------------+----------+
| time(early evening) | 0.143408 |
+---------------------+----------+
| time(early morning) | 0.111567 |
+---------------------+----------+
| time(evening)       | 0.127985 |
+---------------------+----------+
| time(late evening)  | 0.113557 |
+---------------------+----------+
| time(late morning)  | 0.134453 |
+---------------------+----------+
| time(morning)       | 0.118532 |
+---------------------+----------+
| time(night)         | 0.111567 |
+---------------------+----------+ 

[0.13893035 0.14340796 0.11156716 0.12798507 0.11355721 0.13445274
 0.11853234 0.11156716]
CPT of pedestrians:
+---------------------+--------------------+-----+-----------------------+-----------------------+
| rain                | rain(heavy)        | ... | rain(no)              | rain(no)              |
+---------------------+--------------------+-----+------------

In [13]:
# Look the CPD up by node name instead of relying on a `cpd` variable left
# over from a loop in another cell. 'clouds' is the last node introduced by
# `edges` and the only parentless node with four states, matching the
# 4-entry array this cell displays.
model.get_cpds('clouds').values

array([0.37189055, 0.12860697, 0.13059701, 0.36890547])