In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import datetime as dt
import itertools
from mlxtend.frequent_patterns import apriori, association_rules
from mlxtend.preprocessing import TransactionEncoder
import time

In [2]:
te = TransactionEncoder()
dt = pd.read_csv('databasev2.csv', header=None, dtype=str)
dataset = dt.values.tolist()
te_ary = te.fit(dataset).transform(dataset)
df = pd.DataFrame(te_ary, columns=te.columns_)
frequent_itemsets = apriori(df, min_support=0.005, use_colnames=True)
rules = association_rules(frequent_itemsets, metric ="lift", min_threshold = 0.05) 
rules = rules.sort_values(['lift'], ascending =[False])

In [3]:
dt.head(5)

Unnamed: 0,0,1,2,3,4,5,6,7
0,Estacion,Tipo dia,Descripcio_causa_mediata,Descripcio_torn,Descripcio_tipus_accident,Tipo_vehiculo,Genero,Edad
1,Otono,Laborable,No respectar distancies,Tarda,Abast,Motocicleta,Home,[18;33]
2,Invierno,Laborable,No respectar distancies,Tarda,Abast,Motocicleta,Home,[34;50]
3,Invierno,Laborable,Adelantamiento improcedente,Mati,Colision lateral,Motocicleta,Home,[34;50]
4,Verano,Laborable,Manca d'atencio a la conduccio,Mati,Atropellament,Motocicleta,Dona,[18;33]


In [4]:
rules.head(15)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
19588,"(Tarda, Abast multiple)","(No respectar distancies, Turisme)",0.018738,0.0286,0.005614,0.299595,10.475499,0.005078,1.386913,0.921812
19593,"(No respectar distancies, Turisme)","(Tarda, Abast multiple)",0.0286,0.018738,0.005614,0.196286,10.475499,0.005078,1.220911,0.93117
19595,(Abast multiple),"(Tarda, No respectar distancies, Turisme)",0.033379,0.016158,0.005614,0.168182,10.408323,0.005074,1.18276,0.935137
19586,"(Tarda, No respectar distancies, Turisme)",(Abast multiple),0.016158,0.033379,0.005614,0.347418,10.408323,0.005074,1.481225,0.918769
19413,(Abast multiple),"(Home, No respectar distancies, Turisme)",0.033379,0.016234,0.00531,0.159091,9.799703,0.004768,1.169884,0.928964
19404,"(Home, No respectar distancies, Turisme)",(Abast multiple),0.016234,0.033379,0.00531,0.327103,9.799703,0.004768,1.436506,0.912774
2096,"(No respectar distancies, Turisme)",(Abast multiple),0.0286,0.033379,0.009179,0.320955,9.615517,0.008225,1.423501,0.922381
2097,(Abast multiple),"(No respectar distancies, Turisme)",0.033379,0.0286,0.009179,0.275,9.615517,0.008225,1.339863,0.926942
19524,(Abast multiple),"(Laborable, No respectar distancies, Turisme)",0.033379,0.022151,0.007055,0.211364,9.541765,0.006316,1.239923,0.92611
19517,"(Laborable, No respectar distancies, Turisme)",(Abast multiple),0.022151,0.033379,0.007055,0.318493,9.541765,0.006316,1.418359,0.915477


In [4]:
antecedent_categories = ['Dona','Motocicleta','[34;50]']

consequent_categories = [
    'Colision lateral', 
    'Abast', 'Colision fronto-lateral', 'Caiguda (dues rodes)'
]


# Filtrando las reglas donde los antecedentes y consecuentes contienen cualquier combinación de los ítems de las categorías especificadas
demographic_rules = rules[
    rules['antecedents'].apply(lambda x: len(x) == 3 and all(item in antecedent_categories for item in x)) &
    rules['consequents'].apply(lambda x: len(x) == 1 and all(item in consequent_categories for item in x))
]
demographic_rules.sort_values(['confidence'], ascending=False).head(25)    

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
36902,"(Dona, [34;50], Motocicleta)",(Colision lateral),0.070627,0.284782,0.025413,0.359828,1.26352,0.0053,1.117228,0.22441
14082,"(Dona, [34;50], Motocicleta)",(Abast),0.070627,0.242831,0.020482,0.290011,1.19429,0.003332,1.066451,0.175045
32240,"(Dona, [34;50], Motocicleta)",(Colision fronto-lateral),0.070627,0.22508,0.009634,0.136412,0.606063,-0.006262,0.897327,-0.411553
25478,"(Dona, [34;50], Motocicleta)",(Caiguda (dues rodes)),0.070627,0.101578,0.008117,0.11493,1.131449,0.000943,1.015086,0.125006


In [8]:
antecedent_categories = ['Dona','Furgoneta','[18;33]']

consequent_categories = [
    'Manca d\'atencio a la conduccio', 'No respectar distancies', 'Gir indegut o sense precaucio',
    'Desobeir samoforo', 'Canvi de carril sense precaucio'
]


# Filtrando las reglas donde los antecedentes y consecuentes contienen cualquier combinación de los ítems de las categorías especificadas
demographic_rules = rules[
    rules['antecedents'].apply(lambda x: len(x) == 2 and all(item in antecedent_categories for item in x)) &
    rules['consequents'].apply(lambda x: len(x) == 1 and all(item in consequent_categories for item in x))
]
demographic_rules.sort_values(['confidence'], ascending=False).head(25)    

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
8599,"(Dona, [18;33])",(Manca d'atencio a la conduccio),0.141936,0.204445,0.027234,0.191876,0.938519,-0.001784,0.984446,-0.070929
8929,"(Dona, [18;33])",(No respectar distancies),0.141936,0.143832,0.023365,0.164618,1.144511,0.00295,1.024881,0.14715
8347,"(Dona, [18;33])",(Gir indegut o sense precaucio),0.141936,0.124412,0.020179,0.14217,1.142734,0.00252,1.020701,0.145567
7813,"(Dona, [18;33])",(Desobeir samoforo),0.141936,0.095357,0.0132,0.092998,0.975262,-0.000335,0.997399,-0.028712
4825,"(Dona, [18;33])",(Canvi de carril sense precaucio),0.141936,0.085723,0.010014,0.070551,0.823006,-0.002154,0.983676,-0.200404


In [32]:
antecedent_categories = ['Dona','Motocicleta']

consequent_categories = [
    'Manca d\'atencio a la conduccio', 'No respectar distancies', 'Gir indegut o sense precaucio',
    'Desobeir samoforo', 'Canvi de carril sense precaucio'
]


# Filtrando las reglas donde los antecedentes y consecuentes contienen cualquier combinación de los ítems de las categorías especificadas
demographic_rules = rules[
    rules['antecedents'].apply(lambda x: len(x) == 2 and all(item in antecedent_categories for item in x)) &
    rules['consequents'].apply(lambda x: len(x) == 1 and all(item in consequent_categories for item in x))
]
demographic_rules.sort_values(['confidence'], ascending=False).head(6)



Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
8545,"(Motocicleta, Dona)",(Manca d'atencio a la conduccio),0.186466,0.204445,0.034972,0.187551,0.917364,-0.00315,0.979205,-0.099689
8719,"(Motocicleta, Dona)",(No respectar distancies),0.186466,0.143832,0.033075,0.17738,1.23324,0.006255,1.040781,0.232477
8305,"(Motocicleta, Dona)",(Gir indegut o sense precaucio),0.186466,0.124412,0.023972,0.12856,1.033339,0.000773,1.00476,0.039658
4802,"(Motocicleta, Dona)",(Canvi de carril sense precaucio),0.186466,0.085723,0.020938,0.112286,1.309876,0.004953,1.029923,0.290792
7771,"(Motocicleta, Dona)",(Desobeir samoforo),0.186466,0.095357,0.011227,0.060212,0.631431,-0.006554,0.962602,-0.417756


In [8]:

antecedent_categories = ['[18;33]','[34;50]','[51;70]','Home','Dona','Furgoneta']

consequent_categories = [
    'Manca d\'atencio a la conduccio', 'No respectar distancies', 'Gir indegut o sense precaucio',
    'Desobeir samoforo', 'Canvi de carril sense precaucio'
]


# Filtrando las reglas donde los antecedentes y consecuentes contienen cualquier combinación de los ítems de las categorías especificadas
demographic_rules = rules[
    rules['antecedents'].apply(lambda x: len(x) == 3 and all(item in antecedent_categories for item in x)) &
    rules['consequents'].apply(lambda x: len(x) == 1 and all(item in consequent_categories for item in x))
]
demographic_rules.sort_values(['confidence'], ascending=False).head(25)    

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric


In [9]:
antecedent_categories =[
'la Dreta de l\'Eixample','l\'Antiga Esquerra de l\'Eixample','Sant Gervasi - Galvany','la Sagrada Familia','la Nova Esquerra de l\'Eixample'
,'el Fort Pienc','les Corts','Sant Gervasi - la Bonanova','Sant Antoni','la Marina del Prat Vermell','la Maternitat i Sant Ramon','el Poble-sec',
'el Camp de l\'Arpa del Clot','Provensals del Poblenou','el Bon Pastor','Sarria','Pedralbes','el Clot','el Poblenou','Sants','Sant Andreu','Horta',
'el Baix Guinardo','la Barceloneta','la Vila de Gracia','les Tres Torres','la Marina de Port'
]

consequent_categories = [
    'Manca d\'atencio a la conduccio', 'No respectar distancies', 'Gir indegut o sense precaucio',
    'Desobeir samoforo', 'Canvi de carril sense precaucio'
]


# Filtrando las reglas donde los antecedentes y consecuentes contienen cualquier combinación de los ítems de las categorías especificadas
filtered_rules = rules[
      rules['antecedents'].apply(lambda x: all(item in antecedent_categories for item in x)) &
    rules['consequents'].apply(lambda x: all(item in consequent_categories for item in x))
]
filtered_rules.sort_values(['lift'], ascending=False).head(25)    

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric


In [10]:
antecedent_categories =[
'la Dreta de l\'Eixample','l\'Antiga Esquerra de l\'Eixample','Sant Gervasi - Galvany','la Sagrada Familia','la Nova Esquerra de l\'Eixample'
,'el Fort Pienc','les Corts','Sant Gervasi - la Bonanova','Sant Antoni','la Marina del Prat Vermell','la Maternitat i Sant Ramon','el Poble-sec',
'el Camp de l\'Arpa del Clot','Provensals del Poblenou','el Bon Pastor','Sarria','Pedralbes','el Clot','el Poblenou','Sants','Sant Andreu','Horta',
'el Baix Guinardo','la Barceloneta','la Vila de Gracia','les Tres Torres','la Marina de Port'
]

consequent_categories = [
    'Colision lateral', 
    'Abast', 'Colision fronto-lateral', 'Caiguda (dues rodes)','Abast multiple'
]


# Filtrando las reglas donde los antecedentes y consecuentes contienen cualquier combinación de los ítems de las categorías especificadas
filtered_rules = rules[
      rules['antecedents'].apply(lambda x: all(item in antecedent_categories for item in x)) &
    rules['consequents'].apply(lambda x: all(item in consequent_categories for item in x))
]
filtered_rules.sort_values(['lift'], ascending=False).head(25)  

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric


In [11]:
antecedent_categories =[
    'Eixample','Sant Marti','Sarria -Sant Gervasi','Sants-Montjuic','Horta-Guinardo','Les Corts'
]

consequent_categories = [
    'Colision lateral', 
    'Abast', 'Colision fronto-lateral', 'Caiguda (dues rodes)','Abast multiple'
]


# Filtrando las reglas donde los antecedentes y consecuentes contienen cualquier combinación de los ítems de las categorías especificadas
filtered_rules = rules[
      rules['antecedents'].apply(lambda x: all(item in antecedent_categories for item in x)) &
    rules['consequents'].apply(lambda x: all(item in consequent_categories for item in x))
]
filtered_rules.sort_values(['lift'], ascending=False).head(25)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric


In [12]:
antecedent_categories = [
    'Colision lateral', 
    'Abast', 'Colision fronto-lateral', 'Caiguda (dues rodes)','Abast multiple'
]

consequent_categories = [
    'Manca d\'atencio a la conduccio', 'No respectar distancies', 'Gir indegut o sense precaucio',
    'Desobeir samoforo', 'Canvi de carril sense precaucio'
]


# Filtrando las reglas donde los antecedentes y consecuentes contienen cualquier combinación de los ítems de las categorías especificadas
filtered_rules = rules[
      rules['antecedents'].apply(lambda x: all(item in antecedent_categories for item in x)) &
    rules['consequents'].apply(lambda x: all(item in consequent_categories for item in x))
]
filtered_rules.sort_values(['lift'], ascending=False).head(25)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
335,(Colision fronto-lateral),(Desobeir samoforo),0.22508,0.095357,0.077985,0.346478,3.63347,0.056522,1.384257,0.935297
62,(Abast multiple),(No respectar distancies),0.033379,0.143832,0.01631,0.488636,3.39726,0.011509,1.674283,0.730012
28,(Abast),(No respectar distancies),0.242831,0.143832,0.112805,0.464542,3.229745,0.077878,1.598945,0.911789
269,(Colision lateral),(Canvi de carril sense precaucio),0.284782,0.085723,0.062282,0.2187,2.551243,0.03787,1.1702,0.850139
389,(Colision lateral),(Gir indegut o sense precaucio),0.284782,0.124412,0.08064,0.283165,2.276022,0.04521,1.221463,0.783869
54,(Abast multiple),(Manca d'atencio a la conduccio),0.033379,0.204445,0.014338,0.429545,2.101027,0.007514,1.394598,0.542138
16,(Abast),(Manca d'atencio a la conduccio),0.242831,0.204445,0.092323,0.380194,1.859634,0.042677,1.283553,0.610511
240,(Caiguda (dues rodes)),(Manca d'atencio a la conduccio),0.101578,0.204445,0.023213,0.228529,1.117798,0.002446,1.031217,0.117299
227,(Caiguda (dues rodes)),(Canvi de carril sense precaucio),0.101578,0.085723,0.009255,0.091113,1.062875,0.000547,1.00593,0.065844
339,(Colision fronto-lateral),(Gir indegut o sense precaucio),0.22508,0.124412,0.028903,0.128413,1.032155,0.0009,1.00459,0.040202


In [13]:
antecedent_categories =[
    'Eixample','Sant Marti','Sarria -Sant Gervasi','Sants-Montjuic','Horta-Guinardo','Les Corts'
]

consequent_categories = [
    'Manca d\'atencio a la conduccio', 'No respectar distancies', 'Gir indegut o sense precaucio',
    'Desobeir samoforo', 'Canvi de carril sense precaucio'
]


# Filtrando las reglas donde los antecedentes y consecuentes contienen cualquier combinación de los ítems de las categorías especificadas
filtered_rules = rules[
      rules['antecedents'].apply(lambda x: all(item in antecedent_categories for item in x)) &
    rules['consequents'].apply(lambda x: all(item in consequent_categories for item in x))
]
filtered_rules.sort_values(['lift'], ascending=False).head(25)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
