# Examen 2020-2021 : Personnages du disque-monde

In [1]:
import numpy as np
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules, fpmax

In [2]:
def compute_measures(df):
    rl = df.copy()
    rl['Kulc'] = rl['support']*(rl['antecedent support']+rl['consequent support'])/(2*rl['antecedent support']*rl['consequent support'])
    rl['all'] = pd.concat([rl['confidence'], rl['support']/rl['consequent support']], axis=1).min(axis=1)
    rl['max'] = pd.concat([rl['confidence'], rl['support']/rl['consequent support']], axis=1).max(axis=1)
    rl['cos'] = rl['support']/np.sqrt(rl['antecedent support']*rl['consequent support'])
    rl['IR'] = np.abs(rl['antecedent support']-rl['consequent support'])/(rl['antecedent support']+rl['consequent support']-rl['support'])
    return rl

La table suivante indique la présence (1) ou l'absence (0) de personnages dans huit romans de Terry Pratchett. Pour simplifier l'écriture, chaque personnage est représenté par une initiale (A, C, G, H, R, S ou T).

In [3]:
df = pd.DataFrame({
    "A": [0, 0, 0, 0, 1, 0, 1, 1], # Angua von Überwald
    "C": [0, 0, 1, 0, 1, 0, 1, 1], # Carrot Ironfoundersson
    "G": [0, 1, 0, 1, 0, 0, 0, 0], # Granny Weatherwax
    "H": [0, 0, 1, 0, 1, 1, 0, 1], # Havelock Vetinari
    "R": [1, 0, 0, 0, 0, 1, 0, 0], # Rincewind
    "S": [0, 0, 1, 0, 1, 0, 1, 1], # Samuel Vimes
    "T": [1, 0, 0, 0, 0, 1, 0, 0], # Twoflower
    },
    index=["The Colour of Magic", "Wyrd Sisters", "Guards! Guards!", "Witches Abroad", "Men at Arms", "Interesting Times", "Night Watch", "Thud!"])
df

Unnamed: 0,A,C,G,H,R,S,T
The Colour of Magic,0,0,0,0,1,0,1
Wyrd Sisters,0,0,1,0,0,0,0
Guards! Guards!,0,1,0,1,0,1,0
Witches Abroad,0,0,1,0,0,0,0
Men at Arms,1,1,0,1,0,1,0
Interesting Times,0,0,0,1,1,0,1
Night Watch,1,1,0,0,0,1,0
Thud!,1,1,0,1,0,1,0


**Q1.1** Calculer le support des itemsets suivants :

- {H}
- {A, C}
- {C, G}
- {A, C, H}

In [4]:
itemsets = [{'H'}, {'A', 'C'}, {'C', 'G'}, {'A', 'C', 'H'}]
fq = apriori(df, min_support=1/8, use_colnames=True)
fq[fq['itemsets'].apply(lambda x: any([x == s for s in itemsets]))]

Unnamed: 0,support,itemsets
3,0.5,(H)
7,0.375,"(A, C)"
16,0.25,"(A, C, H)"


**Q1.2** Donner une interprétation textuelle de la valeur de support de l'itemset \{C, G\}.

**Q1.3** Lister les itemsets fréquents pour un support minimum de 3, avec leur valeur de support.

In [5]:
fq = apriori(df, min_support=3/8, use_colnames=True)
fq

Unnamed: 0,support,itemsets
0,0.375,(A)
1,0.5,(C)
2,0.5,(H)
3,0.5,(S)
4,0.375,"(A, C)"
5,0.375,"(A, S)"
6,0.375,"(C, H)"
7,0.5,"(C, S)"
8,0.375,"(S, H)"
9,0.375,"(A, C, S)"


Les itemsets fréquents maximaux calculés pour un support minimum de 1 sont les suivants :

- {G} (support = 2)
- {H, R, T} (support = 1)
- {A, C, H, S} (support = 2)

**Q1.4** À partir de ces fréquents maximaux *uniquement*, donner, en justifiant votre réponse, une borne inférieure ou supérieure de la valeur de support des itemsets suivants :

- {H}
- {G, T}
- {A, C}

**Q1.5** En vous basant sur votre réponse à la question Q3.3, lister les itemsets fréquents clos pour un support minimum de 3.

In [6]:
fq = fpmax(df, min_support=3/8, use_colnames=True)
fq

Unnamed: 0,support,itemsets
0,0.375,"(A, C, S)"
1,0.375,"(C, S, H)"


**Q1.6** Calculer la confiance et la mesure de Kulczynski des règles d'association suivantes :

- A, C $\to$ H
- H $\to$ A, C
- C $\to$ S
- S $\to$ C

In [7]:
rules = [
    ({'A', 'C'}, {'H'}),
    ({'H'}, {'A', 'C'}),
    ({'C'}, {'S'}),
    ({'S'}, {'C'}),
]
fq = apriori(df, min_support=1/8, use_colnames=True)
ar = compute_measures(association_rules(fq, min_threshold=0)).sort_values(by=['Kulc', 'confidence'], ascending=False)
ar[ar[['antecedents', 'consequents']].apply(lambda x: any([x['antecedents'] == a and x['consequents'] == c for a, c in rules]), axis=1)]

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,Kulc,all,max,cos,IR
8,(C),(S),0.5,0.5,0.5,1.0,2.0,0.25,inf,1.0,1.0,1.0,1.0,0.0
9,(S),(C),0.5,0.5,0.5,1.0,2.0,0.25,inf,1.0,1.0,1.0,1.0,0.0
18,"(A, C)",(H),0.375,0.5,0.25,0.666667,1.333333,0.0625,1.5,0.583333,0.5,0.666667,0.57735,0.2
23,(H),"(A, C)",0.5,0.375,0.25,0.5,1.333333,0.0625,1.25,0.583333,0.5,0.666667,0.57735,0.2


La table suivante donne les thèmes principaux associés à chaque roman.

| Livre               | Thème          |
|---------------------|----------------|
| The Colour of Magic | Rincewind      |
| Wyrd Sisters        | Witches        |
| Guards! Guards!     | City Watch     |
| Witches Abroad      | Witches        |
| Men at Arms         | City Watch     |
| Interesting Times   | Rincewind      |
| Night Watch         | City Watch     |
| Thud!               | City Watch     |

**Q1.7** Comment feriez-vous pour étudier les règles d'association qui existent entre les personnages et les thèmes des romans ?