In [1]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.cluster import KMeans
%matplotlib inline

In [2]:
import datetime 

interventions_df = pd.read_csv('data/donneesouvertes-interventions-sim.csv')
interventions_df['CREATION_DATE_TIME'] = interventions_df['CREATION_DATE_TIME'].astype('datetime64[ns]')  
interventions_df.head()

Unnamed: 0,INCIDENT_NBR,CREATION_DATE_TIME,INCIDENT_TYPE_DESC,DESCRIPTION_GROUPE,CASERNE,NOM_VILLE,NOM_ARROND,DIVISION,NOMBRE_UNITES,CIV,MTM8_X,MTM8_Y,LONGITUDE,LATITUDE
0,1168,2015-01-03 20:56:02,Premier répondant,1-REPOND,33,Montréal,Sud-Ouest,8,1.0,1.0,297283.0,5035433.1,-73.596117,45.458786
1,1171,2015-01-03 21:03:52,Alarme privé ou locale,Alarmes-incendies,22,Montréal,Saint-Léonard,4,4.0,1.0,299180.0,5049284.2,-73.57202,45.583442
2,1172,2015-01-03 21:07:00,Premier répondant,1-REPOND,9,Montréal,Villeray / St-Michel / Parc Extension,4,1.0,1.0,297259.6,5046768.2,-73.596589,45.560784
3,1177,2015-01-03 21:41:46,Premier répondant,1-REPOND,37,Montréal,Villeray / St-Michel / Parc Extension,4,1.0,1.0,294738.3,5044817.2,-73.628847,45.543197
4,1178,2015-01-03 21:43:32,Aliments surchauffés,SANS FEU,16,Montréal,Plateau Mont-Royal,11,5.0,1.0,298505.7,5042556.1,-73.580573,45.522895


In [3]:
interventions_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 538766 entries, 0 to 538765
Data columns (total 14 columns):
INCIDENT_NBR          538766 non-null int64
CREATION_DATE_TIME    538766 non-null datetime64[ns]
INCIDENT_TYPE_DESC    538766 non-null object
DESCRIPTION_GROUPE    538760 non-null object
CASERNE               538766 non-null int64
NOM_VILLE             538766 non-null object
NOM_ARROND            538766 non-null object
DIVISION              538766 non-null int64
NOMBRE_UNITES         538633 non-null float64
CIV                   479899 non-null float64
MTM8_X                538766 non-null float64
MTM8_Y                538766 non-null float64
LONGITUDE             538766 non-null float64
LATITUDE              538766 non-null float64
dtypes: datetime64[ns](1), float64(6), int64(3), object(4)
memory usage: 57.5+ MB


In [4]:
interventions_df['NOM_ARROND'] = np.where(interventions_df['NOM_ARROND'] == 'Indéterminé', interventions_df['NOM_VILLE'], interventions_df['NOM_ARROND']) 
interventions_df.head()

Unnamed: 0,INCIDENT_NBR,CREATION_DATE_TIME,INCIDENT_TYPE_DESC,DESCRIPTION_GROUPE,CASERNE,NOM_VILLE,NOM_ARROND,DIVISION,NOMBRE_UNITES,CIV,MTM8_X,MTM8_Y,LONGITUDE,LATITUDE
0,1168,2015-01-03 20:56:02,Premier répondant,1-REPOND,33,Montréal,Sud-Ouest,8,1.0,1.0,297283.0,5035433.1,-73.596117,45.458786
1,1171,2015-01-03 21:03:52,Alarme privé ou locale,Alarmes-incendies,22,Montréal,Saint-Léonard,4,4.0,1.0,299180.0,5049284.2,-73.57202,45.583442
2,1172,2015-01-03 21:07:00,Premier répondant,1-REPOND,9,Montréal,Villeray / St-Michel / Parc Extension,4,1.0,1.0,297259.6,5046768.2,-73.596589,45.560784
3,1177,2015-01-03 21:41:46,Premier répondant,1-REPOND,37,Montréal,Villeray / St-Michel / Parc Extension,4,1.0,1.0,294738.3,5044817.2,-73.628847,45.543197
4,1178,2015-01-03 21:43:32,Aliments surchauffés,SANS FEU,16,Montréal,Plateau Mont-Royal,11,5.0,1.0,298505.7,5042556.1,-73.580573,45.522895


In [5]:
fire_types = [
'10-22 avec feu',
'Feu de champ *',
'Ac.vic. feu train/m√©tro F7/DES',
'Acc. sans vict. av. feu - ext.',
'Acc. sans vict. av. feu-tunnel',
'Acc. s-vict. av. feu - pont',
'Acc. s-vict. av. feu - v.r.',
'Acc. s-vict. av. feu-b√¢timent',
'Acc. s-vict. feu - train/m√©tro',
'Acc. vict. feu - tunnel F7/MOU',
'Acc. vict. sfeu tunnel F7/MOU',
'Acc. victime feu - b√¢timent',
'Acc. victime feu - pont',
'Acc. victime feu - voie rapide',
'Acc. victime feu ext.',
'Acc. victime sfeu - b√¢timent',
'Acc. victime sfeu - pont',
'Acc. victime sfeu - voie rap.',
'D√©chets en feu',
'Feu / 2e Alerte',
'Feu / 3e Alerte',
'Feu / 4e Alerte',
'Feu / 5e Alerte',
'Feu / Agravation possible',
'Feu d\'auto (voie rapide)',
'Feu de b√¢timent',
'Feu de chemin√©e *',
'Feu de cuisson',
'Feu de nature √©lectrique',
'Feu de navire/b√¢teau',
'Feu de v√©hicule ext√©rieur',
'Feu de v√©hicule int√©rieur',
'Feu de v√©hicule sur pont',
'feu de v√©hicule tunnel F7/MOU',
'Feu v√©h. convoi (train/m√©tro)',
'M√©tro b√¢timent /10-22 avec feu',
'M√©tro b√¢timent /10-22 sans feu',
'Senteur de feu √† l\'ext√©rieur]']

fire_interventions = interventions_df[interventions_df['INCIDENT_TYPE_DESC'].isin(fire_types)]
fire_interventions.head()

Unnamed: 0,INCIDENT_NBR,CREATION_DATE_TIME,INCIDENT_TYPE_DESC,DESCRIPTION_GROUPE,CASERNE,NOM_VILLE,NOM_ARROND,DIVISION,NOMBRE_UNITES,CIV,MTM8_X,MTM8_Y,LONGITUDE,LATITUDE
14,1599,2015-01-04 15:35:19,Feu de champ *,AUTREFEU,53,Beaconsfield,Beaconsfield,2,1.0,1.0,276170.9,5031835.0,-73.865855,45.425863
29,1623,2015-01-04 15:59:59,Feu de champ *,AUTREFEU,66,Montréal,Verdun,8,1.0,1.0,298843.7,5035305.0,-73.576159,45.457649
61,2864,2015-01-06 23:01:06,10-22 avec feu,INCENDIE,19,Montréal,Ville-Marie,6,6.0,1.0,301176.3,5043239.1,-73.546392,45.529059
223,2905,2015-01-07 02:37:03,10-22 avec feu,INCENDIE,73,Montréal,Saint-Laurent,3,7.0,1.0,291393.6,5042740.1,-73.671621,45.524449
252,3722,2015-01-08 21:18:19,Feu de champ *,AUTREFEU,29,Montréal,Rosemont / Petite-Patrie,7,1.0,1.0,298634.7,5046099.2,-73.578967,45.554778


In [6]:
fire_interventions_pivot = fire_interventions.pivot_table(values='INCIDENT_NBR', index='NOM_ARROND', columns='INCIDENT_TYPE_DESC', aggfunc=pd.Series.nunique, fill_value = 0)
fire_interventions_pivot.shape

(34, 14)

In [7]:
fire_interventions_pivot.head(10)


INCIDENT_TYPE_DESC,10-22 avec feu,Acc. s-vict. av. feu - pont,Acc. s-vict. av. feu - v.r.,Acc. sans vict. av. feu - ext.,Acc. vict. sfeu tunnel F7/MOU,Acc. victime sfeu - pont,Acc. victime sfeu - voie rap.,Feu / 2e Alerte,Feu / 3e Alerte,Feu / 4e Alerte,Feu / 5e Alerte,Feu / Agravation possible,Feu d'auto (voie rapide),Feu de champ *
NOM_ARROND,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
Ahuntsic / Cartierville,178,1,0,4,0,0,1,6,5,1,1,3,15,380
Anjou,51,0,0,2,0,0,0,0,1,1,1,0,19,79
Baie d'Urfé,10,0,0,0,0,0,0,1,0,0,0,0,3,10
Beaconsfield,13,0,0,0,0,0,2,0,0,0,0,0,5,26
Côte St-Luc,34,0,0,1,0,0,0,4,1,0,0,3,0,55
Côte-des-Neiges / Notre-Dame-de-Grâce,285,0,2,3,0,0,0,20,5,3,2,4,10,297
Dollard-des-Ormeaux,51,0,0,1,0,0,0,1,0,0,0,3,0,61
Dorval,42,0,0,0,0,0,1,1,0,0,0,1,11,59
Hampstead,7,0,0,1,0,0,0,0,0,0,0,0,0,9
Ile Dorval,1,0,0,0,0,0,0,0,0,0,0,0,0,1


In [8]:
incident_type_df = pd.DataFrame(fire_interventions_pivot.to_records())
incident_type_df.head()

Unnamed: 0,NOM_ARROND,10-22 avec feu,Acc. s-vict. av. feu - pont,Acc. s-vict. av. feu - v.r.,Acc. sans vict. av. feu - ext.,Acc. vict. sfeu tunnel F7/MOU,Acc. victime sfeu - pont,Acc. victime sfeu - voie rap.,Feu / 2e Alerte,Feu / 3e Alerte,Feu / 4e Alerte,Feu / 5e Alerte,Feu / Agravation possible,Feu d'auto (voie rapide),Feu de champ *
0,Ahuntsic / Cartierville,178,1,0,4,0,0,1,6,5,1,1,3,15,380
1,Anjou,51,0,0,2,0,0,0,0,1,1,1,0,19,79
2,Baie d'Urfé,10,0,0,0,0,0,0,1,0,0,0,0,3,10
3,Beaconsfield,13,0,0,0,0,0,2,0,0,0,0,0,5,26
4,Côte St-Luc,34,0,0,1,0,0,0,4,1,0,0,3,0,55


In [9]:
fire_dict = {
'NOM_ARROND': 'NOM_ARROND',
'10-22 avec feu':"Total_Fires",
'Feu de champ *':"Total_Fires",
'Ac.vic. feu train/m√©tro F7/DES':"Total_Fires",
'Acc. sans vict. av. feu - ext.':"Total_Fires",
'Acc. sans vict. av. feu-tunnel':"Total_Fires",
'Acc. s-vict. av. feu - pont':"Total_Fires",
'Acc. s-vict. av. feu - v.r.':"Total_Fires",
'Acc. s-vict. av. feu-b√¢timent':"Total_Fires",
'Acc. s-vict. feu - train/m√©tro':"Total_Fires",
'Acc. vict. feu - tunnel F7/MOU':"Total_Fires",
'Acc. vict. sfeu tunnel F7/MOU':"Total_Fires",
'Acc. victime feu - b√¢timent':"Total_Fires",
'Acc. victime feu - pont':"Total_Fires",
'Acc. victime feu - voie rapide':"Total_Fires",
'Acc. victime feu ext.':"Total_Fires",
'Acc. victime sfeu - b√¢timent':"Total_Fires",
'Acc. victime sfeu - pont':"Total_Fires",
'Acc. victime sfeu - voie rap.':"Total_Fires",
'D√©chets en feu':"Total_Fires",
'Feu / 2e Alerte':"Total_Fires",
'Feu / 3e Alerte':"Total_Fires",
'Feu / 4e Alerte':"Total_Fires",
'Feu / 5e Alerte':"Total_Fires",
'Feu / Agravation possible':"Total_Fires",
'Feu d\'auto (voie rapide)':"Total_Fires",
'Feu de b√¢timent':"Total_Fires",
'Feu de chemin√©e *':"Total_Fires",
'Feu de cuisson':"Total_Fires",
'Feu de nature √©lectrique':"Total_Fires",
'Feu de navire/b√¢teau':"Total_Fires",
'Feu de v√©hicule ext√©rieur':"Total_Fires",
'Feu de v√©hicule int√©rieur':"Total_Fires",
'Feu de v√©hicule sur pont':"Total_Fires",
'feu de v√©hicule tunnel F7/MOU':"Total_Fires",
'Feu v√©h. convoi (train/m√©tro)':"Total_Fires",
'M√©tro b√¢timent /10-22 avec feu':"Total_Fires",
'M√©tro b√¢timent /10-22 sans feu':"Total_Fires",
'Senteur de feu √† l\'ext√©rieur]':"Total_Fires"}

fire_interventions_total = incident_type_df.groupby(fire_dict, axis = 1).sum()
fire_interventions_total.head(20)

Unnamed: 0,NOM_ARROND,Total_Fires
0,Ahuntsic / Cartierville,595
1,Anjou,154
2,Baie d'Urfé,24
3,Beaconsfield,46
4,Côte St-Luc,98
5,Côte-des-Neiges / Notre-Dame-de-Grâce,631
6,Dollard-des-Ormeaux,117
7,Dorval,115
8,Hampstead,17
9,Ile Dorval,2


In [10]:
construction_year_df = pd.read_csv('data/uniteevaluationfonciere.csv')
construction_year_df = construction_year_df[['CIVIQUE_DEBUT','NOM_RUE','NOMBRE_LOGEMENT','ANNEE_CONSTRUCTION','NO_ARROND_ILE_CUM']]
construction_year_df.head()

Unnamed: 0,CIVIQUE_DEBUT,NOM_RUE,NOMBRE_LOGEMENT,ANNEE_CONSTRUCTION,NO_ARROND_ILE_CUM
0,2544,avenue Fletcher (MTL),1.0,2012.0,REM22
1,1908,boulevard Guy-Bouchard (LSL),1.0,1997.0,REM17
2,72,15e Rue (ROX),1.0,1958.0,REM31
3,105,avenue Parkdale (PCL),1.0,1974.0,REM99
4,45,13e Avenue (ROX),1.0,1955.0,REM31


In [11]:
borough_map = {
'REM05': 'Outremont',
'REM09': 'Anjou',
'REM12': 'Verdun',
'REM14': 'Saint-Léonard',
'REM15': 'Saint-Laurent',
'REM16': 'Montréal-Nord',
'REM17': 'Lasalle',
'REM19': 'Ville-Marie',
'REM20': 'Sud-Ouest',
'REM21': 'Plateau Mont-Royal',
'REM22': 'Mercier / Hochelaga-Maisonneuve',
'REM23': 'Ahuntsic / Cartierville',
'REM24': 'Rosemont / Petite-Patrie',
'REM25': 'Villeray / St-Michel / Parc Extension',
'REM27': 'Lachine',
'REM31': 'Pierrefonds / Roxboro',
'REM32': 'L\'Ile-Bizard / Ste-Geneviève',
'REM33': 'Rivière-des-Prairies / Pointe-aux-Trembles',
'REM34': 'Côte-des-Neiges / Notre-Dame-de-Grâce'
}

construction_year_df["NOM_ARROND"] = construction_year_df["NO_ARROND_ILE_CUM"].map(borough_map)
construction_year_df.head(30)

Unnamed: 0,CIVIQUE_DEBUT,NOM_RUE,NOMBRE_LOGEMENT,ANNEE_CONSTRUCTION,NO_ARROND_ILE_CUM,NOM_ARROND
0,2544,avenue Fletcher (MTL),1.0,2012.0,REM22,Mercier / Hochelaga-Maisonneuve
1,1908,boulevard Guy-Bouchard (LSL),1.0,1997.0,REM17,Lasalle
2,72,15e Rue (ROX),1.0,1958.0,REM31,Pierrefonds / Roxboro
3,105,avenue Parkdale (PCL),1.0,1974.0,REM99,
4,45,13e Avenue (ROX),1.0,1955.0,REM31,Pierrefonds / Roxboro
5,150,avenue Tremont (DVL),1.0,1985.0,REM99,
6,70,avenue Courcelette (OUT),1.0,1923.0,REM05,Outremont
7,2301,rue Saint-Patrick (LSL+MTL),,2015.0,REM20,Sud-Ouest
8,6380,21e Avenue (MTL),1.0,2003.0,REM24,Rosemont / Petite-Patrie
9,2459,chemin Laval (SLR),3.0,1959.0,REM15,Saint-Laurent


In [12]:
construction_year_df[['Street', 'Trigram']] = construction_year_df['NOM_RUE'].str.split('(',expand=True)
construction_year_df.drop('Street', axis=1, inplace=True)
construction_year_df['Trigram'] = construction_year_df['Trigram'].str.rstrip(')')
construction_year_df.head()

Unnamed: 0,CIVIQUE_DEBUT,NOM_RUE,NOMBRE_LOGEMENT,ANNEE_CONSTRUCTION,NO_ARROND_ILE_CUM,NOM_ARROND,Trigram
0,2544,avenue Fletcher (MTL),1.0,2012.0,REM22,Mercier / Hochelaga-Maisonneuve,MTL
1,1908,boulevard Guy-Bouchard (LSL),1.0,1997.0,REM17,Lasalle,LSL
2,72,15e Rue (ROX),1.0,1958.0,REM31,Pierrefonds / Roxboro,ROX
3,105,avenue Parkdale (PCL),1.0,1974.0,REM99,,PCL
4,45,13e Avenue (ROX),1.0,1955.0,REM31,Pierrefonds / Roxboro,ROX


In [13]:
construction_year_df_99 = construction_year_df[construction_year_df['NO_ARROND_ILE_CUM'] == 'REM99']
construction_year_df = construction_year_df[construction_year_df['NO_ARROND_ILE_CUM'] != 'REM99']

other_borough_map = {
    'PCL': 'Pointe-Claire', 
    'HMS': 'Hampstead',
    'SNV': 'Senneville',
    'DVL': 'Dorval', 
    'MTE': 'Montréal-Est',
    'MTR': 'Mont-Royal', 
    'CSL': 'Côte St-Luc', 
    'MTL+WMT': 'Westmount', 
    'BCF': 'Beaconsfield', 
    'KRK': 'Kirkland',
    'DDO': 'Dollard-des-Ormeaux',
    'CSL+MTL': 'Côte St-Luc',  
    'MTO': 'Montréal-Ouest', 
    'WMT': 'Westmount', 
    'DDO+PFD': 'Dollard-des-Ormeaux',
    'BDU': 'Baie d\'Urfé', 
    'MTL+MTR':'Mont-Royal',
    'SAB': 'Ste-Anne-de-Bellevue'
}

construction_year_df_99["NOM_ARROND"] = construction_year_df_99.Trigram.map(other_borough_map)
construction_year_df = construction_year_df.append(construction_year_df_99)
construction_year_df.drop(['Trigram', 'NO_ARROND_ILE_CUM', 'CIVIQUE_DEBUT', 'NOM_RUE', 'NOMBRE_LOGEMENT'], axis=1, inplace=True)
construction_year_df.head()

Unnamed: 0,ANNEE_CONSTRUCTION,NOM_ARROND
0,2012.0,Mercier / Hochelaga-Maisonneuve
1,1997.0,Lasalle
2,1958.0,Pierrefonds / Roxboro
4,1955.0,Pierrefonds / Roxboro
6,1923.0,Outremont


In [14]:
construction_year_group = construction_year_df.groupby("NOM_ARROND", as_index=True)['ANNEE_CONSTRUCTION'].mean()
construction_year_group.head()

NOM_ARROND
Ahuntsic / Cartierville    1969.434241
Anjou                      1983.330827
Baie d'Urfé                1968.260479
Beaconsfield               1967.459068
Côte St-Luc                1977.680625
Name: ANNEE_CONSTRUCTION, dtype: float64

In [15]:
construction_year_group_df = pd.DataFrame(construction_year_group)
construction_year_group_df.head()

Unnamed: 0_level_0,ANNEE_CONSTRUCTION
NOM_ARROND,Unnamed: 1_level_1
Ahuntsic / Cartierville,1969.434241
Anjou,1983.330827
Baie d'Urfé,1968.260479
Beaconsfield,1967.459068
Côte St-Luc,1977.680625


In [16]:
fire_full_df = pd.merge(fire_interventions_total, construction_year_group_df, on = 'NOM_ARROND', how='left')
fire_full_df.head(35)

Unnamed: 0,NOM_ARROND,Total_Fires,ANNEE_CONSTRUCTION
0,Ahuntsic / Cartierville,595,1969.434241
1,Anjou,154,1983.330827
2,Baie d'Urfé,24,1968.260479
3,Beaconsfield,46,1967.459068
4,Côte St-Luc,98,1977.680625
5,Côte-des-Neiges / Notre-Dame-de-Grâce,631,1958.444416
6,Dollard-des-Ormeaux,117,1976.996651
7,Dorval,115,1970.842382
8,Hampstead,17,1960.381853
9,Ile Dorval,2,


In [19]:
socio_demo_df = pd.read_csv('data/socio-demo.xlsx')
socio_demo_df.head(10)

UnicodeDecodeError: 'utf-8' codec can't decode bytes in position 0-1: invalid continuation byte

In [None]:
fire_full_df = pd.merge(fire_full_df, socio_demo_df, on = 'NOM_ARROND', how='left')
fire_full_df.head(35)

In [None]:
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
fire_full_no_boroughs_df = fire_full_df.drop('NOM_ARROND', axis=1)
fire_full_no_boroughs_df = scaler.fit_transform(fire_full_no_boroughs_df)
fire_full_no_boroughs_df.head(30)

In [None]:
#Finding optimal no. of clusters
from scipy.spatial.distance import cdist
clusters=range(1,10)
meanDistortions=[]
 
for k in clusters:
    model=KMeans(n_clusters=k)
    model.fit(clust_data)
    prediction=model.predict(clust_data)
    meanDistortions.append(sum(np.min(cdist(clust_data, model.cluster_centers_, 'euclidean'), axis=1)) / clust_data.shape[0])
 
#plt.cla()
plt.plot(clusters, meanDistortions, 'bx-')
plt.xlabel('k')
plt.ylabel('Average distortion')
plt.title('Selecting k with the Elbow Method')

In [None]:
final_model=KMeans(8)
final_model.fit(clust_data)
prediction=final_model.predict(clust_data)
 
#Join predicted clusters back to raw data
incident_type_df["GROUP"] = prediction
print("Groups Assigned : \n")
incident_type_df[["GROUP","NOM_ARROND"]].sort_values('GROUP')

In [None]:
gen = (x for x in incident_type_df if x not in "NOM_ARROND")

for column in gen:
    boxplot_field = column
    plt.figure()
    plt.suptitle(boxplot_field)
    plt.boxplot([incident_type_df[boxplot_field][incident_type_df.GROUP==0],
            incident_type_df[boxplot_field][incident_type_df.GROUP==1],
            incident_type_df[boxplot_field][incident_type_df.GROUP==2],
            incident_type_df[boxplot_field][incident_type_df.GROUP==3],
            incident_type_df[boxplot_field][incident_type_df.GROUP==4],
            incident_type_df[boxplot_field][incident_type_df.GROUP==5],
            incident_type_df[boxplot_field][incident_type_df.GROUP==6],
            incident_type_df[boxplot_field][incident_type_df.GROUP==7]],
            labels=('G1','G2','G3','G4','G5','G6','G7','G8'));

In [None]:
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder

labelencoder = LabelEncoder()
arrond_df = pd.DataFrame(labelencoder.fit_transform(interventions_df['NOM_ARROND']))


onehotencoder = OneHotEncoder()
arrond_df = pd.DataFrame(onehotencoder.fit_transform(arrond_df).toarray())


In [None]:
arrond_df.head()

In [None]:
kmeans = KMeans(n_clusters = 20, n_init = 5, n_jobs = -1)
kmeans.fit(arrond_df)

In [None]:
interventions_df['cluster'] = kmeans.labels_

In [None]:
type(unique_incident_types)

In [None]:
interventions_df = interventions_df[interventions_df['INCIDENT_TYPE_DESC'].isin(unique_incident_types)]

In [None]:

clusters = interventions_df.groupby(['cluster', 'INCIDENT_TYPE_DESC']).size()
fig2, ax2 = plt.subplots(figsize = (30, 15))
sns.heatmap(clusters.unstack(level = 'INCIDENT_TYPE_DESC'), ax = ax2, cmap = 'Reds')

ax2.set_xlabel('INCIDENT_TYPE_DESC', fontdict = {'weight': 'bold', 'size': 24})
ax2.set_ylabel('cluster', fontdict = {'weight': 'bold', 'size': 24})
for label in ax2.get_xticklabels():
    label.set_size(16)
    label.set_weight("bold")
for label in ax2.get_yticklabels():
    label.set_size(16)
    label.set_weight("bold")

In [None]:
interventions_df.head(40)