# Clustering de séries temporelles

## Exploration des données

In [None]:
#conda install -c conda-forge tslearn

In [None]:
# packages nécessaires
import os
import math

import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

from scipy.cluster.hierarchy import dendrogram

from sklearn.preprocessing import MinMaxScaler
from sklearn.cluster import KMeans
from sklearn.cluster import AgglomerativeClustering
from sklearn.cluster import DBSCAN
from sklearn.decomposition import PCA
from sklearn.metrics import silhouette_score
from sklearn.metrics import rand_score
from sklearn.metrics import adjusted_rand_score


from tslearn.barycenters import dtw_barycenter_averaging
from tslearn.clustering import TimeSeriesKMeans
from tslearn.clustering import silhouette_score as silhouette_score2
from tslearn.utils import to_time_series_dataset
from tslearn.utils import to_sklearn_dataset
from tslearn.metrics import dtw

from statsmodels.tsa.stattools import acf, pacf

In [None]:
pip install pyarrow

In [None]:
# Dataset: load the forecasting table from a local parquet file (absolute, machine-specific path).
df = pd.read_parquet("C:/Users/gabri/OneDrive/Bureau/Cours ENSAE/2ème année/Stage/Données/forecasting.parquet.gzip")
df

In [None]:
list(df.columns)

In [None]:
df['time_index']=pd.to_datetime(df['time_index'])

On liste les séries temporelles selon les indices des produits

In [None]:
# Distinct product identifiers found in the 'product_index' column.
indice_produit = df['product_index'].unique()
# One (time_index, ordered_volumes) frame per product, in the same order as indice_produit.
myseries = [
    df.loc[df['product_index'] == produit, ['time_index', 'ordered_volumes']]
    for produit in indice_produit
]


In [None]:
# Index every product series by time and put it in chronological order.
for k, serie in enumerate(myseries):
    myseries[k] = serie.set_index('time_index').sort_index()
# Shallow copies: the lists are independent, the frames they hold are shared.
myseries2 = list(myseries)
myseries3 = list(myseries)
# Original (unpadded) time index of each series, used later for plotting.
indice_temps = [serie.index for serie in myseries2]


In [None]:
myseries[0]

In [None]:
myseries[124].plot(y='ordered_volumes')

On complète les séries temporelles pour qu'elles aient toutes la taille de la plus grande

In [None]:
series_length=[len(series) for series in myseries]

In [None]:
# Length of the longest series; it serves as the common target length.
max_len = max(series_length)
# Reference series: the last one in the list that reaches max_len (None if there is none).
longest_series = next(
    (serie for serie in reversed(myseries) if len(serie) == max_len),
    None,
)
# Align every shorter series on the reference index, filling the missing dates with 0.
for i, serie in enumerate(myseries):
    if len(serie) != max_len:
        myseries[i] = serie.reindex(longest_series.index, fill_value=0)

In [None]:
min(series_length)

In [None]:
myseries[1]['ordered_volumes']

On normalise les séries temporelles (mise à l'échelle min-max sur [0, 1])

In [None]:
# Min-max scale every series independently and flatten the (n, 1) result to a 1-D array.
# A single scaler is reused: fit_transform refits it on each call, so every series
# is still scaled with its own minimum and maximum.
scaler = MinMaxScaler()
for i in range(len(myseries)):
    # Padded series (common length), used by PCA / k-means.
    myseries[i] = scaler.fit_transform(myseries[i]).ravel()
    # Original-length series, used by the DTW-based methods.
    myseries2[i] = scaler.fit_transform(myseries2[i]).ravel()

In [None]:
X=to_time_series_dataset(myseries2)

In [None]:
plt.plot(longest_series.index,myseries[124])

### Clustering K-means sur PCA

In [None]:
cluster_count = math.ceil(math.sqrt(len(myseries))) 

In [None]:
# Fit a 20-component PCA on the padded, scaled series (fit returns the estimator itself).
pca = PCA(n_components=20).fit(myseries)
# Explained-variance ratio per component, drawn as one weighted histogram bin per component.
plt.hist(range(1,21), bins=20, weights=pca.explained_variance_ratio_)
# Reference line at a 5 % explained-variance ratio.
plt.axhline(color='orange', y=0.05)

In [None]:
plt.plot(range(1,21),pca.explained_variance_ratio_)

In [None]:
pca.explained_variance_ratio_

In [None]:
pca = PCA(n_components=2)

myseries_transformed = pca.fit_transform(myseries)

In [None]:
pca = PCA(n_components=3)

myseries_transformed3 = pca.fit_transform(myseries)

In [None]:
#inertie=np.empty(25,dtype='float')

#for i in range(1,26):
#    kmeans = KMeans(n_clusters=i,max_iter=5000)
#    inertie[i-1] = kmeans.fit(myseries_transformed3).inertia_
#plt.plot(range(1,26),inertie)

In [None]:
kmeans_pca = KMeans(n_clusters=3,max_iter=5000)

labels_pca = kmeans_pca.fit_predict(myseries_transformed3)

In [None]:
# One panel per cluster found by k-means on the PCA projection: member series in grey,
# their point-wise average in red.
cluster_labels = np.unique(labels_pca)
# The number of panels follows the number of labels actually present;
# squeeze=False keeps axs two-dimensional even with a single cluster.
fig, axs = plt.subplots(1, len(cluster_labels), figsize=(25,5), squeeze=False)
fig.suptitle('Clusters')
for ax, label in zip(axs[0], cluster_labels):
    cluster = [myseries[i] for i in np.flatnonzero(labels_pca == label)]
    for serie in cluster:
        ax.plot(longest_series.index, serie, c="gray", alpha=0.4)
    # cluster is never empty here: label comes from the labels themselves.
    ax.plot(longest_series.index, np.average(np.vstack(cluster), axis=0), c="red")
    # Title carries the real cluster label instead of a position-derived number.
    ax.set_title("Cluster "+str(label))

plt.savefig('C:/Users/gabri/OneDrive/Bureau/Cours ENSAE/2ème année/Stage/Résultats PCA.jpeg')
plt.show()

In [None]:
# Scatter of the series in the 2-component PCA plane, coloured by the k-means labels.
# plt.get_cmap replaces plt.cm.get_cmap, which recent Matplotlib releases deprecated and then removed.
cmap = plt.get_cmap("tab20", kmeans_pca.cluster_centers_.shape[0])
plt.figure(figsize=(25,10))
colors_pca=[cmap(i) for i in labels_pca]
# NOTE(review): labels_pca was fitted on the 3-component projection while the axes
# show the 2-component one — presumably intentional for display; confirm.
plt.scatter(myseries_transformed[:, 0], myseries_transformed[:, 1], c=colors_pca, s=300)
plt.show()

In [None]:
# Human-readable cluster name for every series, in the order of indice_produit.
fancy_names_for_labels_pca = [f"Cluster {label}" for label in labels_pca]
# Product -> cluster table, sorted by cluster name and indexed by product.
prédiction_pca = pd.DataFrame(
    zip(indice_produit, fancy_names_for_labels_pca),
    columns=["Series", "Cluster"],
)
prédiction_pca = prédiction_pca.sort_values(by="Cluster").set_index("Series")

In [None]:
prédiction_pca

In [None]:
for k in range(3):
    print(prédiction_pca[prédiction_pca['Cluster']==f'Cluster {k}'].index)

### Kmeans

In [None]:
#inertie=np.empty(25,dtype='float')

#for i in range(1,26):
#    kmeans = KMeans(n_clusters=i,max_iter=5000)
#    inertie[i-1] = kmeans.fit(myseries).inertia_
#plt.plot(range(1,26),inertie)

In [None]:
kmeans = KMeans(n_clusters=3,max_iter=5000)

labels_kmeans = kmeans.fit_predict(myseries)

In [None]:

# One panel per cluster found by k-means on the raw (padded, scaled) series:
# member series in grey, their point-wise average in red.
cluster_labels = np.unique(labels_kmeans)
# The number of panels follows the number of labels actually present;
# squeeze=False keeps axs two-dimensional even with a single cluster.
fig, axs = plt.subplots(1, len(cluster_labels), figsize=(25,5), squeeze=False)
fig.suptitle('Clusters')
for ax, label in zip(axs[0], cluster_labels):
    cluster = [myseries[i] for i in np.flatnonzero(labels_kmeans == label)]
    for serie in cluster:
        ax.plot(longest_series.index, serie, c="gray", alpha=0.4)
    # cluster is never empty here: label comes from the labels themselves.
    ax.plot(longest_series.index, np.average(np.vstack(cluster), axis=0), c="red")
    # Title carries the real cluster label instead of a position-derived number.
    ax.set_title("Cluster "+str(label))

plt.savefig('C:/Users/gabri/OneDrive/Bureau/Cours ENSAE/2ème année/Stage/Résultat kmeans.jpeg')
plt.show()

In [None]:
# Scatter of the series in the 2-component PCA plane, coloured by the raw k-means labels.
plt.figure(figsize=(25,10))
# plt.get_cmap replaces plt.cm.get_cmap, which recent Matplotlib releases deprecated and then removed.
cmap = plt.get_cmap("tab20", kmeans.cluster_centers_.shape[0])
colors_kmeans=[cmap(i) for i in labels_kmeans]
plt.scatter(myseries_transformed[:, 0], myseries_transformed[:, 1], c=colors_kmeans, s=300)
plt.show()

In [None]:
# Human-readable cluster name for every series, in the order of indice_produit.
fancy_names_for_labels_kmeans = [f"Cluster {label}" for label in labels_kmeans]
# Product -> cluster table, sorted by cluster name and indexed by product.
prédiction_kmeans = pd.DataFrame(
    zip(indice_produit, fancy_names_for_labels_kmeans),
    columns=["Series", "Cluster"],
)
prédiction_kmeans = prédiction_kmeans.sort_values(by="Cluster").set_index("Series")

In [None]:
prédiction_kmeans

In [None]:
for k in range(3):
    print(prédiction_kmeans[prédiction_kmeans['Cluster']==f'Cluster {k}'].index)

### DTW

In [None]:
%%time
# Elbow curve for DTW k-means: inertia for 1 to 10 clusters, averaged over the repetitions.
# inertie has one row per repetition (a single one here) and one column per cluster count.
inertie = np.empty((1,10), dtype='float')

for j in range(1):
    for i in range(1,11):
        km = TimeSeriesKMeans(n_clusters=i, metric="dtw")
        km.fit(X)
        inertie[j, i-1] = km.inertia_
    # Progress trace: index of the repetition that just finished.
    print(j)
plt.plot(range(1,11), np.mean(inertie, axis=0))

In [None]:
%%time
dtw_km = TimeSeriesKMeans(n_clusters=3, metric="dtw")

labels_dtw = dtw_km.fit_predict(X)

In [None]:
# Human-readable cluster name for every series, in the order of indice_produit.
fancy_names_for_labels_dtw = [f"Cluster {label}" for label in labels_dtw]
# Product -> cluster table, sorted by cluster name and indexed by product.
prediction_dtw = pd.DataFrame(
    zip(indice_produit, fancy_names_for_labels_dtw),
    columns=["Series", "Cluster"],
)
prediction_dtw = prediction_dtw.sort_values(by="Cluster").set_index("Series")

In [None]:
# Plot every series of DTW cluster 0 on a 4-column grid (original, unpadded time axis).
membres = prediction_dtw[prediction_dtw['Cluster']==f'Cluster {0}'].index
# squeeze=False keeps axs two-dimensional even when the cluster fits on a single row.
fig, axs = plt.subplots(len(membres)//4+1, 4, figsize=(25,25), squeeze=False)
for rang, produit in enumerate(membres):
    # Position of the product in indice_produit, shared by indice_temps and myseries2.
    position = np.where(indice_produit==produit)[0][0]
    axs[rang//4, rang%4].plot(indice_temps[position], myseries2[position], c="blue")

# Save before showing, and actually call plt.show (the bare name was a no-op).
plt.savefig('C:/Users/gabri/OneDrive/Bureau/Cours ENSAE/2ème année/Stage/Cluster 1 dtw.jpeg')
plt.show()

In [None]:
# Plot every series of DTW cluster 1 on a 4-column grid (original, unpadded time axis).
membres = prediction_dtw[prediction_dtw['Cluster']==f'Cluster {1}'].index
# squeeze=False keeps axs two-dimensional even when the cluster fits on a single row.
fig, axs = plt.subplots(len(membres)//4+1, 4, figsize=(25,25), squeeze=False)
for rang, produit in enumerate(membres):
    # Position of the product in indice_produit, shared by indice_temps and myseries2.
    position = np.where(indice_produit==produit)[0][0]
    axs[rang//4, rang%4].plot(indice_temps[position], myseries2[position], c="blue")

plt.savefig('C:/Users/gabri/OneDrive/Bureau/Cours ENSAE/2ème année/Stage/Cluster 2 dtw.jpeg')
plt.show()

In [None]:
# Plot every series of DTW cluster 2 on a 4-column grid (original, unpadded time axis).
membres = prediction_dtw[prediction_dtw['Cluster']==f'Cluster {2}'].index
# squeeze=False keeps axs two-dimensional even when the cluster fits on a single row.
fig, axs = plt.subplots(len(membres)//4+1, 4, figsize=(25,25), squeeze=False)
for rang, produit in enumerate(membres):
    # Position of the product in indice_produit, shared by indice_temps and myseries2.
    position = np.where(indice_produit==produit)[0][0]
    axs[rang//4, rang%4].plot(indice_temps[position], myseries2[position], c="blue")

plt.savefig('C:/Users/gabri/OneDrive/Bureau/Cours ENSAE/2ème année/Stage/Cluster 3 dtw.jpeg')
plt.show()

In [None]:
# Plot every series of DTW cluster 3 on a 4-column grid.
# NOTE(review): labels_dtw comes from a 3-cluster model in this notebook, so this
# selection is presumably empty unless the model is refitted with more clusters.
membres = prediction_dtw[prediction_dtw['Cluster']==f'Cluster {3}'].index
# squeeze=False keeps axs two-dimensional even when the cluster fits on a single row.
fig, axs = plt.subplots(len(membres)//4+1, 4, figsize=(25,20), squeeze=False)
for rang, produit in enumerate(membres):
    # Position of the product in indice_produit, shared by indice_temps and myseries2.
    position = np.where(indice_produit==produit)[0][0]
    axs[rang//4, rang%4].plot(indice_temps[position], myseries2[position], c="blue")

# Actually call plt.show (the bare name was a no-op).
plt.show()

In [None]:
# Plot every series of DTW cluster 4 on a 4-column grid.
# NOTE(review): labels_dtw comes from a 3-cluster model in this notebook, so this
# selection is presumably empty unless the model is refitted with more clusters.
membres = prediction_dtw[prediction_dtw['Cluster']==f'Cluster {4}'].index
# squeeze=False keeps axs two-dimensional even when the cluster fits on a single row.
fig, axs = plt.subplots(len(membres)//4+1, 4, figsize=(25,25), squeeze=False)
for rang, produit in enumerate(membres):
    # Position of the product in indice_produit, shared by indice_temps and myseries2.
    position = np.where(indice_produit==produit)[0][0]
    axs[rang//4, rang%4].plot(indice_temps[position], myseries2[position], c="blue")

# Actually call plt.show (the bare name was a no-op).
plt.show()

In [None]:
# Plot every series of DTW cluster 5 on a 4-column grid.
# NOTE(review): labels_dtw comes from a 3-cluster model in this notebook, so this
# selection is presumably empty unless the model is refitted with more clusters.
membres = prediction_dtw[prediction_dtw['Cluster']==f'Cluster {5}'].index
# squeeze=False keeps axs two-dimensional even when the cluster fits on a single row.
fig, axs = plt.subplots(len(membres)//4+1, 4, figsize=(25,25), squeeze=False)
for rang, produit in enumerate(membres):
    # Position of the product in indice_produit, shared by indice_temps and myseries2.
    position = np.where(indice_produit==produit)[0][0]
    axs[rang//4, rang%4].plot(indice_temps[position], myseries2[position], c="blue")

# Actually call plt.show (the bare name was a no-op).
plt.show()

In [None]:
for k in range(3):
    print(prediction_dtw[prediction_dtw['Cluster']==f'Cluster {k}'].index)

### Hierarchical Clustering

In [None]:
%%time
# Pairwise DTW distance matrix between the original-length scaled series.
# The size follows the data instead of a hard-coded 125.
n_series = len(myseries2)
dtw_precomputed = np.zeros((n_series, n_series))
for i in range(n_series):
    # DTW is symmetric and the distance of a series to itself is 0, so only the
    # upper triangle is computed and mirrored; the diagonal keeps its initial 0.
    for j in range(i + 1, n_series):
        dtw_precomputed[i, j] = dtw_precomputed[j, i] = dtw(myseries2[i], myseries2[j])

In [None]:
%%time
# Three 5-cluster agglomerative models on the precomputed DTW matrix, one per linkage.
# compute_distances=True stores the merge distances needed to draw dendrograms.
# NOTE(review): recent scikit-learn releases rename `affinity` to `metric` — confirm
# against the installed version before changing it.
hac_average, hac_single, hac_complete = (
    AgglomerativeClustering(
        n_clusters=5,
        affinity='precomputed',
        linkage=lien,
        compute_distances=True,
    )
    for lien in ('average', 'single', 'complete')
)
# fit returns the fitted estimator.
modele_average = hac_average.fit(dtw_precomputed)
modele_single = hac_single.fit(dtw_precomputed)
modele_complete = hac_complete.fit(dtw_precomputed)

In [None]:
def plot_dendrogram(model, **kwargs):
    """Draw the dendrogram of a fitted hierarchical clustering model.

    A SciPy linkage matrix is rebuilt from the model's ``children_``,
    ``distances_`` and ``labels_`` attributes and passed to
    ``scipy.cluster.hierarchy.dendrogram``.

    Parameters
    ----------
    model : object exposing ``children_``, ``distances_`` and ``labels_``.
    **kwargs : forwarded unchanged to ``dendrogram``.
    """
    merges = model.children_
    n_leaves = len(model.labels_)

    # Number of original samples below each merge: a child index smaller than
    # n_leaves is a single sample, any other index points to an earlier merge.
    counts = np.zeros(merges.shape[0])
    for step, pair in enumerate(merges):
        counts[step] = sum(
            1 if node < n_leaves else counts[node - n_leaves]
            for node in pair
        )

    # Columns: the two merged children, the merge distance, the sample count.
    linkage_matrix = np.column_stack(
        [merges, model.distances_, counts]
    ).astype(float)

    dendrogram(linkage_matrix, **kwargs)

In [None]:
plot_dendrogram(modele_average, truncate_mode="level", p=5)

In [None]:
plot_dendrogram(modele_single, truncate_mode="level", p=10)

In [None]:
plot_dendrogram(modele_complete, truncate_mode="level", p=4)

In [None]:
hac=AgglomerativeClustering(n_clusters=3,affinity='precomputed',linkage='complete')

labels_hac = hac.fit_predict(dtw_precomputed)

In [None]:
# Human-readable cluster name for every series, in the order of indice_produit.
fancy_names_for_labels_hac = [f"Cluster {label}" for label in labels_hac]
# Product -> cluster table, sorted by cluster name and indexed by product.
prediction_hac = pd.DataFrame(
    zip(indice_produit, fancy_names_for_labels_hac),
    columns=["Series", "Cluster"],
)
prediction_hac = prediction_hac.sort_values(by="Cluster").set_index("Series")

In [None]:
# Plot every series of hierarchical cluster 0 on a 4-column grid (original time axis).
membres = prediction_hac[prediction_hac['Cluster']==f'Cluster {0}'].index
# squeeze=False keeps axs two-dimensional even when the cluster fits on a single row.
fig, axs = plt.subplots(len(membres)//4+1, 4, figsize=(25,35), squeeze=False)
for rang, produit in enumerate(membres):
    # Position of the product in indice_produit, shared by indice_temps and myseries2.
    position = np.where(indice_produit==produit)[0][0]
    axs[rang//4, rang%4].plot(indice_temps[position], myseries2[position], c="blue")

plt.savefig('C:/Users/gabri/OneDrive/Bureau/Cours ENSAE/2ème année/Stage/Cluster 1 hierarchical clustering.jpeg')
plt.show()

In [None]:
# Plot every series of hierarchical cluster 1 on a 4-column grid (original time axis).
membres = prediction_hac[prediction_hac['Cluster']==f'Cluster {1}'].index
# squeeze=False keeps axs two-dimensional even when the cluster fits on a single row.
fig, axs = plt.subplots(len(membres)//4+1, 4, figsize=(25,10), squeeze=False)
for rang, produit in enumerate(membres):
    # Position of the product in indice_produit, shared by indice_temps and myseries2.
    position = np.where(indice_produit==produit)[0][0]
    axs[rang//4, rang%4].plot(indice_temps[position], myseries2[position], c="blue")

plt.savefig('C:/Users/gabri/OneDrive/Bureau/Cours ENSAE/2ème année/Stage/Cluster 2 hierarchical clustering.jpeg')
plt.show()

In [None]:
# Plot every series of hierarchical cluster 2 on a 4-column grid (original time axis).
membres = prediction_hac[prediction_hac['Cluster']==f'Cluster {2}'].index
# squeeze=False keeps axs two-dimensional even when the cluster fits on a single row.
fig, axs = plt.subplots(len(membres)//4+1, 4, figsize=(25,25), squeeze=False)
for rang, produit in enumerate(membres):
    # Position of the product in indice_produit, shared by indice_temps and myseries2.
    position = np.where(indice_produit==produit)[0][0]
    axs[rang//4, rang%4].plot(indice_temps[position], myseries2[position], c="blue")

plt.savefig('C:/Users/gabri/OneDrive/Bureau/Cours ENSAE/2ème année/Stage/Cluster 3 hierarchical clustering.jpeg')
plt.show()

In [None]:
# Plot every series of hierarchical cluster 3 on a 4-column grid.
# NOTE(review): labels_hac comes from a 3-cluster model in this notebook, so this
# selection is presumably empty unless the model is refitted with more clusters.
membres = prediction_hac[prediction_hac['Cluster']==f'Cluster {3}'].index
# squeeze=False keeps axs two-dimensional even when the cluster fits on a single row.
fig, axs = plt.subplots(len(membres)//4+1, 4, figsize=(25,10), squeeze=False)
for rang, produit in enumerate(membres):
    # Position of the product in indice_produit, shared by indice_temps and myseries2.
    position = np.where(indice_produit==produit)[0][0]
    axs[rang//4, rang%4].plot(indice_temps[position], myseries2[position], c="blue")

# Actually call plt.show (the bare name was a no-op).
plt.show()

In [None]:
# Plot every series of hierarchical cluster 4 on a 4-column grid.
# NOTE(review): labels_hac comes from a 3-cluster model in this notebook, so this
# selection is presumably empty unless the model is refitted with more clusters.
membres = prediction_hac[prediction_hac['Cluster']==f'Cluster {4}'].index
# squeeze=False keeps axs two-dimensional even when the cluster fits on a single row.
fig, axs = plt.subplots(len(membres)//4+1, 4, figsize=(25,10), squeeze=False)
for rang, produit in enumerate(membres):
    # Position of the product in indice_produit, shared by indice_temps and myseries2.
    position = np.where(indice_produit==produit)[0][0]
    axs[rang//4, rang%4].plot(indice_temps[position], myseries2[position], c="blue")

# Actually call plt.show (the bare name was a no-op).
plt.show()

In [None]:
for k in range(3):
    print(prediction_hac[prediction_hac['Cluster']==f'Cluster {k}'].index)

### DBSCAN

In [None]:
# Parameter search for DBSCAN, derived from the precomputed DTW matrix.
a = np.copy(dtw_precomputed)
# After sorting each column, row 0 is expected to hold the zero self-distance,
# so row 1 holds the distance of each series to its nearest neighbour.
plus_proche_voisin = np.sort(a, axis=0)[1]
# eps: 95 % quantile of the nearest-neighbour distances.
e = np.quantile(plus_proche_voisin, 0.95)
# min_samples: 5 % quantile of the neighbourhood sizes at radius e. np.quantile
# returns a float while DBSCAN expects an integer min_samples; rounding up keeps the
# same core-point criterion (a count >= a fractional threshold is a count >= its ceiling).
n = math.ceil(np.quantile(np.count_nonzero(a <= e, axis=0), 0.05))
# Second, stricter candidate: 90 % quantile for eps, 10 % quantile for min_samples.
e2 = np.quantile(plus_proche_voisin, 0.9)
n2 = math.ceil(np.quantile(np.count_nonzero(a <= e2, axis=0), 0.1))

In [None]:
%%time
dbscan=DBSCAN(eps=e,min_samples=n,metric='precomputed')

labels_dbscan = dbscan.fit_predict(dtw_precomputed)

In [None]:
# Human-readable cluster name for every series, in the order of indice_produit.
fancy_names_for_labels_dbscan = [f"Cluster {label}" for label in labels_dbscan]
# Product -> cluster table, sorted by cluster name and indexed by product.
prediction_dbscan = pd.DataFrame(
    zip(indice_produit, fancy_names_for_labels_dbscan),
    columns=["Series", "Cluster"],
)
prediction_dbscan = prediction_dbscan.sort_values(by="Cluster").set_index("Series")

In [None]:
prediction_dbscan['Cluster'].unique().shape

In [None]:
prediction_dbscan

In [None]:
for k in range(-1,2):
    print(prediction_dbscan[prediction_dbscan['Cluster']==f'Cluster {k}'].index)

In [None]:
# Plot every series that DBSCAN labelled -1 on a 4-column grid (original time axis).
membres = prediction_dbscan[prediction_dbscan['Cluster']==f'Cluster {-1}'].index
# squeeze=False keeps axs two-dimensional even when the group fits on a single row.
fig, axs = plt.subplots(len(membres)//4+1, 4, figsize=(25,50), squeeze=False)
for rang, produit in enumerate(membres):
    # Position of the product in indice_produit, shared by indice_temps and myseries2.
    position = np.where(indice_produit==produit)[0][0]
    axs[rang//4, rang%4].plot(indice_temps[position], myseries2[position], c="blue")

# Save before showing, and actually call plt.show (the bare name was a no-op).
plt.savefig('C:/Users/gabri/OneDrive/Bureau/Cours ENSAE/2ème année/Stage/Cluster autres DBSCAN.jpeg')
plt.show()

In [None]:
# Plot every series of DBSCAN cluster 0 on a 4-column grid (original time axis).
membres = prediction_dbscan[prediction_dbscan['Cluster']==f'Cluster {0}'].index
# squeeze=False keeps axs two-dimensional even when the cluster fits on a single row.
fig, axs = plt.subplots(len(membres)//4+1, 4, figsize=(25,50), squeeze=False)
for rang, produit in enumerate(membres):
    # Position of the product in indice_produit, shared by indice_temps and myseries2.
    position = np.where(indice_produit==produit)[0][0]
    axs[rang//4, rang%4].plot(indice_temps[position], myseries2[position], c="blue")

# Save before showing, and actually call plt.show (the bare name was a no-op).
plt.savefig('C:/Users/gabri/OneDrive/Bureau/Cours ENSAE/2ème année/Stage/Cluster 1 DBSCAN.jpeg')
plt.show()

In [None]:
# Plot every series of DBSCAN cluster 1 on a 4-column grid (original time axis).
membres = prediction_dbscan[prediction_dbscan['Cluster']==f'Cluster {1}'].index
# squeeze=False makes axs two-dimensional whatever the cluster size; the previous
# single-index access only worked while the cluster fitted on one row.
fig, axs = plt.subplots(len(membres)//4+1, 4, figsize=(25,5), squeeze=False)
for rang, produit in enumerate(membres):
    # Position of the product in indice_produit, shared by indice_temps and myseries2.
    position = np.where(indice_produit==produit)[0][0]
    axs[rang//4, rang%4].plot(indice_temps[position], myseries2[position], c="blue")

# Save before showing, and actually call plt.show (the bare name was a no-op).
plt.savefig('C:/Users/gabri/OneDrive/Bureau/Cours ENSAE/2ème année/Stage/Cluster 2 DBSCAN.jpeg')
plt.show()

In [None]:
# Plot every series of DBSCAN cluster 2 on a 4-column grid (original time axis).
membres = prediction_dbscan[prediction_dbscan['Cluster']==f'Cluster {2}'].index
# squeeze=False keeps axs two-dimensional even when the cluster fits on a single row
# (or is empty).
fig, axs = plt.subplots(len(membres)//4+1, 4, figsize=(25,10), squeeze=False)
for rang, produit in enumerate(membres):
    # Position of the product in indice_produit, shared by indice_temps and myseries2.
    position = np.where(indice_produit==produit)[0][0]
    axs[rang//4, rang%4].plot(indice_temps[position], myseries2[position], c="blue")

# Actually call plt.show (the bare name was a no-op).
plt.show()

### Comparaison Kmeans, PCA et DTW

In [None]:
%%time
# Inertia of three approaches for 3 to 6 clusters.
# Axes of modeles_inertie: repetition, method (PCA 3 components / raw k-means / DTW k-means),
# number of clusters.
modeles_inertie = np.zeros((1,3,4))
for k in range(1):
    modeles_inertie[k] = np.array([
        # Row 0: k-means on the 3-component PCA projection. Every column now uses
        # myseries_transformed3; the 2-component projection was previously mixed
        # into this row although it is reported as "PCA 3 Composantes".
        [KMeans(n_clusters=nb, max_iter=5000).fit(myseries_transformed3).inertia_ for nb in (3, 4, 5, 6)],
        # Row 1: k-means on the padded, scaled series.
        [KMeans(n_clusters=nb, max_iter=5000).fit(myseries).inertia_ for nb in (3, 4, 5, 6)],
        # Row 2: DTW k-means on the original-length series.
        [TimeSeriesKMeans(n_clusters=nb, metric="dtw").fit(X).inertia_ for nb in (3, 4, 5, 6)],
    ])
    # Progress trace: index of the repetition that just finished.
    print(k)


In [None]:
# Render the inertia averaged over the repetitions as a table-only figure.
fig, ax = plt.subplots(figsize=(10,10))

row_labels = ["PCA 3 Composantes","Kmeans","DTW"]
column_labels = ["n=3", "n=4", "n=5","n=6"]
ax.axis('tight')
# Hide the axes so that only the table is visible.
ax.axis('off')
inertie_moyenne = np.mean(modeles_inertie, axis=0)
ax.table(
    cellText=inertie_moyenne,
    rowLabels=row_labels,
    colLabels=column_labels,
    loc="center",
)

plt.show()

In [None]:
#fig, ax =plt.subplots(1,1,figsize=(10,10))

#column_labels=["n=3", "n=4", "n=5","n=6"]
#row_labels=["PCA 2 composantes","PCA 3 Composantes","Kmeans","DTW"]
#ax.axis('tight')
#ax.axis('off')
#ax.table(cellText=np.var(modeles_inertie, axis=0),colLabels=column_labels,rowLabels=row_labels,loc="center")

#plt.show()

### Comparaison avec d'autres mesures de score

#### Silhouette score

In [None]:
#%%time
#modeles_silhouette=np.zeros((50,5,4))
#a=silhouette_score2(X,DBSCAN(eps=e,min_samples=n,metric='precomputed').fit_predict(dtw_precomputed))
#b=silhouette_score2(X,DBSCAN(eps=e2,min_samples=n2,metric='precomputed').fit_predict(dtw_precomputed))
#c=np.array([silhouette_score2(X,AgglomerativeClustering(n_clusters=3,affinity='precomputed',linkage='complete').fit_predict(dtw_precomputed)),silhouette_score2(X,AgglomerativeClustering(n_clusters=4,affinity='precomputed',linkage='complete').fit_predict(dtw_precomputed)),silhouette_score2(X,AgglomerativeClustering(n_clusters=5,affinity='precomputed',linkage='complete').fit_predict(dtw_precomputed)),silhouette_score2(X,AgglomerativeClustering(n_clusters=6,affinity='precomputed',linkage='complete').fit_predict(dtw_precomputed))])
#d= np.array([silhouette_score2(X,AgglomerativeClustering(n_clusters=3,affinity='precomputed',linkage='average').fit_predict(dtw_precomputed)),silhouette_score2(X,AgglomerativeClustering(n_clusters=4,affinity='precomputed',linkage='average').fit_predict(dtw_precomputed)),silhouette_score2(X,AgglomerativeClustering(n_clusters=5,affinity='precomputed',linkage='average').fit_predict(dtw_precomputed)),silhouette_score2(X,AgglomerativeClustering(n_clusters=6,affinity='precomputed',linkage='average').fit_predict(dtw_precomputed))])
#for k in range(50):
#    modeles_silhouette[k,0]=np.array([silhouette_score2(X,TimeSeriesKMeans(n_clusters=3, metric="dtw").fit_predict(X)),silhouette_score2(X,TimeSeriesKMeans(n_clusters=4, metric="dtw").fit_predict(X)),silhouette_score2(X,TimeSeriesKMeans(n_clusters=5, metric="dtw").fit_predict(X)),silhouette_score2(X,TimeSeriesKMeans(n_clusters=6, metric="dtw").fit_predict(X))])
#    modeles_silhouette[k,1]=c
#    modeles_silhouette[k,2]=d                                
#    modeles_silhouette[k,3]=np.full(4,a)
#    modeles_silhouette[k,4]=np.full(4,b)
#    print(k)


In [None]:
#fig, ax =plt.subplots(1,1,figsize=(10,10))

#column_labels=["n=3", "n=4", "n=5","n=6"]
#row_labels=["DTW","HAC Complete","HAC average","DBSCAN eps=1,82 min_samples=2","DBSCAN eps=1,72 min_samples=2"]
#ax.axis('tight')
#ax.axis('off')
#ax.table(cellText=np.mean(modeles_silhouette, axis=0),colLabels=column_labels,rowLabels=row_labels,loc="center")

#plt.show()

In [None]:
#fig, ax =plt.subplots(1,1,figsize=(10,10))

#column_labels=["n=3", "n=4", "n=5","n=6"]
#row_labels=["DTW","HAC Complete","HAC average","DBSCAN eps=1,82 min_samples=2","DBSCAN eps=1,72 min_samples=2"]
#ax.axis('tight')
#ax.axis('off')
#ax.table(cellText=np.var(modeles_silhouette, axis=0),colLabels=column_labels,rowLabels=row_labels,loc="center")

#plt.show()

#### Score de Calinski-Harabasz 

In [None]:
def BGSS(X,labels):
    """Between-group sum of squares under DTW.

    For each distinct label k, the squared DTW distance between the DBA
    barycenter of the cluster and the DBA barycenter of all of X is weighted
    by the cluster size; the weighted terms are summed.

    X and labels are indexed with ``np.where(labels == k)`` — assumes both
    are NumPy arrays aligned on their first axis (TODO confirm with callers).
    """
    barycentre_global = dtw_barycenter_averaging(X)
    total = 0
    for k in np.unique(labels):
        membres = np.where(labels==k)
        barycentre_k = dtw_barycenter_averaging(X[membres])
        total += len(membres[0])*dtw(barycentre_k,barycentre_global)**2
    return total

In [None]:
def WGSS(X,labels):
    """Within-group sum of squares under DTW.

    For each distinct label k, the squared DTW distances between every member
    of the cluster and the DBA barycenter of that cluster are summed; the
    per-cluster sums are then added up.

    X and labels are indexed with ``np.where(labels == k)`` — assumes both
    are NumPy arrays aligned on their first axis (TODO confirm with callers).
    """
    total = 0
    for k in np.unique(labels):
        membres = np.where(labels==k)
        barycentre_k = dtw_barycenter_averaging(X[membres])
        total += sum(dtw(barycentre_k,X[x])**2 for x in membres[0])
    return total

In [None]:
def Calinski_Harabasz_score(X,labels):
    """Calinski-Harabasz index built on the DTW-based BGSS and WGSS.

    Returns ``BGSS * (N - K) / (WGSS * (K - 1))`` with N the number of series
    in X and K the number of distinct labels. No guard is applied for K == 1
    or a zero WGSS.
    """
    n_series = len(X)
    n_clusters = len(np.unique(labels))
    inter = BGSS(X,labels)
    intra = WGSS(X,labels)
    return inter*(n_series-n_clusters)/(intra*(n_clusters-1))

In [None]:
#%%time
#modeles_calinski_harabasz=np.zeros((1,5,4))
#a=Calinski_Harabasz_score(X,DBSCAN(eps=e,min_samples=n,metric='precomputed').fit_predict(dtw_precomputed))
#b=Calinski_Harabasz_score(X,DBSCAN(eps=e2,min_samples=n2,metric='precomputed').fit_predict(dtw_precomputed))
#c=np.array([Calinski_Harabasz_score(X,AgglomerativeClustering(n_clusters=3,affinity='precomputed',linkage='complete').fit_predict(dtw_precomputed)),Calinski_Harabasz_score(X,AgglomerativeClustering(n_clusters=4,affinity='precomputed',linkage='complete').fit_predict(dtw_precomputed)),Calinski_Harabasz_score(X,AgglomerativeClustering(n_clusters=5,affinity='precomputed',linkage='complete').fit_predict(dtw_precomputed)),Calinski_Harabasz_score(X,AgglomerativeClustering(n_clusters=6,affinity='precomputed',linkage='complete').fit_predict(dtw_precomputed))])
#d=np.array([Calinski_Harabasz_score(X,AgglomerativeClustering(n_clusters=3,affinity='precomputed',linkage='average').fit_predict(dtw_precomputed)),Calinski_Harabasz_score(X,AgglomerativeClustering(n_clusters=4,affinity='precomputed',linkage='average').fit_predict(dtw_precomputed)),Calinski_Harabasz_score(X,AgglomerativeClustering(n_clusters=5,affinity='precomputed',linkage='average').fit_predict(dtw_precomputed)),Calinski_Harabasz_score(X,AgglomerativeClustering(n_clusters=6,affinity='precomputed',linkage='average').fit_predict(dtw_precomputed))])
#for k in range(1):
#    modeles_calinski_harabasz[k,0]=np.array([Calinski_Harabasz_score(X,TimeSeriesKMeans(n_clusters=3, metric="dtw").fit_predict(X)),Calinski_Harabasz_score(X,TimeSeriesKMeans(n_clusters=4, metric="dtw").fit_predict(X)),Calinski_Harabasz_score(X,TimeSeriesKMeans(n_clusters=5, metric="dtw").fit_predict(X)),Calinski_Harabasz_score(X,TimeSeriesKMeans(n_clusters=6, metric="dtw").fit_predict(X))])
#    modeles_calinski_harabasz[k,1]=c
#    modeles_calinski_harabasz[k,2]=d                                  
#    modeles_calinski_harabasz[k,3]=np.full(4,a)
#    modeles_calinski_harabasz[k,4]=np.full(4,b)
#    print(k)


In [None]:
#fig, ax =plt.subplots(1,1,figsize=(10,10))

#column_labels=["n=3", "n=4", "n=5","n=6"]
#row_labels=["DTW","HAC Complete","HAC average","DBSCAN eps=1,82 min_samples=2","DBSCAN eps=1,72 min_samples=2"]
#ax.axis('tight')
#ax.axis('off')
#ax.table(cellText=np.mean(modeles_calinski_harabasz, axis=0),colLabels=column_labels,rowLabels=row_labels,loc="center")

#plt.show()

In [None]:
#fig, ax =plt.subplots(1,1,figsize=(10,10))

#column_labels=["n=3", "n=4", "n=5","n=6"]
#row_labels=["DTW","HAC Complete","HAC average","DBSCAN eps=1,82 min_samples=2","DBSCAN eps=1,72 min_samples=2"]
#ax.axis('tight')
#ax.axis('off')
#ax.table(cellText=np.var(modeles_calinski_harabasz, axis=0),colLabels=column_labels,rowLabels=row_labels,loc="center")

#plt.show()

#### Davies-Bouldin score

In [None]:
def distance_intra_cluster(X,labels,k):
    """Sum of the DTW distances between the members of cluster k and its DBA barycenter.

    NOTE(review): this is a sum, not a mean, so the value grows with the
    cluster size.
    """
    membres = np.where(labels==k)
    barycentre_k = dtw_barycenter_averaging(X[membres])
    return sum(dtw(barycentre_k,X[x]) for x in membres[0])

In [None]:
def Davies_Bouldin_score(X,labels):
    """Davies-Bouldin index with DTW distances and DBA barycenters.

    For every cluster the worst (largest) similarity ratio with another
    cluster is taken, ``(s_k + s_k2) / dtw(C_k, C_k2)``, where ``s`` is the
    average DTW distance of a cluster's members to its barycenter ``C``.
    The index is the mean of these worst ratios over the K clusters; lower
    values indicate better separated clusters.

    Changes from the previous version: the result is averaged over the
    clusters (K was computed but never used), the scatter is normalised by
    the cluster size, and each barycenter / scatter is computed once per
    cluster instead of several times per pair of clusters.

    Returns 0.0 when there are fewer than two clusters. No guard is applied
    when two barycenters coincide (zero DTW distance).
    """
    clusters = np.unique(labels)
    K = len(clusters)
    if K < 2:
        # No pair of clusters to compare.
        return 0.0

    barycentres = {}
    dispersions = {}
    for k in clusters:
        membres = np.where(labels==k)
        barycentres[k] = dtw_barycenter_averaging(X[membres])
        # distance_intra_cluster returns the summed member-to-barycenter distance;
        # dividing by the cluster size gives the average scatter.
        dispersions[k] = distance_intra_cluster(X,labels,k)/len(membres[0])

    score = 0.0
    for k in clusters:
        score += max(
            (dispersions[k]+dispersions[k2])/dtw(barycentres[k],barycentres[k2])
            for k2 in clusters
            if k2 != k
        )
    return score/K

In [None]:
#%%time
#modeles_davies_bouldin=np.zeros((10,5,4))
#a=Davies_Bouldin_score(X,DBSCAN(eps=e,min_samples=n,metric='precomputed').fit_predict(dtw_precomputed))
#b=Davies_Bouldin_score(X,DBSCAN(eps=e2,min_samples=n2,metric='precomputed').fit_predict(dtw_precomputed))
#c=np.array([Davies_Bouldin_score(X,AgglomerativeClustering(n_clusters=3,affinity='precomputed',linkage='complete').fit_predict(dtw_precomputed)),Davies_Bouldin_score(X,AgglomerativeClustering(n_clusters=4,affinity='precomputed',linkage='complete').fit_predict(dtw_precomputed)),Davies_Bouldin_score(X,AgglomerativeClustering(n_clusters=5,affinity='precomputed',linkage='complete').fit_predict(dtw_precomputed)),Davies_Bouldin_score(X,AgglomerativeClustering(n_clusters=6,affinity='precomputed',linkage='complete').fit_predict(dtw_precomputed))])
#d=np.array([Davies_Bouldin_score(X,AgglomerativeClustering(n_clusters=3,affinity='precomputed',linkage='average').fit_predict(dtw_precomputed)),Davies_Bouldin_score(X,AgglomerativeClustering(n_clusters=4,affinity='precomputed',linkage='average').fit_predict(dtw_precomputed)),Davies_Bouldin_score(X,AgglomerativeClustering(n_clusters=5,affinity='precomputed',linkage='average').fit_predict(dtw_precomputed)),Davies_Bouldin_score(X,AgglomerativeClustering(n_clusters=6,affinity='precomputed',linkage='average').fit_predict(dtw_precomputed))])
#for k in range(10):
 #   modeles_davies_bouldin[k,0]=np.array([Davies_Bouldin_score(X,TimeSeriesKMeans(n_clusters=3, metric="dtw").fit_predict(X)),Davies_Bouldin_score(X,TimeSeriesKMeans(n_clusters=4, metric="dtw").fit_predict(X)),Davies_Bouldin_score(X,TimeSeriesKMeans(n_clusters=5, metric="dtw").fit_predict(X)),Davies_Bouldin_score(X,TimeSeriesKMeans(n_clusters=6, metric="dtw").fit_predict(X))])
  #  modeles_davies_bouldin[k,1]=c
   # modeles_davies_bouldin[k,2]=d                                  
    #modeles_davies_bouldin[k,3]=np.full(4,a)
    #modeles_davies_bouldin[k,4]=np.full(4,b)
    #print(k)


In [None]:
#fig, ax =plt.subplots(1,1,figsize=(10,10))

#column_labels=["n=3", "n=4", "n=5","n=6"]
#row_labels=["DTW","HAC Complete","HAC average","DBSCAN eps=1,82 min_samples=2","DBSCAN eps=1,72 min_samples=2"]
#ax.axis('tight')
#ax.axis('off')
#ax.table(cellText=np.mean(modeles_davies_bouldin, axis=0),colLabels=column_labels,rowLabels=row_labels,loc="center")

#plt.show()


In [None]:
#fig, ax =plt.subplots(1,1,figsize=(10,10))

#column_labels=["n=3", "n=4", "n=5","n=6"]
#row_labels=["DTW","HAC Complete","HAC average","DBSCAN eps=1,82 min_samples=2","DBSCAN eps=1,72 min_samples=2"]
#ax.axis('tight')
#ax.axis('off')
#ax.table(cellText=np.var(modeles_davies_bouldin, axis=0),colLabels=column_labels,rowLabels=row_labels,loc="center")

#plt.show()

In [None]:
#for k in range(10):
#    print(rand_score(TimeSeriesKMeans(n_clusters=3, metric="dtw").fit_predict(X),AgglomerativeClustering(n_clusters=3,affinity='precomputed',linkage='complete').fit_predict(dtw_precomputed)))

In [None]:
#for k in range (10):
#    print(adjusted_rand_score(TimeSeriesKMeans(n_clusters=3, metric="dtw").fit_predict(X),AgglomerativeClustering(n_clusters=3,affinity='precomputed',linkage='complete').fit_predict(dtw_precomputed)))

In [None]:
#for k in range(10):
#    print(rand_score(TimeSeriesKMeans(n_clusters=3, metric="dtw").fit_predict(X),AgglomerativeClustering(n_clusters=3,affinity='precomputed',linkage='average').fit_predict(dtw_precomputed)))

In [None]:
#for k in range (10):
#    print(adjusted_rand_score(TimeSeriesKMeans(n_clusters=3, metric="dtw").fit_predict(X),AgglomerativeClustering(n_clusters=3,affinity='precomputed',linkage='average').fit_predict(dtw_precomputed)))

In [None]:
#for k in range(10):
#    print(rand_score(TimeSeriesKMeans(n_clusters=3, metric="dtw").fit_predict(X),AgglomerativeClustering(n_clusters=3,affinity='precomputed',linkage='single').fit_predict(dtw_precomputed)))

In [None]:
#for k in range (10):
#    print(adjusted_rand_score(TimeSeriesKMeans(n_clusters=3, metric="dtw").fit_predict(X),AgglomerativeClustering(n_clusters=3,affinity='precomputed',linkage='single').fit_predict(dtw_precomputed)))

In [None]:
def toujours_ensemble(i,j,*args):
    """Return True when positions i and j carry the same label in every labelling of args.

    With no labelling at all the result is True.
    """
    return all(etiquettes[i] == etiquettes[j] for etiquettes in args)

In [None]:
def dans_liste_de_liste(l,i):
    """Return True when i belongs to at least one of the sub-lists of l."""
    return any(i in sous_liste for sous_liste in l)

In [None]:
def indices_toujours_ensemble(*args):
    """Group the products that share a cluster in every labelling of ``args``.

    Each positional argument is one labelling: a sequence indexable by
    position, aligned with the global ``indice_produit``. Two products land in
    the same group when every labelling gives them equal labels.

    Returns a list of lists of product ids. Only groups with at least two
    members are kept; groups appear in the order of their first member and
    members follow the order of ``indice_produit``.
    """
    # One pass over the products: the tuple of labels a product receives across
    # all labellings identifies its group, so no pairwise comparison is needed
    # and the number of products is no longer hard-coded.
    groupes={}
    for position,produit in enumerate(indice_produit):
        signature=tuple(labels[position] for labels in args)
        groupes.setdefault(signature,[]).append(produit)
    # Dict insertion order keeps the groups sorted by their first member.
    return [membres for membres in groupes.values() if len(membres)>1]

In [None]:
# Reference 3-cluster labelling: k-means with the DTW metric fitted on X.
# NOTE(review): no random_state is passed, so the labels are presumably not
# reproducible from one execution to the next.
c_trois=TimeSeriesKMeans(n_clusters=3, metric="dtw").fit_predict(X)

In [None]:
# Labellings compared for the 3-cluster stability study: the reference run
# c_trois, nine further DTW k-means fits on X, then one complete-linkage
# agglomerative clustering on dtw_precomputed.
# NOTE(review): recent scikit-learn releases renamed `affinity` to `metric`;
# adjust the keyword if the library is upgraded.
argument3=(
    c_trois,
    *(TimeSeriesKMeans(n_clusters=3, metric="dtw").fit_predict(X) for _ in range(9)),
    AgglomerativeClustering(n_clusters=3,affinity='precomputed',linkage='complete').fit_predict(dtw_precomputed),
)

In [None]:
%%time

# Groups of product ids that share a cluster in every labelling of argument3.
a=indices_toujours_ensemble(*argument3)

In [None]:
# Number of "always together" groups found for 3 clusters.
len(a)

In [None]:
# Total number of products that belong to some "always together" group.
sum(len(groupe) for groupe in a)

In [None]:
# Size of each "always together" group.
list(map(len,a))

In [None]:
# Plot every series of the group a[1], four panels per row, and save the figure.
groupe=a[1]
# Keep the original 6-row layout as a minimum, but add rows when this run
# yields a larger group: the k-means fits receive no random_state, so the
# group size is not fixed and a fixed grid could be indexed out of bounds.
n_lignes=max(6,math.ceil(len(groupe)/4))
fig, axs = plt.subplots(n_lignes,4,figsize=(25,35))
for position,i in enumerate(groupe):
    row_i,column_j=divmod(position,4)
    # Position of product i in indice_produit, looked up once per product.
    rang=np.where(indice_produit==i)[0][0]
    axs[row_i,column_j].plot(indice_temps[rang],myseries2[rang],c="blue")

plt.savefig('C:/Users/gabri/OneDrive/Bureau/Cours ENSAE/2ème année/Stage/Toujours ensemble 1.jpeg')
plt.show()

In [None]:
#fig, axs = plt.subplots(2,4,figsize=(25,15))
#row_i=0
#column_j=0
#for i in a[2]:
#    axs[row_i,column_j].plot(indice_temps[np.where(indice_produit==i)[0][0]],myseries2[np.where(indice_produit==i)[0][0]],c="blue")
#    column_j+=1
#    if column_j%4==0:
#        row_i+=1
#        column_j=0
     
#plt.savefig('C:/Users/gabri/OneDrive/Bureau/Cours ENSAE/2ème année/Stage/Toujours ensemble 2.jpeg')
#plt.show()

In [None]:
# Plot every series of the group a[5], four panels per row, and save the figure.
groupe=a[5]
# Keep the original 3-row layout as a minimum, but add rows when this run
# yields a larger group: the k-means fits receive no random_state, so the
# group size is not fixed and a fixed grid could be indexed out of bounds.
n_lignes=max(3,math.ceil(len(groupe)/4))
fig, axs = plt.subplots(n_lignes,4,figsize=(25,15))
for position,i in enumerate(groupe):
    row_i,column_j=divmod(position,4)
    # Position of product i in indice_produit, looked up once per product.
    rang=np.where(indice_produit==i)[0][0]
    axs[row_i,column_j].plot(indice_temps[rang],myseries2[rang],c="blue")

plt.savefig('C:/Users/gabri/OneDrive/Bureau/Cours ENSAE/2ème année/Stage/Toujours ensemble 3.jpeg')
plt.show()

In [None]:
# Plot every series of the group a[6], four panels per row, and save the figure.
groupe=a[6]
# Keep the original 2-row layout as a minimum, but add rows when this run
# yields a larger group: the k-means fits receive no random_state, so the
# group size is not fixed and a fixed grid could be indexed out of bounds.
n_lignes=max(2,math.ceil(len(groupe)/4))
fig, axs = plt.subplots(n_lignes,4,figsize=(25,15))
for position,i in enumerate(groupe):
    row_i,column_j=divmod(position,4)
    # Position of product i in indice_produit, looked up once per product.
    rang=np.where(indice_produit==i)[0][0]
    axs[row_i,column_j].plot(indice_temps[rang],myseries2[rang],c="blue")

plt.savefig('C:/Users/gabri/OneDrive/Bureau/Cours ENSAE/2ème année/Stage/Toujours ensemble 4.jpeg')
plt.show()

In [None]:
#fig, axs = plt.subplots(2,4,figsize=(25,15))
#row_i=0
#column_j=0
#for i in a[8]:
#    axs[row_i,column_j].plot(indice_temps[np.where(indice_produit==i)[0][0]],myseries2[np.where(indice_produit==i)[0][0]],c="blue")
#    column_j+=1
#    if column_j%4==0:
#        row_i+=1
#        column_j=0
     
#plt.savefig('C:/Users/gabri/OneDrive/Bureau/Cours ENSAE/2ème année/Stage/Toujours ensemble 5.jpeg')
#plt.show()

In [None]:
# Position(s) of product id 69933 in indice_produit.
np.where(indice_produit==69933)

In [None]:
# Position(s) of product id 62256 in indice_produit.
np.where(indice_produit==62256)

In [None]:
# Position(s) of product id 63413 in indice_produit.
np.where(indice_produit==63413)

In [None]:
# Label given to the series at position 92 by a 3-cluster complete-linkage
# agglomerative clustering fitted on dtw_precomputed.
AgglomerativeClustering(n_clusters=3,affinity='precomputed',linkage='complete').fit_predict(dtw_precomputed)[92]

In [None]:
# Readable cluster name for every label of c_trois, in the same order.
fancy_names_for_labels_c = ["Cluster {}".format(label) for label in c_trois]
# One row per product id with its cluster name, sorted by cluster name and
# indexed by product id.
prediction_c=(
    pd.DataFrame(zip(indice_produit,fancy_names_for_labels_c),columns=["Series","Cluster"])
    .sort_values(by="Cluster")
    .set_index("Series")
)

In [None]:
# Print, for each of the three clusters, the product ids assigned to it.
for k in range(3):
    dans_cluster=prediction_c['Cluster']==f'Cluster {k}'
    print(prediction_c.loc[dans_cluster].index)

In [None]:
def jamais_ensemble(i,j,*args):
    """Tell whether positions ``i`` and ``j`` get different labels in every labelling.

    Each element of ``args`` is one labelling, indexable by position.
    Returns False as soon as one labelling puts ``i`` and ``j`` together,
    True otherwise (including when no labelling is given).
    """
    return not any(labels[i]==labels[j] for labels in args)

In [None]:
def indices_jamais_avec_k(k,*args):
    """List the products that are never clustered with product ``k``.

    ``k`` is a product id looked up in the global ``indice_produit``; an
    IndexError is raised when it is absent. Each element of ``args`` is one
    labelling, indexable by position and aligned with ``indice_produit``.

    Returns the product ids, in ``indice_produit`` order, whose label differs
    from that of ``k`` in every labelling.
    """
    i=np.where(indice_produit==k)[0][0]
    # Iterate over all known products instead of a hard-coded count of 125.
    return [
        produit
        for j,produit in enumerate(indice_produit)
        if jamais_ensemble(i,j,*args)
    ]

In [None]:
%%time
liste3=[]
for k in indice_produit:
    liste3.append(indices_jamais_avec_k(k,*argument3))
liste3

In [None]:
# Number of "never together" products for each product, in liste3 order.
e=np.array(list(map(len,liste3)))

In [None]:
# Positions of the products with the fewest "never together" partners.
np.flatnonzero(e==e.min())

In [None]:
# "Never together" count of the product at position 10.
e[10]

In [None]:
# Positions of the products with the most "never together" partners.
np.flatnonzero(e==e.max())

In [None]:
# Products never clustered with the product at position 0.
liste3[0]

In [None]:
# Products never clustered with the product at position 6.
liste3[6]

In [None]:
# Products never clustered with the product at position 3.
liste3[3]

In [None]:
# Product ids with the most "never together" partners.
list(indice_produit[np.flatnonzero(e==e.max())])

### Toujours ensemble 4 clusters 

In [None]:
# Reference 4-cluster labelling: k-means with the DTW metric fitted on X.
# NOTE(review): no random_state is passed, so the labels are presumably not
# reproducible from one execution to the next.
c_quatre=TimeSeriesKMeans(n_clusters=4, metric="dtw").fit_predict(X)

In [None]:
# Labellings compared for the 4-cluster stability study: the reference run
# c_quatre, nine further DTW k-means fits on X, then one complete-linkage
# agglomerative clustering on dtw_precomputed.
# NOTE(review): recent scikit-learn releases renamed `affinity` to `metric`;
# adjust the keyword if the library is upgraded.
argument4=(
    c_quatre,
    *(TimeSeriesKMeans(n_clusters=4, metric="dtw").fit_predict(X) for _ in range(9)),
    AgglomerativeClustering(n_clusters=4,affinity='precomputed',linkage='complete').fit_predict(dtw_precomputed),
)

In [None]:
# Groups of product ids that share a cluster in every labelling of argument4.
b=indices_toujours_ensemble(*argument4)

In [None]:
# Number of "always together" groups found for 4 clusters.
len(b)

In [None]:
# Total number of products that belong to some "always together" group.
sum(len(groupe) for groupe in b)

In [None]:
# Size of each "always together" group.
list(map(len,b))

In [None]:
# Product ids of the second "always together" group for 4 clusters.
b[1]

### Toujours ensemble 5 clusters

In [None]:
# Reference 5-cluster labelling: k-means with the DTW metric fitted on X.
# NOTE(review): no random_state is passed, so the labels are presumably not
# reproducible from one execution to the next.
c_cinq=TimeSeriesKMeans(n_clusters=5, metric="dtw").fit_predict(X)

In [None]:
# Labellings compared for the 5-cluster stability study: the reference run
# c_cinq, nine further DTW k-means fits on X, then one complete-linkage
# agglomerative clustering on dtw_precomputed.
# NOTE(review): recent scikit-learn releases renamed `affinity` to `metric`;
# adjust the keyword if the library is upgraded.
argument5=(
    c_cinq,
    *(TimeSeriesKMeans(n_clusters=5, metric="dtw").fit_predict(X) for _ in range(9)),
    AgglomerativeClustering(n_clusters=5,affinity='precomputed',linkage='complete').fit_predict(dtw_precomputed),
)

In [None]:
# Groups of product ids that share a cluster in every labelling of argument5.
c=indices_toujours_ensemble(*argument5)

In [None]:
# Number of "always together" groups found for 5 clusters.
len(c)

In [None]:
# Total number of products that belong to some "always together" group.
sum(len(groupe) for groupe in c)

In [None]:
# Size of each "always together" group.
list(map(len,c))

In [None]:
# Product ids of the first "always together" group for 5 clusters.
c[0]

### Toujours ensemble 6 clusters

In [None]:
# Reference 6-cluster labelling: k-means with the DTW metric fitted on X.
# NOTE(review): no random_state is passed, so the labels are presumably not
# reproducible from one execution to the next.
c_six=TimeSeriesKMeans(n_clusters=6, metric="dtw").fit_predict(X)

In [None]:
# Labellings compared for the 6-cluster stability study: the reference run
# c_six, nine further DTW k-means fits on X, then one complete-linkage
# agglomerative clustering on dtw_precomputed.
# NOTE(review): recent scikit-learn releases renamed `affinity` to `metric`;
# adjust the keyword if the library is upgraded.
argument6=(
    c_six,
    *(TimeSeriesKMeans(n_clusters=6, metric="dtw").fit_predict(X) for _ in range(9)),
    AgglomerativeClustering(n_clusters=6,affinity='precomputed',linkage='complete').fit_predict(dtw_precomputed),
)

In [None]:
# Groups of product ids that share a cluster in every labelling of argument6.
d=indices_toujours_ensemble(*argument6)

In [None]:
# Number of "always together" groups found for 6 clusters.
len(d)

In [None]:
# Total number of products that belong to some "always together" group.
sum(len(groupe) for groupe in d)

In [None]:
# Size of each "always together" group.
list(map(len,d))

In [None]:
# Product ids of the first "always together" group for 6 clusters.
d[0]

In [None]:
# Product ids of the second "always together" group for 6 clusters.
d[1]

In [None]:
# Hard-coded list of product ids whose series are inspected in the next cells.
# NOTE(review): presumably copied from an earlier clustering output — confirm origin.
Liste_cluster=[70193,70196,63951,60743,70210,70203,69933,70113,69777,69917,69932,61521,62094,62095,62256,62539,62545,62546,62547,62562,62848,63412,63413,63485,63486,61395]

In [None]:
# Number of product ids in Liste_cluster.
len(Liste_cluster)

In [None]:
# Plot the series of every product in Liste_cluster, four panels per row.
# The row count follows the list length; squeeze=False keeps axs two-dimensional
# even if the list shrinks to a single row.
n_lignes=math.ceil(len(Liste_cluster)/4)
fig, axs = plt.subplots(n_lignes,4,figsize=(25,15),squeeze=False)
for position,indice in enumerate(Liste_cluster):
    row_i,column_j=divmod(position,4)
    # Position of the product in indice_produit, looked up once and kept in its
    # own name so the group list `a` is not overwritten.
    rang=np.where(indice_produit==indice)[0][0]
    axs[row_i,column_j].plot(indice_temps[rang],myseries2[rang],c="blue")

# Call show(); the bare attribute `plt.show` did nothing.
plt.show()


In [None]:
# Series of the tenth product of Liste_cluster, y-axis restricted to [0, 20].
rang=np.where(indice_produit==Liste_cluster[9])[0][0]
plt.plot(indice_temps[rang],myseries3[rang])
plt.ylim([0,20])

In [None]:
# Time index of the series of the tenth product of Liste_cluster.
indice_temps[np.where(indice_produit==Liste_cluster[9])[0][0]]

In [None]:
# Hard-coded product ids forming the first cluster studied below.
# NOTE(review): presumably copied from a clustering output — confirm origin.
cluster_1=[70181, 70208, 60887, 70212, 70214, 70215, 70731, 60257, 69776,
            60262, 66589, 67452, 62850, 67849, 69496, 65652, 63837, 60890,
            66594, 60741, 63701, 62458, 70197, 69728, 63921, 65649, 70199,
            62461, 63922]
# Display the cluster size.
len(cluster_1)

In [None]:
# Plot the series of every product in cluster_1, four panels per row.
# The row count follows the list length; squeeze=False keeps axs two-dimensional
# even if the list shrinks to a single row.
n_lignes=math.ceil(len(cluster_1)/4)
fig, axs = plt.subplots(n_lignes,4,figsize=(25,15),squeeze=False)
for position,indice in enumerate(cluster_1):
    row_i,column_j=divmod(position,4)
    # Position of the product in indice_produit, looked up once and kept in its
    # own name so the group list `a` is not overwritten.
    rang=np.where(indice_produit==indice)[0][0]
    axs[row_i,column_j].plot(indice_temps[rang],myseries3[rang],c="blue")

# Call show(); the bare attribute `plt.show` did nothing.
plt.show()

In [None]:
# Hard-coded product ids forming the second cluster studied below.
# NOTE(review): presumably copied from a clustering output — confirm origin.
cluster_2=[63485, 63486, 62546, 70185, 61522, 62547, 62562, 62589, 69933,
            62848, 70113, 69734, 69771, 69770, 62094, 62539, 63413, 69777,
            69787, 69790, 62450, 62256, 69932, 62095, 61521, 62545, 69917,
            67827, 70205, 60198, 70193, 70196, 60204, 63846, 63951, 69731,
            60412, 66855, 66598, 63833, 60743, 70210, 60555, 60354, 70203,
            69196, 68829, 67826, 61395, 63412]
# Display the cluster size.
len(cluster_2)

In [None]:
# Plot the series of every product in cluster_2, four panels per row.
# The row count follows the list length; squeeze=False keeps axs two-dimensional
# even if the list shrinks to a single row.
n_lignes=math.ceil(len(cluster_2)/4)
fig, axs = plt.subplots(n_lignes,4,figsize=(25,15),squeeze=False)
for position,indice in enumerate(cluster_2):
    row_i,column_j=divmod(position,4)
    # Position of the product in indice_produit, looked up once and kept in its
    # own name so the group list `a` is not overwritten.
    rang=np.where(indice_produit==indice)[0][0]
    axs[row_i,column_j].plot(indice_temps[rang],myseries3[rang],c="blue")

# Call show(); the bare attribute `plt.show` did nothing.
plt.show()

In [None]:
# Hard-coded product ids forming the third cluster studied below.
# NOTE(review): presumably copied from a clustering output — confirm origin.
cluster_3=[69727, 65655, 69729, 65654, 60409, 61037, 70187, 60205, 60245,
            60337, 60248, 70188, 61027, 70189, 69733, 70191, 70192, 70194,
            70202, 63845, 64233, 65650, 60414, 60416, 60417, 60507, 60823,
            70211, 60250, 60256, 60261, 60263, 60272, 60273, 67020, 67453,
            69669, 69726, 70207, 60839, 60982, 70179, 60200, 70183, 62491,
            60201]
# Display the cluster size.
len(cluster_3)

In [None]:
# Plot the series of every product in cluster_3, four panels per row.
# The row count follows the list length; squeeze=False keeps axs two-dimensional
# even if the list shrinks to a single row.
n_lignes=math.ceil(len(cluster_3)/4)
fig, axs = plt.subplots(n_lignes,4,figsize=(25,15),squeeze=False)
for position,indice in enumerate(cluster_3):
    row_i,column_j=divmod(position,4)
    # Position of the product in indice_produit, looked up once and kept in its
    # own name so the group list `a` is not overwritten.
    rang=np.where(indice_produit==indice)[0][0]
    axs[row_i,column_j].plot(indice_temps[rang],myseries3[rang],c="blue")

# Call show(); the bare attribute `plt.show` did nothing.
plt.show()

In [None]:
# Autocorrelation function of every series in cluster_1, four panels per row.
# The row count follows the list length; squeeze=False keeps axs two-dimensional.
n_lignes=math.ceil(len(cluster_1)/4)
fig, axs = plt.subplots(n_lignes,4,figsize=(25,15),squeeze=False)
for position,indice in enumerate(cluster_1):
    row_i,column_j=divmod(position,4)
    # Fetch the series once; a dedicated name avoids overwriting the group list `a`.
    serie=myseries3[np.where(indice_produit==indice)[0][0]]
    # Number of plotted lags: the series length, capped at 79.
    # NOTE(review): the meaning of the 79 cap is not visible here — confirm.
    n_retards=min(len(serie),79)
    axs[row_i,column_j].plot(range(n_retards),acf(serie,nlags=n_retards-1),c="blue")
        

In [None]:
# Autocorrelation function of every series in cluster_2, four panels per row.
# The row count follows the list length; squeeze=False keeps axs two-dimensional.
n_lignes=math.ceil(len(cluster_2)/4)
fig, axs = plt.subplots(n_lignes,4,figsize=(25,25),squeeze=False)
for position,indice in enumerate(cluster_2):
    row_i,column_j=divmod(position,4)
    # Fetch the series once; a dedicated name avoids overwriting the group list `a`.
    serie=myseries3[np.where(indice_produit==indice)[0][0]]
    # Number of plotted lags: the series length, capped at 79.
    # NOTE(review): the meaning of the 79 cap is not visible here — confirm.
    n_retards=min(len(serie),79)
    axs[row_i,column_j].plot(range(n_retards),acf(serie,nlags=n_retards-1),c="blue")

In [None]:
# Autocorrelation function of every series in cluster_3, four panels per row.
# The row count follows the list length; squeeze=False keeps axs two-dimensional.
n_lignes=math.ceil(len(cluster_3)/4)
fig, axs = plt.subplots(n_lignes,4,figsize=(25,25),squeeze=False)
for position,indice in enumerate(cluster_3):
    row_i,column_j=divmod(position,4)
    # Fetch the series once; a dedicated name avoids overwriting the group list `a`.
    serie=myseries3[np.where(indice_produit==indice)[0][0]]
    # Number of plotted lags: the series length, capped at 79.
    # NOTE(review): the meaning of the 79 cap is not visible here — confirm.
    n_retards=min(len(serie),79)
    axs[row_i,column_j].plot(range(n_retards),acf(serie,nlags=n_retards-1),c="blue")

In [None]:
# Partial autocorrelation function of every series in cluster_1, four panels per row.
# The row count follows the list length; squeeze=False keeps axs two-dimensional.
n_lignes=math.ceil(len(cluster_1)/4)
fig, axs = plt.subplots(n_lignes,4,figsize=(25,15),squeeze=False)
for position,indice in enumerate(cluster_1):
    row_i,column_j=divmod(position,4)
    # Fetch the series once; a dedicated name avoids overwriting the group list `a`.
    serie=myseries3[np.where(indice_produit==indice)[0][0]]
    # Number of plotted lags: half the series length, capped at 79.
    # NOTE(review): the halving presumably respects pacf's limit on nlags
    # relative to the sample size — confirm.
    n_retards=min(len(serie)//2,79)
    axs[row_i,column_j].plot(range(n_retards),pacf(serie,nlags=n_retards-1),c="blue")

In [None]:
# Partial autocorrelation function of every series in cluster_2, four panels per row.
# The row count follows the list length; squeeze=False keeps axs two-dimensional.
n_lignes=math.ceil(len(cluster_2)/4)
fig, axs = plt.subplots(n_lignes,4,figsize=(25,25),squeeze=False)
for position,indice in enumerate(cluster_2):
    row_i,column_j=divmod(position,4)
    # Fetch the series once; a dedicated name avoids overwriting the group list `a`.
    serie=myseries3[np.where(indice_produit==indice)[0][0]]
    # Number of plotted lags: half the series length, capped at 79.
    # NOTE(review): the halving presumably respects pacf's limit on nlags
    # relative to the sample size — confirm.
    n_retards=min(len(serie)//2,79)
    axs[row_i,column_j].plot(range(n_retards),pacf(serie,nlags=n_retards-1),c="blue")

In [None]:
# Partial autocorrelation function of every series in cluster_3, four panels per row.
# The row count follows the list length; squeeze=False keeps axs two-dimensional.
n_lignes=math.ceil(len(cluster_3)/4)
fig, axs = plt.subplots(n_lignes,4,figsize=(25,25),squeeze=False)
for position,indice in enumerate(cluster_3):
    row_i,column_j=divmod(position,4)
    # Fetch the series once; a dedicated name avoids overwriting the group list `a`.
    serie=myseries3[np.where(indice_produit==indice)[0][0]]
    # Number of plotted lags: half the series length, capped at 79.
    # NOTE(review): the halving presumably respects pacf's limit on nlags
    # relative to the sample size — confirm.
    n_retards=min(len(serie)//2,79)
    axs[row_i,column_j].plot(range(n_retards),pacf(serie,nlags=n_retards-1),c="blue")

In [None]:
# Hard-coded cluster labels (values 0, 1 and 2), one per position.
# NOTE(review): presumably produced by a signature-based clustering run
# elsewhere and aligned with indice_produit — confirm origin and ordering.
labels_signature=[1, 1, 0, 2, 2, 2, 0, 0, 1, 1, 2, 2, 1, 2, 2, 1, 2, 2, 0, 2, 1, 1, 1, 1,
        1, 1, 0, 2, 0, 0, 1, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 2, 2, 2, 2,
        0, 1, 1, 2, 2, 2, 0, 1, 1, 1, 1, 2, 2, 1, 2, 1, 1, 1, 0, 1, 1, 2, 1, 1,
        2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        1, 1, 2, 2, 1, 1, 1, 2, 1, 1, 1, 2, 1, 2, 1, 1, 2, 1, 1, 2, 2, 2, 1, 1,
        1, 1, 2, 0, 1]

In [None]:
# Groups of products that share a cluster in both c_trois and labels_signature.
# This rebinds `a`, replacing the groups computed from argument3.
a=indices_toujours_ensemble(c_trois,labels_signature)

In [None]:
# Total number of products that belong to some "always together" group.
sum(len(groupe) for groupe in a)

In [None]:
# Size of each "always together" group.
list(map(len,a))