In [30]:
from diagrams import Cluster, Diagram, Edge
from diagrams.k8s.storage import PVC, PV
from diagrams.aws.database import Aurora
from diagrams.gcp.iot import IotCore
from diagrams.gcp.analytics import BigQuery, Dataflow, PubSub
from diagrams.gcp.database import BigTable
from diagrams.gcp.compute import AppEngine, Functions
from diagrams.aws.database import Redshift, ElastiCache
from diagrams.gcp.storage import GCS

from pylab import *

import graphviz

In [77]:
# Graph-wide rendering options passed to Diagram(...): page background colour,
# output resolution, and the font used for the diagram title.
graph_attr = dict(
    bgcolor="#F7FBFC",
    dpi="300",
    fontname="Times-Roman",
    fontsize="50",
)

# Styling for the outermost clusters.
# Author's notes: alternative fill colour #E8E8E8 (background colour outside
# the cluster); the values "none" and "transparent" both came out black.
cluster_attr = dict(bgcolor="#D6E6F2", fontsize="40")

# Styling for clusters nested one level down.
# Author's notes: alternative fill colour #BBBFCA; "none" / "transparent"
# both came out black.
subcluster_attr = dict(bgcolor="#B9D7EA", fontsize="35")

# Styling for the innermost clusters (the ones wrapping the pictograms).
# Author's notes: alternative fill colour #BBBFCA; "none" / "transparent"
# both came out black.
picto_attr = dict(bgcolor="#A6D0E4", fontsize="30")

# Disabled option kept for reference:
# "clusterrank": "global"

def _bold_link():
    """Return a new bold, dark-grey Edge for one connection.

    The original cell built a separate Edge for every connection; calling this
    helper at each connection keeps that one-Edge-per-connection behaviour
    while defining the styling in a single place.
    """
    return Edge(color="darkgrey", style="bold")


# Renders the "Construction du Dataset" flowchart to Flowchart.jpg and opens it
# (show=True). The statement order below (nodes, then edges) mirrors the
# original cell so the generated graph description stays the same.
with Diagram(
    "Construction du Dataset",
    filename="Flowchart",
    show=True,
    outformat="jpg",
    graph_attr=graph_attr,
):

    # ---- Stage 1: original dataset -> concatenation -> cleaning ----------
    with Cluster("DataSet Original", graph_attr=cluster_attr):
        with Cluster("Concatenation des mois", graph_attr=subcluster_attr):
            with Cluster("Fichiers mensuels", graph_attr=picto_attr):
                # Three unlabeled storage icons standing for the monthly files.
                mois = [GCS() for _ in range(3)]
        with Cluster("DataSet Concaténé", graph_attr=subcluster_attr):
            dsconcat = BigTable()

        with Cluster("Nettoyage des données", graph_attr=subcluster_attr):
            # One inner cluster per cleaning step; each step node is fed
            # directly from the concatenated dataset.
            cleaning_steps = []
            for step_title in (
                "Sélection des variables",
                "Traitement des NA",
                "Traitement des outliers",
                "Recherche des données manquantes",
            ):
                with Cluster(step_title, graph_attr=picto_attr):
                    cleaning_steps.append(dsconcat >> _bold_link() >> IotCore())
            # Keep the original per-step names bound for any later cell.
            sel, tna, toul, dman = cleaning_steps

        with Cluster("DataSet nettoyé", graph_attr=subcluster_attr):
            dsfullclean = Aurora()

    # ---- Stage 2: additional data sources --------------------------------
    with Cluster("Intégration de données supplémentaires", graph_attr=cluster_attr):
        with Cluster("Elaboration des hypothèses", graph_attr=subcluster_attr):
            inthyp = Dataflow()

        # One cluster per additional source, each fed from the hypotheses node.
        extra_sources = []
        for source_title in (
            "Semaines spéciales",
            "Promotions",
            "Températures moyennes saisonnières",
            "Vacances scolaires",
            "Jours fériés",
            "Espace temps",
        ):
            with Cluster(source_title, graph_attr=subcluster_attr):
                extra_sources.append(inthyp >> _bold_link() >> Redshift())
        # Keep the original per-source names bound for any later cell.
        sem, prom, temp, vac, jferies, esptemp = extra_sources

    # ---- Stage 3: final dataset and analysis -----------------------------
    with Cluster("DS final", graph_attr=cluster_attr):
        dsfinal = ElastiCache()

    with Cluster("Visualisation et analyses", graph_attr=cluster_attr):
        dsanalyses = Redshift()

    # ---- Connections between stages --------------------------------------
    # Every monthly file feeds the concatenated dataset.
    mois >> _bold_link() >> dsconcat

    # Every cleaning step feeds the cleaned dataset.
    for step_node in cleaning_steps:
        step_node >> _bold_link() >> dsfullclean

    dsfullclean >> _bold_link() >> inthyp

    # The original cell wired these edges in the reverse of creation order
    # (esptemp first, sem last); reversed() keeps that edge order.
    for source_node in reversed(extra_sources):
        source_node >> _bold_link() >> dsfinal

    dsfinal >> _bold_link() >> dsanalyses



In [None]:
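# NOTE: disabled earlier draft of the flowchart, kept for reference only. It uses
# names (`flow`, `dsoriginal_group`, `dscleaning`) that have no definition in the visible cells.
# with Diagram("flowchart DS 2", show=True):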

#     with Cluster("Intégration de données supplémentaires"):
#         inthyp = Dataflow("Intégration des hypothèses")
# #         with Cluster("Espace temps"):
# #             flow >> Functions("La position du jour influe-t-elle sur les volumes ?") >> Redshift("Variables jours/semaines dans espace temps")
# #         with Cluster("Jours fériés"):
# #             flow >> Functions("La proximité d'un jour férié (passé ou à venir) influe-t-elle sur les volumes ?") >> Redshift("Variables prox jour férié")
# #         with Cluster("Vacances scolaires"):
# #             flow >> Functions("Les vacances scolaires (par zone) influencent-elles les volumes ?") >> Redshift("Variables vacances")
# #         with Cluster("Températures moyennes saisonnières"):
# #             flow >> Functions("Les températures des régions de livraison influencent-elles les volumes ?") >> Redshift("Variables températures")
# #         with Cluster("Promotions"):
# #             flow >> Functions("Les promotions influencent-elles les volumes ?") >> Redshift("Variables promotions")
# #         with Cluster("Semaines spéciales"):
# #             flow >> Functions("Les semaines spéciales influencent-elles les volumes ?") >> Redshift("Variables semaines spéciales")
#         with Cluster("Semaines spéciales"):
#             sem = inthyp >> Redshift()
#         with Cluster("Promotions"):
#             prom = inthyp >> Redshift()
#         with Cluster("Températures moyennes saisonnières"):
#             temp = inthyp >> Redshift()
#         with Cluster("Vacances scolaires"):
#             vac = inthyp >> Redshift()      
#         with Cluster("Jours fériés"):
#             jferies = inthyp >> Redshift()
#         with Cluster("Espace temps"):
#             esptemp = inthyp >> Redshift()

        
#     dsfinal = ElastiCache("DS final")
    
#     dsanalyses = Redshift("Visualisation et analyses")
    
#     dsoriginal_group >> dsconcat >> dscleaning >> dsfullclean >> inthyp 
#     esptemp >> dsfinal
#     jferies >> dsfinal
#     vac >> dsfinal
#     temp >> dsfinal
#     prom >> dsfinal
#     sem >> dsfinal
    
    
#     dsfinal >> dsanalyses

In [None]:
# Graphviz DOT description of a small demo graph with two coloured clusters.
# DOT is not Python syntax, so pasting it directly into a cell raises a
# SyntaxError; it is kept in a string and handed to graphviz.Source instead.
home_dot = """
graph {
  bgcolor="lightblue"
  label="Home"
  subgraph cluster_ground_floor {
    bgcolor="lightgreen"
    label="Ground Floor"
    Lounge
    Kitchen
  }
  subgraph cluster_top_floor {
    bgcolor="lightyellow"
    label="Top Floor"
    Bedroom
    Bathroom
  }
}
"""

# `graphviz` is imported in the notebook's first cell.
graph = graphviz.Source(home_dot)
graph  # last expression of the cell: Jupyter renders the graph inline