# Dataframe introduction
Let's create a dataframe manually

In [87]:
%use dataframe, kandy

//source: https://greenlab.di.uminho.pt/wp-content/uploads/2017/09/paperSLE.pdf
val languageEnergy = dataFrameOf(
    "language" to listOf("C","Rust","C++","Java","Go","Python"),
    "efficiency" to listOf(1.0,1.03,1.34,1.98,3.23,75.88)
    )

languageEnergy

language,efficiency
C,1.0
Rust,1.03
C++,1.34
Java,1.98
Go,3.23
Python,75.88


In [88]:
languageEnergy.sortBy{efficiency}.plot { 
    bars {
        x(language)
        y(efficiency)
    }
}

## Example with OpenData
The following examples rely on French regions' Gaz and Electricity consumption from 2011 to 2021.
https://www.data.gouv.fr/fr/datasets/consommation-annuelle-delectricite-et-gaz-par-region-et-par-code-naf/


In [89]:

val openDataUrl = "https://www.data.gouv.fr/fr/datasets/r/d33eabc9-e2fd-4787-83e5-a5fcfb5af66d"
val df = DataFrame.readCSV(fileOrUrl=openDataUrl, delimiter = ';')
df

operateur,annee,filiere,code_categorie_consommation,libelle_categorie_consommation,code_grand_secteur,libelle_grand_secteur,code_naf,libelle_secteur_naf2,conso,pdl,indqual,nombre_mailles_secretisees,code_region,libelle_region
Enedis,2020,Electricité,ENT,Entreprises,T,Tertiaire,72.0,Recherche-développement scientifique,135719.2771,131,0.995,0,84,Auvergne-Rhône-Alpes
Enedis,2020,Electricité,ENT,Entreprises,T,Tertiaire,75.0,Activités vétérinaires,1204.307968,23,0.885,0,84,Auvergne-Rhône-Alpes
Enedis,2020,Electricité,ENT,Entreprises,T,Tertiaire,77.0,Activités de location et location-bail,17371.36323,231,0.915,0,84,Auvergne-Rhône-Alpes
Enedis,2020,Electricité,ENT,Entreprises,T,Tertiaire,88.0,Action sociale sans hébergement,106935.3465,797,0.942,0,84,Auvergne-Rhône-Alpes
Enedis,2020,Electricité,ENT,Entreprises,T,Tertiaire,94.0,Activités des organisations associatives,62186.18198,582,0.926,0,84,Auvergne-Rhône-Alpes
Enedis,2020,Electricité,ENT,Entreprises,X,Secteur Inconnu,,0,6634.348055,121,0.859,0,84,Auvergne-Rhône-Alpes
Enedis,2020,Electricité,PRO,Petits professionels,A,Agriculture,,0,390022.7856,34533,0.586,0,84,Auvergne-Rhône-Alpes
Enedis,2020,Electricité,PRO,Petits professionels,X,Secteur Inconnu,,0,228033.5269,37973,0.496,0,84,Auvergne-Rhône-Alpes
Enedis,2020,Electricité,RES,Résidentiel,R,Résidentiel,,0,18964849.17,4152567,0.646,0,84,Auvergne-Rhône-Alpes
Enedis,2020,Electricité,ENT,Entreprises,A,Agriculture,1.0,0,165938.5212,1388,0.956,0,93,Provence-Alpes-Côte d'Azur


In [90]:
val byRegionEnergy = df.groupBy{libelle_region and filiere}.aggregate { sum { conso } into "consoSum" }
byRegionEnergy

libelle_region,filiere,consoSum
Auvergne-Rhône-Alpes,Electricité,643510119.699964
Provence-Alpes-Côte d'Azur,Electricité,387894421.775597
Non affecté à une région,Electricité,498520.789568
Occitanie,Electricité,392210303.789791
Grand Est,Electricité,425345949.780854
Nouvelle-Aquitaine,Electricité,400302768.732576
Île-de-France,Electricité,723900936.188867
Normandie,Electricité,269870010.348831
Hauts-de-France,Electricité,470861965.464838
Bourgogne-Franche-Comté,Electricité,209201014.941887


In [91]:
byRegionEnergy.plot {
    layout{
        size = 1024 to 600
    }
    bars {
        x(libelle_region)
        y(consoSum)
        fillColor(filiere) {
            scale = categorical(
                "Gaz" to Color.hex("#6F4E37"),
                "Electricité" to Color.hex("#C2D4AB"),
            )
        }
    }
}

In [92]:
val byCategoryAndEnergy = df.groupBy{libelle_categorie_consommation and filiere}
    .aggregate { sum { conso } into "consoSum" }
    .replace{libelle_categorie_consommation}
    .with { it.map { if (it.equals("0")) "N/A" else it.toString() } }

byCategoryAndEnergy

libelle_categorie_consommation,filiere,consoSum
Entreprises,Electricité,2390801171.63436
Petits professionels,Electricité,166865512.536687
Résidentiel,Electricité,653793422.425228
,Electricité,80714529.41
Résidentiel,Gaz,536672320.49
,Gaz,43575798.614
Entreprises,Gaz,3214425282.51099
Petits professionels,Gaz,97986297.68028
Résidentiel + Petits Professionnels,Electricité,1395002440.611
Résidentiel + Petits Professionnels,Gaz,1246357958.43


In [93]:
plotGrid(
    listOf(
        byCategoryAndEnergy.filter { filiere.equals("Electricité") }.plot {
            pie {
                slice(consoSum)
                fillColor(libelle_categorie_consommation){
                    legend.name="Category"
                }
                size = 20.0
            }
            layout {
                title = "Electricity consumption by category"
                style(Style.Void)
            }
        },
        byCategoryAndEnergy.filter { filiere.equals("Gaz") }.plot {
            pie {
                slice(consoSum)
                fillColor(libelle_categorie_consommation){
                    legend.name="Category"
                }
                
                size = 20.0
            }
            layout {
                title = "Gaz consumption by category"
                style(Style.Void)
            }
        }
    )
)

In [94]:
import org.jetbrains.letsPlot.core.spec.back.transform.bistro.util.scale

val byYearAndEnergy = df.groupBy{df.annee and df.filiere}.aggregate { sum { conso }  into "conso" }
byYearAndEnergy.plot {
    line { 
        x(annee){
            axis{
                breaks(format = "{d}")
            }
        }
        y(conso)
        color(filiere)
    }
}