In [1]:
%maven org.dflib:dflib-jupyter:2.0.0-M2
%maven org.dflib:dflib-csv:2.0.0-M2

## Import dependencies

In [2]:
// Import the DFLib libraries
import org.dflib.*;
import org.dflib.series.*;
import org.dflib.print.*;
import java.util.*;

## Create sample data

In [3]:
// Create sample data
Map<String, List<?>> programmingLanguages = new HashMap<>();
programmingLanguages.put("name", Arrays.asList("Java", "Python", "JavaScript", "C#", "Go"));
programmingLanguages.put("popularity", Arrays.asList(16.8, 36.2, 9.2, 6.3, 4.2));
programmingLanguages.put("year", Arrays.asList(1995, 1991, 1995, 2000, 2009));

// Convert map to Series
String[] columns = programmingLanguages.keySet().toArray(new String[0]);
Series<?>[] seriesArray = new Series[columns.length];

for (int i = 0; i < columns.length; i++) {
    seriesArray[i] = Series.of(programmingLanguages.get(columns[i]));
}

// Create DataFrame
DataFrame df = DataFrame.byColumn(columns).of(seriesArray);

// Show the DataFrame
System.out.println("Created DataFrame:");
TabularPrinter printer = new TabularPrinter();
StringBuilder sb = new StringBuilder();
printer.print(sb, df);
System.out.println(sb.toString());

Created DataFrame:

year                           popularity                  name                          
------------------------------ --------------------------- ------------------------------
[1995, 1991, 1995, 2000, 2009] [16.8, 36.2, 9.2, 6.3, 4.2] [Java, Python,..cript, C#, Go]
1 row x 3 columns


## Describe the DataFrame

In [4]:
// Describe the DataFrame
System.out.println("DataFrame: " + df.width() + " columns × " + df.height() + " rows");
System.out.println("\nColumns:");

for (String col : df.getColumnsIndex()) {
    Series<?> series = df.getColumn(col);
    String typeInfo = series.getClass().getSimpleName();
    
    // Get sample values
    List<String> samples = new ArrayList<>();
    for (int i = 0; i < Math.min(3, series.size()); i++) {
        Object value = series.get(i);
        samples.add(value == null ? "null" : value.toString());
    }
    
    System.out.printf("  %-20s %-20s Sample: [%s]\n", 
        col, typeInfo, String.join(", ", samples));
}

// Show first 5 rows
System.out.println("\nFirst 5 rows:");
TabularPrinter printer = new TabularPrinter();
StringBuilder sb = new StringBuilder();
printer.print(sb, df.head(5));
System.out.println(sb.toString());

// Show numeric summaries for numeric columns
System.out.println("\nNumeric Summaries:");
for (String col : df.getColumnsIndex()) {
    Series<?> series = df.getColumn(col);
    
    try {
        if (series instanceof DoubleSeries) {
            DoubleSeries ds = (DoubleSeries) series;
            System.out.printf("  %-20s min: %10.2f  max: %10.2f  mean: %10.2f  sum: %10.2f\n",
                col, ds.min(), ds.max(), ds.avg(), ds.sum());
        } else if (series instanceof IntSeries) {
            IntSeries is = (IntSeries) series;
            System.out.printf("  %-20s min: %10d  max: %10d  mean: %10.2f  sum: %10d\n",
                col, is.min(), is.max(), is.avg(), is.sum());
        } else if (series instanceof LongSeries) {
            LongSeries ls = (LongSeries) series;
            System.out.printf("  %-20s min: %10d  max: %10d  mean: %10.2f  sum: %10d\n",
                col, ls.min(), ls.max(), ls.avg(), ls.sum());
        }
    } catch (Exception e) {
        // Skip non-numeric columns or ones that can't be summarized
    }
}

DataFrame: 3 columns × 1 rows

Columns:
  year                 ArraySeries          Sample: [[1995, 1991, 1995, 2000, 2009]]
  popularity           ArraySeries          Sample: [[16.8, 36.2, 9.2, 6.3, 4.2]]
  name                 ArraySeries          Sample: [[Java, Python, JavaScript, C#, Go]]

First 5 rows:

year                           popularity                  name                          
------------------------------ --------------------------- ------------------------------
[1995, 1991, 1995, 2000, 2009] [16.8, 36.2, 9.2, 6.3, 4.2] [Java, Python,..cript, C#, Go]
1 row x 3 columns

Numeric Summaries:


## Sort by popularity (descending)

In [5]:
// Sort by popularity (descending)
DataFrame sortedByPopularity = df.sort("popularity", false);

System.out.println("Languages sorted by popularity (descending):");
TabularPrinter printer = new TabularPrinter();
StringBuilder sb = new StringBuilder();
printer.print(sb, sortedByPopularity);
System.out.println(sb.toString());

Languages sorted by popularity (descending):

year                           popularity                  name                          
------------------------------ --------------------------- ------------------------------
[1995, 1991, 1995, 2000, 2009] [16.8, 36.2, 9.2, 6.3, 4.2] [Java, Python,..cript, C#, Go]
1 row x 3 columns


## Sort by year (ascending)

In [6]:
// Sort by year (ascending)
DataFrame sortedByYear = df.sort("year", true);

System.out.println("Languages sorted by year (ascending):");
TabularPrinter printer = new TabularPrinter();
StringBuilder sb = new StringBuilder();
printer.print(sb, sortedByYear);
System.out.println(sb.toString());

Languages sorted by year (ascending):

year                           popularity                  name                          
------------------------------ --------------------------- ------------------------------
[1995, 1991, 1995, 2000, 2009] [16.8, 36.2, 9.2, 6.3, 4.2] [Java, Python,..cript, C#, Go]
1 row x 3 columns


## Calculate percentages

In [8]:
// Calculate total popularity
double totalPopularity = 0;
Series<?> popularitySeries = df.getColumn("popularity");
for (int i = 0; i < popularitySeries.size(); i++) {
    Object value = popularitySeries.get(i);
    if (value instanceof Number) {
        totalPopularity += ((Number) value).doubleValue();
    } else if (value instanceof List) {
        // Handle case where the value is a List
        List<?> list = (List<?>) value;
        for (Object item : list) {
            if (item instanceof Number) {
                totalPopularity += ((Number) item).doubleValue();
            }
        }
    }
}

// Create a new DataFrame with all existing columns plus the percentage column
String[] columns = new String[df.width() + 1];
Series<?>[] series = new Series[df.width() + 1];

// Copy existing columns
for (int i = 0; i < df.width(); i++) {
    String colName = df.getColumnsIndex().get(i);
    columns[i] = colName;
    series[i] = df.getColumn(colName);
}

// Create a Series for the percentage column
double finalTotal = totalPopularity; // Need final for lambda
List<Double> percentages = new ArrayList<>();

// Calculate percentages
for (int i = 0; i < popularitySeries.size(); i++) {
    Object value = popularitySeries.get(i);
    double popularity = 0;
    
    if (value instanceof Number) {
        popularity = ((Number) value).doubleValue();
    } else if (value instanceof List) {
        // Handle case where the value is a List
        List<?> list = (List<?>) value;
        for (Object item : list) {
            if (item instanceof Number) {
                popularity += ((Number) item).doubleValue();
            }
        }
    }
    
    double percentage = (popularity / finalTotal) * 100;
    percentages.add(percentage);
}

// Add the new percentage column
columns[df.width()] = "percentage";
series[df.width()] = Series.of(percentages);

// Create the new DataFrame
DataFrame withPercentage = DataFrame.byColumn(columns).of(series);

System.out.println("Languages with percentage of total popularity:");
TabularPrinter printer = new TabularPrinter();
StringBuilder sb = new StringBuilder();
printer.print(sb, withPercentage);
System.out.println(sb.toString());

Languages with percentage of total popularity:

year                           popularity                  name                           percentage
------------------------------ --------------------------- ------------------------------ ----------
[1995, 1991, 1995, 2000, 2009] [16.8, 36.2, 9.2, 6.3, 4.2] [Java, Python,..cript, C#, Go] [100.0]   
1 row x 4 columns
