Some simple scripts to manage the data

In [2]:
import polars as pl

In [None]:
# Votes per measure
# The input is comma-separated: the other cells of this notebook read the same
# file with "," (explicitly or by default). Parsing it with ";" would collapse
# every row into a single column and make the "mesure" lookup below fail.
data = pl.read_csv("assets/evaluation/data_kano.csv", separator=",")

# One row per distinct value of "mesure" with the number of rows carrying it.
measure_count = data.get_column("mesure").value_counts()

# Persist the tally next to the input data, then show it in the cell output.
measure_count.write_csv("assets/evaluation/num_votes_per_measure.csv")

print(measure_count)

shape: (12, 2)
┌────────┬───────┐
│ mesure ┆ count │
│ ---    ┆ ---   │
│ str    ┆ u32   │
╞════════╪═══════╡
│ m13    ┆ 9     │
│ m5     ┆ 9     │
│ m1     ┆ 9     │
│ m17    ┆ 7     │
│ m3     ┆ 7     │
│ …      ┆ …     │
│ m2     ┆ 6     │
│ m6     ┆ 9     │
│ m4     ┆ 9     │
│ m10    ┆ 9     │
│ m9     ┆ 9     │
└────────┴───────┘


In [29]:
import polars as pl
# Count votes per position per measure:
# tally how many times each distinct row of the input file occurs.
data = pl.read_csv("assets/evaluation/data_kano.csv", separator=",")

# Grouping on every column of the frame turns each distinct row into its own
# group; the group size is then the number of identical rows.
key_columns = data.columns
row_count = pl.len().alias("count")
measure_count = data.group_by(key_columns).agg(row_count)

# Save the tally, then echo it in the cell output.
measure_count.write_csv("assets/evaluation/cont_votes_per_position_and_measure.csv")

print(measure_count)

shape: (63, 4)
┌────────┬─────────┬──────────┬───────┐
│ mesure ┆ absence ┆ presence ┆ count │
│ ---    ┆ ---     ┆ ---      ┆ ---   │
│ str    ┆ i64     ┆ i64      ┆ u32   │
╞════════╪═════════╪══════════╪═══════╡
│ m2     ┆ 1       ┆ 5        ┆ 1     │
│ m1     ┆ 1       ┆ 2        ┆ 1     │
│ m10    ┆ 4       ┆ 2        ┆ 4     │
│ m2     ┆ 1       ┆ 4        ┆ 1     │
│ m6     ┆ 4       ┆ 1        ┆ 2     │
│ …      ┆ …       ┆ …        ┆ …     │
│ m5     ┆ 3       ┆ 3        ┆ 1     │
│ m5     ┆ 3       ┆ 4        ┆ 1     │
│ m11    ┆ 3       ┆ 3        ┆ 1     │
│ m8     ┆ 3       ┆ 3        ┆ 1     │
│ m11    ┆ 5       ┆ 1        ┆ 2     │
└────────┴─────────┴──────────┴───────┘


In [41]:
import polars as pl

# Build a table giving, for each measure, the percentage of its data points
# that fall into each Kano category, and save it as CSV.

# Lazy loading: nothing is read until .collect() is called further down.
data_df_lazy = pl.scan_csv("assets/evaluation/data_kano.csv")  # Data points with their coordinates in the Kano matrix for each measure
categories_df_lazy = pl.scan_csv("assets/evaluation/kano_categories.csv")  # Kano label for each pair of coordinates
# Join the two frames to associate each data point of a measure with a Kano category label.
# The join is an inner join, so data points whose (absence, presence) pair has no
# entry in the categories file are dropped here and do not count in the totals below.
joined_df_lazy = data_df_lazy.join(
    categories_df_lazy,
    on=['absence', 'presence'],
    how='inner'
)
# Total number of data points per measure: the denominator of the percentages.
measure_totals_lazy = joined_df_lazy.group_by('mesure').agg(
    pl.len().alias('total_per_measure')
)
# Number of data points per (measure, category) pair: the numerator.
category_counts_lazy = joined_df_lazy.group_by('mesure', 'category').agg(
    pl.len().alias('category_count')
)
# Attach each measure's total to its per-category counts.
results_lazy = category_counts_lazy.join(
    measure_totals_lazy,
    on='mesure',
    how='left'
)
# Share of each category within its measure, expressed in percent.
results_with_percentage_lazy = results_lazy.with_columns(
    (pl.col('category_count') / pl.col('total_per_measure') * 100).alias('percentage')
)
# Pivot the table to have measures as rows and categories as columns.
# `on` is the current name of the pivot argument formerly called `columns`;
# the old keyword is deprecated and triggers a DeprecationWarning.
final_pivot = results_with_percentage_lazy.collect().pivot(
    on='category',
    index='mesure',
    values='percentage'
).fill_null(0)  # A category that never occurs for a measure yields null after the pivot; report it as 0 %
final_pivot.write_csv("assets/evaluation/kano_matrix_table.csv")
print(final_pivot)

shape: (12, 7)
┌────────┬────────────┬─────────────┬───────────┬────────────┬──────────────┬───────────┐
│ mesure ┆ attractive ┆ performance ┆ reverse   ┆ indiferent ┆ questionable ┆ must-be   │
│ ---    ┆ ---        ┆ ---         ┆ ---       ┆ ---        ┆ ---          ┆ ---       │
│ str    ┆ f64        ┆ f64         ┆ f64       ┆ f64        ┆ f64          ┆ f64       │
╞════════╪════════════╪═════════════╪═══════════╪════════════╪══════════════╪═══════════╡
│ m10    ┆ 22.222222  ┆ 11.111111   ┆ 11.111111 ┆ 44.444444  ┆ 11.111111    ┆ 0.0       │
│ m13    ┆ 22.222222  ┆ 33.333333   ┆ 11.111111 ┆ 22.222222  ┆ 0.0          ┆ 11.111111 │
│ m1     ┆ 11.111111  ┆ 33.333333   ┆ 22.222222 ┆ 0.0        ┆ 0.0          ┆ 33.333333 │
│ m8     ┆ 11.111111  ┆ 11.111111   ┆ 0.0       ┆ 77.777778  ┆ 0.0          ┆ 0.0       │
│ m5     ┆ 0.0        ┆ 0.0         ┆ 22.222222 ┆ 77.777778  ┆ 0.0          ┆ 0.0       │
│ …      ┆ …          ┆ …           ┆ …         ┆ …          ┆ …            ┆ …      

  final_pivot = results_with_percentage_lazy.collect().pivot(
