In [17]:
import os
from random import shuffle
from time import perf_counter

import numpy as np
import pandas as pd
import plotly.express as px
import plotly.io as pio

pio.templates.default = "plotly_white"
from sklearn.metrics import confusion_matrix

from generate_dataset import generate_dataset
from part2_maxsat_k_classes import MaxSatSolver
from numpy.random import normal

In [18]:
solver = MaxSatSolver()


def assess_dataset(dataset, stats_df, parameter_name, parameter_value, noise=0):
    """
    Calcule les performance d'un solveur MaxSat sur un dataset de test donné avec les paramètre de bruit indiqués
    @param dataset: {(np.array()} Dataset de test
    @param stats_df: {pd.Dataframe} Dataframe dans lequel les stats seront ajoutées
    @param parameter_name: {str} Nom du paramètre évalué
    @param parameter_value: {int} Valeur du paramètre évalué
    @param noise: {float} Variance appliquée à la loi normale
    @return: {pd.Dataframe} Dataframe contenant les statistiques du modèle
    """
    S, y = dataset
    classes_count = len(set(y))
    subjects_count = S.shape[1]

    S = (S.astype(float) + normal(0, noise, S.shape)).astype(int)
    for i,row in enumerate(S):
        for j,value in enumerate(row):
            S[i,j] = max(0,S[i,j])
            S[i,j] = min(20,S[i,j])


    dataset = list(zip(S, y))

    shuffle(dataset)

    train = dataset[:len(dataset) // 2]
    test = dataset[len(dataset) // 2:]

    X_train, y_train = zip(*train)
    X_test, y_test = zip(*test)

    start = perf_counter()
    isSolvable, ignored_constraints, variables, constraints = solver.solve(np.array(X_train), y_train)
    end = perf_counter()

    if not isSolvable:
        return stats_df, 0

    ignored_constraints = int(ignored_constraints.replace("o ",""))

    contraints_keys = [item for item in constraints]

    separation_index = 0
    for index, elem in enumerate(contraints_keys[:-1]):
        if len(elem) > len(contraints_keys[index + 1]):
            separation_index = index + 1
            break

    marks_conditions = [item for item in contraints_keys[:separation_index] if constraints[item] == True]
    sufficient_subjects = [item for item in contraints_keys[separation_index:] if constraints[item] == True]

    sufficient_subjects_matrix = np.zeros((len(sufficient_subjects), S.shape[1]))

    for index, row in enumerate(sufficient_subjects):
        for subject in row:
            sufficient_subjects_matrix[index, subject] = 1 / len(row)

    marks_conditions_matrix = np.full((classes_count, subjects_count), 20)

    for subject, classe, mark in marks_conditions:
        marks_conditions_matrix[classe, subject] = min(mark, marks_conditions_matrix[classe, subject])

    pred = []

    for student_index, student in enumerate(X_test):
        for class_number, class_frontier in enumerate(marks_conditions_matrix[::-1]):
            for sufficient_coalition in sufficient_subjects_matrix:
                if (class_frontier <= student) @ sufficient_coalition >= 0.99:
                    pred.append(classes_count - class_number - 1)
                    break
            else:
                continue
            break
        else:
            pred.append(0)

    confusion_mat = confusion_matrix(y_test, pred, labels=[0, 1])
    TP = confusion_mat[0, 0]
    TN = confusion_mat[1, 1]
    FP = confusion_mat[0, 1]
    FN = confusion_mat[1, 0]

    precision = TP / (TP + FP)
    recall = TP / (TP + FN)
    duration = end - start
    raw_stats = {
        "precision": precision,
        "recall": recall,
        "accuracy": (TP + TN) / students_count,
        "F1 score": 2 * (precision * recall) / (precision + recall),
        "duration (in s)": duration,
        "ignored constraints": ignored_constraints
    }

    for key, value in raw_stats.items():
        stats_df = stats_df.append({"type": key, "value": value, parameter_name: parameter_value},
                                   ignore_index=True)

    return stats_df, duration

# Influence du bruit / capacité de généralisation

In [19]:
stats = pd.DataFrame(columns=["type", "value", "subjects_count"])
graphs_directory = "graphs"
classes_count = 2
subjects_count = 4
students_count = 300
elapsed_time = 0


for noise in np.arange(0, 20, 0.5):
    successful_iter = 0
    while successful_iter <= 10:
        if successful_iter == 0:
            print(f"{noise:.1f}", end=" ")
        dataset = generate_dataset(students_count * 2, subjects_count, classes_count, verbose=0)
        try:
            stats, elapsed_time = assess_dataset(dataset, stats, "écart-type", noise, noise=noise)
            successful_iter += 1
        except:
            ""
aggregated_stats = stats.groupby(["écart-type", "type"]).agg({"value": "mean"}).reset_index()

0.0 0.5 1.0 1.0 1.5 2.0 2.5 3.0 3.5 4.0 4.0 4.0 4.0 4.0 4.5 5.0 5.0 5.0 5.0 5.0 5.5 5.5 6.0 


invalid value encountered in longlong_scalars


invalid value encountered in longlong_scalars



6.5 6.5 7.0 7.0 7.5 8.0 


invalid value encountered in longlong_scalars


invalid value encountered in longlong_scalars



8.5 9.0 9.5 


invalid value encountered in longlong_scalars



10.0 10.0 10.0 10.5 11.0 


invalid value encountered in longlong_scalars



11.5 11.5 12.0 


invalid value encountered in longlong_scalars



12.5 12.5 12.5 


invalid value encountered in longlong_scalars


invalid value encountered in longlong_scalars



13.0 


invalid value encountered in longlong_scalars



13.5 


invalid value encountered in longlong_scalars



14.0 14.5 14.5 


invalid value encountered in longlong_scalars



15.0 15.5 16.0 16.5 17.0 


invalid value encountered in longlong_scalars



17.5 


invalid value encountered in longlong_scalars


invalid value encountered in longlong_scalars



18.0 18.5 19.0 19.5 


invalid value encountered in longlong_scalars


invalid value encountered in longlong_scalars


invalid value encountered in longlong_scalars



In [20]:
fig = px.line(aggregated_stats[~aggregated_stats["type"].isin(["duration (in s)","ignored constraints"])], x="écart-type", y="value",
              color="type",
              title=f"Model performance with noise applied thanks to a centered normal <br> law with students_count={students_count} and subjects_count={subjects_count}")

fig.show()
try:
    fig.write_image(f"{graphs_directory}/maxsat_perf_noise.png")
except:
    print("Please install kaleido (pip install -U kaleido) for static image export")

In [21]:
fig = px.line(aggregated_stats[(aggregated_stats["type"] == "ignored constraints")], x="écart-type", y="value",
              color="type",
              title=f"Ignored variables count with noise applied thanks to a centered normal <br> law with students_count={students_count} and subjects_count={subjects_count}")

fig.show()
try:
    fig.write_image(f"{graphs_directory}/maxsat_ignored_noise.png")
except:
    print("Please install kaleido (pip install -U kaleido) for static image export")