In [None]:
from choice_learn.datasets import load_modecanada

# Load the ModeCanada dataset; as_frame=True presumably returns it as a
# pandas DataFrame rather than a ready-made ChoiceDataset — TODO confirm.
dataset = load_modecanada(as_frame=True)
# Last expression of the cell, so its result is displayed: a preview of the
# first rows of the loaded data.
dataset.head()

In [None]:
# Inspect a single choice situation: keep only the rows whose `case` id is 1.
is_first_case = dataset.case == 1
dataset.loc[is_first_case]

In [3]:
from choice_learn.data import ChoiceDataset

# Columns of `dataset` grouped by role before handing them to ChoiceDataset.
shared_columns = ["income"]  # features passed as shared_features_columns
item_columns = ["cost", "ivt", "ovt", "freq"]  # features passed as items_features_columns

choice_dataset = ChoiceDataset.from_single_long_df(
    df=dataset,
    shared_features_columns=shared_columns,
    items_features_columns=item_columns,
    items_id_column="alt",        # column identifying the item of each row
    choices_id_column="case",     # column identifying the choice situation
    choices_column="choice",      # column holding the observed choice
    choice_format="one_zero",
)

In [None]:
from choice_learn.models import ConditionalLogit

# Build an empty conditional logit model; its utility is specified below,
# one coefficient declaration at a time.
model = ConditionalLogit()

every_item = [0, 1, 2, 3]
all_but_first = [1, 2, 3]

# Shared coefficients: one weight per feature, applied to all listed items.
# Only "freq" is given an explicit coefficient name.
shared_specs = [
    {"feature_name": "cost"},
    {"feature_name": "freq", "coefficient_name": "beta_frequence"},
    {"feature_name": "ovt"},
]
for spec in shared_specs:
    # A fresh list is passed on every call, as the original literals did.
    model.add_shared_coefficient(items_indexes=list(every_item), **spec)

# 'ivt' gets its own coefficient for each item.
model.add_coefficients(feature_name="ivt", items_indexes=list(every_item))

# Intercept and income coefficients exist for every item except the first
# one, which is left out (zeroed) as the reference.
for feature in ("intercept", "income"):
    model.add_coefficients(feature_name=feature, items_indexes=list(all_but_first))

In [None]:
# Fit the model on the choice dataset and keep the return value of fit().
# get_report=True presumably asks fit() to also build the summary that is
# read from `model.report` below — TODO confirm against the library docs.
history = model.fit(choice_dataset, get_report=True, verbose=2)
# Last expression of the cell, so the report is displayed.
model.report

In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Row of `choice_dataset` used as the baseline situation for both scenarios,
# and the number of what-if variations generated from it.
base_index = 2237
n_scenarios = 10

colors = ["#9ae1e2", "#332851", "#ca3074", "#f6c677"]
items = ['bus', "car", "train"]


def plot_stacked_probas(ax, x_values, probas, x_label):
    """Draw choice probabilities as stacked bands on ``ax``.

    Column 0 of ``probas`` is labelled "air" and the following columns are
    labelled with ``items`` in order. Band ``k`` spans from the cumulative
    probability of the columns before ``k`` up to the cumulative probability
    including column ``k``.

    Args:
        ax: matplotlib Axes to draw on.
        x_values: one x coordinate per row of ``probas``.
        probas: array-like of shape (len(x_values), n_items).
        x_label: label of the x axis.
    """
    probas = np.asarray(probas)
    upper = np.cumsum(probas, axis=1)
    # The lower edge of each band is the upper edge of the previous one;
    # the first band starts at zero.
    lower = np.concatenate([np.zeros_like(upper[:, :1]), upper[:, :-1]], axis=1)
    for k, label in enumerate(["air"] + items):
        ax.fill_between(x_values, lower[:, k], upper[:, k], label=label, color=colors[k])
    ax.set_xlabel(x_label)
    ax.set_ylabel("Probability of choice")
    ax.legend()


fig, (ax_price, ax_income) = plt.subplots(1, 2, figsize=(12, 5))

# --- Scenario 1: same situation, increasing bus ticket price ---------------
shared_features_by_choice = [choice_dataset.shared_features_by_choice[0][base_index]] * n_scenarios
items_features_by_choice = [
    choice_dataset.items_features_by_choice[0][base_index].copy() for _ in range(n_scenarios)
]

values = []
for i in range(n_scenarios):
    # [1][0] = item 1 (the one labelled "bus" in the plots), feature 0
    # ("cost", assuming the items_features_columns order used to build
    # choice_dataset is preserved — TODO confirm).
    items_features_by_choice[i][1][0] = 10 * i
    values.append(items_features_by_choice[i][1][0])
play_dataset = ChoiceDataset(
    shared_features_by_choice=shared_features_by_choice,
    items_features_by_choice=items_features_by_choice,
    # Was `canada_dataset`, which is never defined; the dataset built earlier
    # in this notebook is `choice_dataset`.
    choices=choice_dataset.choices[:n_scenarios],
)

probs = model.predict_probas(play_dataset)
plot_stacked_probas(ax_price, values, probs, "Bus Ticket Price")

# --- Scenario 2: same situation, increasing customer income ----------------
shared_features_by_choice = [
    choice_dataset.shared_features_by_choice[0][base_index].copy() for _ in range(n_scenarios)
]
items_features_by_choice = [
    choice_dataset.items_features_by_choice[0][base_index].copy() for _ in range(n_scenarios)
]
for i in range(n_scenarios):
    shared_features_by_choice[i][0] += 10 * i
# Plot against the incomes actually fed to the model, not the bus prices
# left over from the first scenario.
incomes = [shared_features_by_choice[i][0] for i in range(n_scenarios)]
play_dataset = ChoiceDataset(
    shared_features_by_choice=shared_features_by_choice,
    items_features_by_choice=items_features_by_choice,
    choices=choice_dataset.choices[:n_scenarios],
)

probs = model.predict_probas(play_dataset)
plot_stacked_probas(ax_income, incomes, probs, "Customer Income")