In [1]:
import numpy as np
import pandas as pd
from scipy.optimize import minimize

# Fix the legacy global NumPy seed so every draw below is reproducible.
np.random.seed(0)
n_drivers = 100  # number of simulated drivers
n_locations = 150  # number of simulated urban villages (kelurahan)

# Driver attributes: an integer experience score in [1, 9] and the index of
# the location each driver prefers.
drivers_data = dict(
    experience=np.random.randint(1, 10, n_drivers),
    preference=np.random.randint(0, n_locations, n_drivers),
)

# Location attributes, one uniform draw per location, generated in the order
# demand_level -> competition -> safety.
locations_data = {
    attribute: np.random.uniform(low, high, n_locations)
    for attribute, low, high in (
        ("demand_level", 1, 5),
        ("competition", 1, 3),
        ("safety", 1, 5),
    )
}

# Travel attributes for every (driver, location) pair, stored along the last
# axis as [distance, time, cost].
travel_data = np.zeros((n_drivers, n_locations, 3))
for driver_idx, location_idx in np.ndindex(n_drivers, n_locations):
    # Distance grows with the index gap to the driver's preferred location,
    # plus uniform noise in [0, 5).
    index_gap = np.abs(location_idx - drivers_data['preference'][driver_idx])
    distance = index_gap + np.random.uniform(0, 5)
    # Time and cost are derived from distance with their own random factors.
    time = distance / np.random.uniform(1, 3)
    cost = distance * np.random.uniform(0.5, 1.5)
    travel_data[driver_idx, location_idx] = (distance, time, cost)

In [8]:
# Build a second synthetic data set for the destination-choice model and
# expose it as pandas DataFrames.

# Re-seed the legacy global generator so the results are repeatable.
np.random.seed(0)

# Problem size: number of drivers and number of candidate locations.
n_drivers = 100
n_locations = 20  # small choice set to keep the choices focused

# Driver attributes: integer experience in [1, 9] and preferred location index.
drivers_data = dict(
    experience=np.random.randint(1, 10, n_drivers),
    preference=np.random.randint(0, n_locations, n_drivers),
)

# Location attributes, drawn in the order demand_level -> competition -> safety.
locations_data = {
    attribute: np.random.uniform(low, high, n_locations)
    for attribute, low, high in (
        ("demand_level", 1, 5),
        ("competition", 1, 3),
        ("safety", 1, 5),
    )
}

# Travel attributes per (driver, location) pair, stored as [distance, time, cost].
# Distance is shorter near the driver's preferred location, with random
# variation; time and cost are derived from distance.
travel_data = np.zeros((n_drivers, n_locations, 3))
for driver_idx, preferred_location in enumerate(drivers_data['preference']):
    for location_idx in range(n_locations):
        # Random multiplier on the index gap. It is drawn independently for
        # every driver/location pair, not once per location.
        popularity_factor = np.random.choice([0.5, 1, 1.5, 2], p=[0.1, 0.5, 0.3, 0.1])
        index_gap = np.abs(location_idx - preferred_location)
        distance = index_gap * popularity_factor + np.random.uniform(1, 5)
        time = distance / np.random.uniform(1, 3)
        cost = distance * np.random.uniform(0.5, 1.5)
        travel_data[driver_idx, location_idx] = (distance, time, cost)

# Convert to DataFrames. Drivers and locations are indexed by their ids.
drivers_df = pd.DataFrame(drivers_data).rename_axis('driver_id')
locations_df = pd.DataFrame(locations_data).rename_axis('location_id')

# Long-format travel table: one row per (driver, location) pair in
# driver-major order, matching the row order of the flattened array.
travel_df = pd.DataFrame(travel_data.reshape(-1, 3), columns=['distance', 'time', 'cost'])
travel_df.insert(travel_df.shape[1], 'driver_id', np.repeat(np.arange(n_drivers), n_locations))
travel_df.insert(travel_df.shape[1], 'location_id', np.tile(np.arange(n_locations), n_drivers))

drivers_df.head(), locations_df.head(), travel_df.head()

(           experience  preference
 driver_id                        
 0                   6           2
 1                   1           3
 2                   4           3
 3                   4          18
 4                   8          14,
              demand_level  competition    safety
 location_id                                     
 0                2.680301     1.100740  4.905618
 1                1.202352     2.398196  3.470632
 2                2.461984     2.984793  3.169995
 3                1.066512     1.534525  4.418454
 4                1.922969     2.358181  3.975338,
    distance      time      cost  driver_id  location_id
 0  4.701806  1.696989  5.547564          0            0
 1  3.933634  1.526156  2.869358          0            1
 2  2.254770  0.773532  2.191076          0            2
 3  3.114777  1.265715  3.570358          0            3
 4  3.745833  1.432353  4.671351          0            4)

In [2]:
# Driver table: one row per driver, indexed by an integer driver_id.
drivers_df = (
    pd.DataFrame(drivers_data)
    .assign(driver_id=range(n_drivers))
    .set_index('driver_id')
)

# Location table: one row per location, indexed by an integer location_id.
locations_df = (
    pd.DataFrame(locations_data)
    .assign(location_id=range(n_locations))
    .set_index('location_id')
)

# Travel table in long format: flatten the (driver, location, attribute) array
# to one row per (driver, location) pair in driver-major order, then append
# the matching id columns.
travel_data_reshaped = travel_data.reshape(n_drivers * n_locations, 3)
travel_df = pd.DataFrame(travel_data_reshaped, columns=['distance', 'time', 'cost'])
travel_df.insert(travel_df.shape[1], 'driver_id', np.repeat(np.arange(n_drivers), n_locations))
travel_df.insert(travel_df.shape[1], 'location_id', np.tile(np.arange(n_locations), n_drivers))

In [3]:
def utility_function(driver_id, location_id, beta):
    """Return the linear utility of one location for one driver.

    Reads the module-level frames ``drivers_df``, ``locations_df`` and
    ``travel_df``.

    Parameters
    ----------
    driver_id : label looked up in ``drivers_df`` and matched against
        ``travel_df.driver_id``.
    location_id : label looked up in ``locations_df`` and matched against
        ``travel_df.location_id``.
    beta : indexable with at least eight coefficients, applied in order to
        distance, time, cost, demand_level, competition, safety, experience
        and the "location is the driver's preferred one" indicator.

    Returns
    -------
    The weighted sum of the eight attributes.

    Raises
    ------
    KeyError if either id is missing from its frame, IndexError if
    ``travel_df`` has no row for the pair.
    """
    driver = drivers_df.loc[driver_id]
    location = locations_df.loc[location_id]

    # Boolean-mask scan over the whole travel table; the first matching row wins.
    pair_mask = (travel_df.driver_id == driver_id) & (travel_df.location_id == location_id)
    trip = travel_df[pair_mask].iloc[0]

    # Attribute values in the same order as their coefficients in ``beta``.
    attributes = (
        trip['distance'],
        trip['time'],
        trip['cost'],
        location['demand_level'],
        location['competition'],
        location['safety'],
        # Same value for every location of a given driver.
        driver['experience'],
        # Indicator: this location is the one the driver prefers.
        driver['preference'] == location_id,
    )

    # Accumulate strictly left to right so the floating-point result is stable.
    utility = beta[0] * attributes[0]
    for position in range(1, len(attributes)):
        utility = utility + beta[position] * attributes[position]
    return utility

In [4]:
def log_likelihood(beta, choices=None):
    """Log-likelihood of a multinomial-logit destination-choice model.

    Relies on the module-level ``n_drivers``, ``n_locations`` and
    ``utility_function``.

    Parameters
    ----------
    beta : coefficient vector forwarded unchanged to ``utility_function``.
    choices : optional sequence of length ``n_drivers`` holding the chosen
        location index (0 .. n_locations - 1) for each driver.

        * When given, the result is the standard logit log-likelihood
          ``sum_i [ V_i,chosen - log(sum_j exp(V_ij)) ]``.
        * When omitted (the original call signature), no observed choice
          enters the computation: each driver contributes
          ``-log(sum_j exp(V_ij - max_j V_ij))``, i.e. the log-probability of
          whichever location currently has the highest utility. That quantity
          lies in ``[-log(n_locations), 0]`` and reaches its minimum whenever
          all utilities of a driver are equal (e.g. ``beta = 0``), so it
          cannot identify ``beta``. At ``beta = 0`` it still equals the
          equal-shares log-likelihood ``-n_drivers * log(n_locations)``.

    Returns
    -------
    The log-likelihood summed over drivers (a value <= 0). ``scipy.optimize
    .minimize`` needs the negated value to perform maximum-likelihood
    estimation.

    Raises
    ------
    ValueError if ``choices`` has the wrong length or contains an index
    outside ``0 .. n_locations - 1``.
    """
    chosen = None
    if choices is not None:
        chosen = np.asarray(choices)
        if chosen.shape != (n_drivers,):
            raise ValueError(
                "choices must contain exactly one location index per driver"
            )

    total = 0
    for driver_id in range(n_drivers):
        utilities = np.array([
            utility_function(driver_id, location_id, beta)
            for location_id in range(n_locations)
        ])
        # Shift by the maximum before exponentiating for numerical stability;
        # the shift cancels as long as the numerator is shifted the same way.
        shifted = utilities - np.max(utilities)
        log_denominator = np.log(np.sum(np.exp(shifted)))

        if chosen is None:
            # Legacy objective: numerator fixed at the best alternative (shifted == 0).
            total += -log_denominator
        else:
            picked = int(chosen[driver_id])
            if not 0 <= picked < n_locations:
                raise ValueError(
                    "choices contains a location index outside the valid range"
                )
            total += shifted[picked] - log_denominator
    return total

In [9]:
# Start the search from an all-zero coefficient vector; utility_function
# reads eight coefficients (beta[0] .. beta[7]).
initial_beta = np.zeros(8, dtype=float)

# Derivative-free simplex search.
# NOTE(review): `minimize` looks for the smallest value of its objective,
# whereas a log-likelihood is normally maximised - confirm the intended sign.
result = minimize(fun=log_likelihood, x0=initial_beta, method='Nelder-Mead')

KeyboardInterrupt: 

In [6]:
# Goodness-of-fit statistics for the estimated model.
L0 = log_likelihood(np.zeros_like(initial_beta))  # objective at beta = 0
LL = result.fun                                   # objective at the optimiser's solution
k = len(result.x)                                 # number of estimated coefficients

# Likelihood-ratio index and its variant penalised by the parameter count.
rho_square = 1 - LL / L0
rho_square_adj = 1 - (LL - k) / L0

# Standard errors and t-statistics are not computed here. An earlier draft
# derived them from `result.hess_inv`.
# NOTE(review): a Nelder-Mead result is not expected to carry `hess_inv`;
# confirm, or switch to a gradient-based method if they are needed.

# Report the results.
for stat_label, stat_value in (
    ("Optimized Beta Parameters:", result.x),
    ("Log-Likelihood:", LL),
    ("L(0):", L0),
    ("Rho-Square:", rho_square),
    ("Adjusted Rho-Square:", rho_square_adj),
):
    print(stat_label, stat_value)

Optimized Beta Parameters: [0. 0. 0. 0. 0. 0. 0. 0.]
Log-Likelihood: -501.0635294096246
L(0): -501.0635294096246
Rho-Square: 0.0
Adjusted Rho-Square: -0.015966039295308487
