In [None]:
import numpy as np
import pandas as pd

def simulate_nested_logit(
    I=100,            # Number of people (or markets)
    G=10,             # Number of nests
    prods_per_g=10,   # Products per nest
    beta=2.0,         # True slope parameter
    rho=0.5,          # Nest parameter in (0,1)
    sigma_x=1.0,      # Std dev of x_{ji}
    sigma_xi=1.0,     # Std dev of xi_{ji}
    seed=42
):
    """
    Simulate a nested-logit data set and return it as a long DataFrame.

    Setup:
      - G nests, each with 'prods_per_g' products => J = G*prods_per_g inside goods.
        Product j (0-based) belongs to nest j // prods_per_g.
      - I "people" or "markets".
      - For each person i and product j:
          x_{ji}  ~ Normal(0, sigma_x),
          xi_{ji} ~ Normal(0, sigma_xi),
          delta_{ji} = beta * x_{ji} + xi_{ji}.
      - Shares:
          s_{ji} = s_{g_j, i} * s_{j|g_j, i}
        where
          s_{g, i}   = D_{g i}^rho / (1 + sum_h D_{h i}^rho),
          D_{g i}    = sum_{r in g} exp(delta_{r i} / rho),
          s_{j|g, i} = exp(delta_{j i} / rho) / D_{g_j i},
          s_{0, i}   = 1 / (1 + sum_h D_{h i}^rho)      (outside option).

    All shares are evaluated in log space (log-sum-exp), so a small rho or a
    large delta cannot overflow exp() and turn the shares into NaN.

    Side effect: calls np.random.seed(seed), i.e. reseeds NumPy's global RNG.

    Returns:
      DataFrame with I*J rows and columns
      ['person', 'product', 'nest', 'x_ji', 'xi_ji', 'delta_ji',
       's_ji', 's_gi', 's_0i'].  If I == 0 or J == 0 the frame is empty but
      still carries these columns.

    Raises:
      ValueError: if I, G or prods_per_g is negative, or rho is not > 0.
    """
    if I < 0 or G < 0 or prods_per_g < 0:
        raise ValueError("I, G and prods_per_g must be non-negative")
    if not rho > 0:
        raise ValueError("rho must be strictly positive")

    np.random.seed(seed)

    J = G * prods_per_g  # total number of inside products
    columns = ['person', 'product', 'nest', 'x_ji', 'xi_ji', 'delta_ji',
               's_ji', 's_gi', 's_0i']
    if I == 0 or J == 0:
        return pd.DataFrame(columns=columns)

    # Draw x then xi person by person: this keeps the random stream (and hence
    # x_ji, xi_ji, delta_ji) the same as a per-person simulation for a given seed.
    x = np.empty((I, J))
    xi = np.empty((I, J))
    for i in range(I):
        x[i] = np.random.normal(0.0, sigma_x, size=J)
        xi[i] = np.random.normal(0.0, sigma_xi, size=J)
    delta = beta * x + xi

    # Scaled utilities laid out as (person, nest, product-within-nest).
    v = (delta / rho).reshape(I, G, prods_per_g)

    # log D_{g i} via log-sum-exp: subtract the nest maximum before exp().
    v_max = v.max(axis=2, keepdims=True)
    log_nest_sum = v_max + np.log(np.exp(v - v_max).sum(axis=2, keepdims=True))

    # Within-nest conditional shares s_{j|g,i}.
    s_within = np.exp(v - log_nest_sum)

    # log(D_{g i}^rho) for every nest, shape (I, G).
    incl = rho * log_nest_sum[:, :, 0]

    # log(1 + sum_h D_{h i}^rho); the outside option enters with log-weight 0,
    # so the stabilising shift is max(0, max_g incl).
    shift = np.maximum(incl.max(axis=1, keepdims=True), 0.0)
    log_denom = shift + np.log(
        np.exp(-shift) + np.exp(incl - shift).sum(axis=1, keepdims=True)
    )

    s_g = np.exp(incl - log_denom)        # nest shares, shape (I, G)
    s_0 = np.exp(-log_denom).ravel()      # outside shares, shape (I,)
    s_j = s_g[:, :, None] * s_within      # product shares, shape (I, G, P)

    product_ids = np.arange(J)
    return pd.DataFrame({
        'person': np.repeat(np.arange(I), J),
        'product': np.tile(product_ids, I),
        'nest': np.tile(product_ids // prods_per_g, I),
        'x_ji': x.ravel(),
        'xi_ji': xi.ravel(),
        'delta_ji': delta.ravel(),
        's_ji': s_j.reshape(I, J).ravel(),
        's_gi': np.repeat(s_g, prods_per_g, axis=1).ravel(),  # nest share of product's nest
        's_0i': np.repeat(s_0, J),
    })


if __name__ == "__main__":
    # Demo run: spell out the default configuration explicitly.
    settings = dict(
        I=100,
        G=10,
        prods_per_g=10,
        beta=2.0,
        rho=0.5,
        sigma_x=1.0,
        sigma_xi=1.0,
        seed=42,
    )
    df = simulate_nested_logit(**settings)

    # Quick look at the simulated panel.
    print(df.head(15))
    print("\nNumber of rows:", len(df))
    print("Columns:", list(df.columns))

    # s_0i is constant within a person, so keep one row per person before
    # averaging the outside-good share.
    per_person = df[['person', 's_0i']].drop_duplicates()
    avg_outside_share = per_person['s_0i'].mean()
    print(f"\nAverage outside share across the 100 people: {avg_outside_share:.4f}")


In [None]:
import numpy as np
import pandas as pd

def simulate_nested_logit(
    I=100,            # Number of people (or markets)
    G=10,             # Number of nests
    prods_per_g=10,   # Products per nest
    beta=2.0,         # True slope parameter
    rho=0.5,          # Nest parameter in (0,1)
    sigma_x=1.0,      # Std dev of x_{ji}
    sigma_xi=1.0,     # Std dev of xi_{ji}
    seed=42
):
    """
    Simulate a nested-logit data set and return it as a long DataFrame.

    Setup:
      - G nests, each with 'prods_per_g' products => J = G*prods_per_g inside goods.
        Product j (0-based) belongs to nest j // prods_per_g.
      - I "people" or "markets".
      - For each person i and product j:
          x_{ji}  ~ Normal(0, sigma_x),
          xi_{ji} ~ Normal(0, sigma_xi),
          delta_{ji} = beta * x_{ji} + xi_{ji}.
      - Shares:
          s_{ji} = s_{g_j, i} * s_{j|g_j, i}
        where
          s_{g, i}   = D_{g i}^rho / (1 + sum_h D_{h i}^rho),
          D_{g i}    = sum_{r in g} exp(delta_{r i} / rho),
          s_{j|g, i} = exp(delta_{j i} / rho) / D_{g_j i},
          s_{0, i}   = 1 / (1 + sum_h D_{h i}^rho)      (outside option).

    All shares are evaluated in log space (log-sum-exp), so a small rho or a
    large delta cannot overflow exp() and turn the shares into NaN.

    Side effect: calls np.random.seed(seed), i.e. reseeds NumPy's global RNG.

    Returns:
      DataFrame with I*J rows and columns
      ['person', 'product', 'nest', 'x_ji', 'xi_ji', 'delta_ji',
       's_ji', 's_gi', 's_0i'].  If I == 0 or J == 0 the frame is empty but
      still carries these columns.

    Raises:
      ValueError: if I, G or prods_per_g is negative, or rho is not > 0.
    """
    if I < 0 or G < 0 or prods_per_g < 0:
        raise ValueError("I, G and prods_per_g must be non-negative")
    if not rho > 0:
        raise ValueError("rho must be strictly positive")

    np.random.seed(seed)

    J = G * prods_per_g  # total number of inside products
    columns = ['person', 'product', 'nest', 'x_ji', 'xi_ji', 'delta_ji',
               's_ji', 's_gi', 's_0i']
    if I == 0 or J == 0:
        return pd.DataFrame(columns=columns)

    # Draw x then xi person by person: this keeps the random stream (and hence
    # x_ji, xi_ji, delta_ji) the same as a per-person simulation for a given seed.
    x = np.empty((I, J))
    xi = np.empty((I, J))
    for i in range(I):
        x[i] = np.random.normal(0.0, sigma_x, size=J)
        xi[i] = np.random.normal(0.0, sigma_xi, size=J)
    delta = beta * x + xi

    # Scaled utilities laid out as (person, nest, product-within-nest).
    v = (delta / rho).reshape(I, G, prods_per_g)

    # log D_{g i} via log-sum-exp: subtract the nest maximum before exp().
    v_max = v.max(axis=2, keepdims=True)
    log_nest_sum = v_max + np.log(np.exp(v - v_max).sum(axis=2, keepdims=True))

    # Within-nest conditional shares s_{j|g,i}.
    s_within = np.exp(v - log_nest_sum)

    # log(D_{g i}^rho) for every nest, shape (I, G).
    incl = rho * log_nest_sum[:, :, 0]

    # log(1 + sum_h D_{h i}^rho); the outside option enters with log-weight 0,
    # so the stabilising shift is max(0, max_g incl).
    shift = np.maximum(incl.max(axis=1, keepdims=True), 0.0)
    log_denom = shift + np.log(
        np.exp(-shift) + np.exp(incl - shift).sum(axis=1, keepdims=True)
    )

    s_g = np.exp(incl - log_denom)        # nest shares, shape (I, G)
    s_0 = np.exp(-log_denom).ravel()      # outside shares, shape (I,)
    s_j = s_g[:, :, None] * s_within      # product shares, shape (I, G, P)

    product_ids = np.arange(J)
    return pd.DataFrame({
        'person': np.repeat(np.arange(I), J),
        'product': np.tile(product_ids, I),
        'nest': np.tile(product_ids // prods_per_g, I),
        'x_ji': x.ravel(),
        'xi_ji': xi.ravel(),
        'delta_ji': delta.ravel(),
        's_ji': s_j.reshape(I, J).ravel(),
        's_gi': np.repeat(s_g, prods_per_g, axis=1).ravel(),  # nest share of product's nest
        's_0i': np.repeat(s_0, J),
    })


if __name__ == "__main__":
    # Demo run: spell out the default configuration explicitly.
    settings = dict(
        I=100,
        G=10,
        prods_per_g=10,
        beta=2.0,
        rho=0.5,
        sigma_x=1.0,
        sigma_xi=1.0,
        seed=42,
    )
    df = simulate_nested_logit(**settings)

    # Quick look at the simulated panel.
    print(df.head(15))
    print("\nNumber of rows:", len(df))
    print("Columns:", list(df.columns))

    # s_0i is constant within a person, so keep one row per person before
    # averaging the outside-good share.
    per_person = df[['person', 's_0i']].drop_duplicates()
    avg_outside_share = per_person['s_0i'].mean()
    print(f"\nAverage outside share across the 100 people: {avg_outside_share:.4f}")
