In [None]:
import numpy as np
import pandas as pd
import pylogit as pl

In an imaginary city, there are only 3 people. When travelling to work, they can choose to use Car, Train, or Bicycle. Each mode is encoded as a number:

* Mode: Car (1), Bicycle (2), Train (3)

Each individual prefers one travel mode, based on its travel time and health benefits.

* Person 1 (person_id = 1): prefers Car (mode=1)
* Person 2 (person_id = 2): prefers Train (mode=3)
* Person 3 (person_id = 3): prefers Bicycle (mode=2)

How do we represent these preferences in a long or wide data format?


In short, the Wide format stores one person (with all of that person's alternatives and the chosen mode) per row, while the Long format stores one alternative of one person per row.

### Wide format

In [None]:
# Person 1: the commute takes 15 minutes by car, 30 minutes by train and 45 minutes by bike.
# Person 1 prefers the car, so mode_choice = 1.
person_1 = pd.DataFrame({
    'person_id': [1],
    'car_time': [15],
    'train_time': [30],
    'bike_time': [45],
    'mode_choice': [1],
})
person_1

Unnamed: 0,person_id,car_time,train_time,bike_time,mode_choice
0,1,15,30,45,1


In [None]:
# Person 2: the commute takes 30 minutes by car, 40 minutes by train and 60 minutes by bike.
# Person 2 prefers the train, so mode_choice = 3.
person_2 = pd.DataFrame({
    'person_id': [2],
    'car_time': [30],
    'train_time': [40],
    'bike_time': [60],
    'mode_choice': [3],
})
person_2

Unnamed: 0,person_id,car_time,train_time,bike_time,mode_choice
0,2,30,40,60,3


In [None]:
# Person 3: the commute takes 10 minutes by car, 15 minutes by train and 20 minutes by bike.
# Person 3 prefers cycling, so mode_choice = 2.
person_3 = pd.DataFrame({
    'person_id': [3],
    'car_time': [10],
    'train_time': [15],
    'bike_time': [20],
    'mode_choice': [2],
})
person_3

Unnamed: 0,person_id,car_time,train_time,bike_time,mode_choice
0,3,10,15,20,2


In [None]:
# Stack the three single-row frames into one wide table (one row per person),
# renumbering the index 0..n-1.
df_wide = pd.concat([person_1, person_2, person_3], ignore_index=True)
df_wide

Unnamed: 0,person_id,car_time,train_time,bike_time,mode_choice
0,1,15,30,45,1
1,2,30,40,60,3
2,3,10,15,20,2


### Long format

A long format stores each person's Alternative (mode) and its Choice (Yes/No or 1/0) per Row.

In [None]:
# Person 1, one row per alternative:
# car (mode 1) takes 15 minutes, bike (mode 2) 45 minutes, train (mode 3) 30 minutes.
# is_choosen is 1 on the preferred alternative (car) and 0 elsewhere.
person_1 = pd.DataFrame({
    'person_id': [1, 1, 1],
    'mode': [1, 2, 3],
    'time': [15, 45, 30],
    'is_choosen': [1, 0, 0],
})
person_1

Unnamed: 0,person_id,mode,time,is_choosen
0,1,1,15,1
1,1,2,45,0
2,1,3,30,0


In [None]:
# Person 2, one row per alternative:
# car (mode 1) takes 30 minutes, bike (mode 2) 60 minutes, train (mode 3) 40 minutes.
# is_choosen is 1 on the preferred alternative (train) and 0 elsewhere.
person_2 = pd.DataFrame({
    'person_id': [2, 2, 2],
    'mode': [1, 2, 3],
    'time': [30, 60, 40],
    'is_choosen': [0, 0, 1],
})
person_2

Unnamed: 0,person_id,mode,time,is_choosen
0,2,1,30,0
1,2,2,60,0
2,2,3,40,1


In [None]:
# Person 3, one row per alternative:
# car (mode 1) takes 10 minutes, bike (mode 2) 20 minutes, train (mode 3) 15 minutes.
# is_choosen is 1 on the preferred alternative (bike) and 0 elsewhere.
person_3 = pd.DataFrame({
    'person_id': [3, 3, 3],
    'mode': [1, 2, 3],
    'time': [10, 20, 15],
    'is_choosen': [0, 1, 0],
})
person_3

Unnamed: 0,person_id,mode,time,is_choosen
0,3,1,10,0
1,3,2,20,1
2,3,3,15,0


In [None]:
# Stack the three per-person frames into one long table (one row per person/alternative pair),
# renumbering the index 0..n-1.
df_long = pd.concat([person_1, person_2, person_3], ignore_index=True)
df_long

Unnamed: 0,person_id,mode,time,is_choosen
0,1,1,15,1
1,1,2,45,0
2,1,3,30,0
3,2,1,30,0
4,2,2,60,0
5,2,3,40,1
6,3,1,10,0
7,3,2,20,1
8,3,3,15,0


In [None]:
# Show the wide-format table again, for side-by-side comparison with the long-format table.
df_wide

Unnamed: 0,person_id,car_time,train_time,bike_time,mode_choice
0,1,15,30,45,1
1,2,30,40,60,3
2,3,10,15,20,2


### Convert Long format to Wide format

In [None]:
# Roles of the long-format columns, passed to pl.convert_long_to_wide.
obs_cols = 'person_id'   # column identifying the observation / person / individual
alt_id = "mode"          # column holding the alternative (mode) id of each row
chc_cols = 'is_choosen'  # choice indicator column, 1 (yes) / 0 (no)

ind_vars = []            # individual-specific variables (none in this example)
alt_vars = ['time']      # alternative-specific variables

# Alternative id -> label used in the generated wide column names.
alt_names = {1: 'Car', 2: 'Bike', 3: 'Train'}

In [None]:
# Convert the long table to wide format with pylogit, using the column roles defined above.
df_wide_tfm = pl.convert_long_to_wide(
    df_long,
    ind_vars=ind_vars,
    alt_specific_vars=alt_vars,
    subset_specific_vars={},
    obs_id_col=obs_cols,
    alt_id_col=alt_id,
    choice_col=chc_cols,
    alt_name_dict=alt_names,
)
df_wide_tfm

Unnamed: 0,person_id,is_choosen,availability_Car,availability_Bike,availability_Train,time_Car,time_Bike,time_Train
0,1,1,1,1,1,15.0,45.0,30.0
1,2,3,1,1,1,30.0,60.0,40.0
2,3,2,1,1,1,10.0,20.0,15.0


In [None]:
# The hand-built wide table, shown for comparison with the converted df_wide_tfm.
df_wide

Unnamed: 0,person_id,car_time,train_time,bike_time,mode_choice
0,1,15,30,45,1
1,2,30,40,60,3
2,3,10,15,20,2


### Convert Wide format to Long format

In [None]:
# Inputs for pl.convert_wide_to_long.
ind_vars = []  # no individual-specific columns in this example

# Wide column that stores the id of the chosen mode.
choice = "mode_choice"

# New long-format column -> {alternative id: wide column supplying its value}.
alt_vars = {
    'travel_time': {
        1: 'car_time',
        2: 'bike_time',
        3: 'train_time',
    },
}

In [None]:
# Add an observation id column numbered 1..n (one id per row of df_wide).
obs_col = "custom_id"
df_wide[obs_col] = np.arange(1, len(df_wide) + 1, dtype=int)


In [None]:
# Alternative id -> availability column.
# Per the original author's note, passing None instead of this mapping raises an error.
avai_vars = {1: 'car_av', 2: 'bike_av', 3: 'train_av'}

# Every mode is available to every person, so each availability column is all 1s.
df_wide[avai_vars[1]] = 1  # car availability
df_wide[avai_vars[2]] = 1  # bike availability
df_wide[avai_vars[3]] = 1  # train availability

In [None]:
# Convert the wide table to long format with pylogit, using the mappings defined above.
df_long_tfm = pl.convert_wide_to_long(
    wide_data=df_wide,
    ind_vars=ind_vars,
    alt_specific_vars=alt_vars,
    availability_vars=avai_vars,
    obs_id_col=obs_col,
    choice_col=choice,
)
df_long_tfm



Unnamed: 0,custom_id,alt_id,mode_choice,travel_time
0,1,1,1,15
1,1,2,0,45
2,1,3,0,30
3,2,1,0,30
4,2,2,0,60
5,2,3,1,40
6,3,1,0,10
7,3,2,1,20
8,3,3,0,15


In [None]:
# df_wide now also carries the custom_id and *_av availability columns added in the cells above.
df_wide

Unnamed: 0,person_id,car_time,train_time,bike_time,mode_choice,custom_id,car_av,bike_av,train_av
0,1,15,30,45,1,1,1,1,1
1,2,30,40,60,3,2,1,1,1
2,3,10,15,20,2,3,1,1,1


In [None]:
# The hand-built long table, shown for comparison with the converted df_long_tfm.
df_long

Unnamed: 0,person_id,mode,time,is_choosen
0,1,1,15,1
1,1,2,45,0
2,1,3,30,0
3,2,1,30,0
4,2,2,60,0
5,2,3,40,1
6,3,1,10,0
7,3,2,20,1
8,3,3,15,0
