In [1]:
import os
# Move the working directory two levels up so the relative paths used in the
# later cells resolve from there.
# NOTE(review): the path is relative, so re-running this cell without
# restarting the kernel moves up two more levels each time.
os.chdir('../..')

In [2]:
from platform import python_version

# Record the interpreter version this notebook was executed with.
print("{}".format(python_version()))

3.7.9


In [3]:
import numpy as np
import pandas as pd

import tensorflow as tf

import matplotlib.pyplot as plt

from modules.datgan import DATWGAN

import networkx as nx
import json
import beepy

# IPython magics: these lines only work inside a notebook / IPython session.
# Render matplotlib figures inline in the cell output.
%matplotlib inline
# (Re)load the autoreload extension and enable mode 2, so edits to imported
# project modules are picked up without restarting the kernel.
%reload_ext autoreload
%autoreload 2






In [4]:
# Name of the dataset to work on; the cells below handle 'Chicago' and 'LPMC'.
dataset = "LPMC"

In [5]:
# Load the selected dataset. With index_col=False no column is used as the
# index, so the file's leading unnamed column stays in the frame as data.
df = pd.read_csv(
    '../data/{}/data.csv'.format(dataset),
    index_col=False,
)

In [6]:
# Preview the first five rows as a sanity check on the loaded columns.
df.head(n=5)

Unnamed: 0,travel_mode,purpose,fueltype,faretype,bus_scale,survey_year,travel_year,travel_month,travel_date,day_of_week,...,dur_pt_access,dur_pt_rail,dur_pt_bus,dur_pt_int,pt_n_interchanges,dur_driving,cost_transit,cost_driving_fuel,cost_driving_con_charge,driving_traffic_percent
0,drive,HBO,Petrol_Car,child,0.0,1,2012,4,1,7,...,0.134444,0.0,0.016667,0.0,0,0.052222,0.0,0.14,0.0,0.111702
1,drive,HBO,Petrol_Car,free,0.0,1,2012,4,1,7,...,0.241389,0.0,0.122222,0.0,0,0.132222,0.0,0.5,0.0,0.065126
2,drive,HBO,Petrol_Car,full,1.0,1,2012,4,1,7,...,0.2575,0.0,0.873889,0.089444,1,0.508333,3.0,1.59,0.0,0.356831
3,pt,HBW,Average_Car,full,1.0,1,2012,4,1,7,...,0.123889,0.0,0.208056,0.091667,1,0.115556,3.0,0.33,0.0,0.033654
4,pt,HBO,Average_Car,free,0.0,1,2012,4,1,7,...,0.171389,0.0,0.334444,0.0,0,0.196389,0.0,0.53,0.0,0.035361


In [7]:
# Choose the columns that are handed to DATWGAN as continuous variables for
# the selected dataset.
# Strings are compared by value with `==`: `is` tests object identity, which
# only matches when the interpreter happens to intern both strings, and it
# triggers a SyntaxWarning on Python 3.8+.
if dataset == 'Chicago':
    continuous_columns = ["distance", "age", "departure_time"]
elif dataset == 'LPMC':
    continuous_columns = [
        'start_time_linear',
        'age',
        'distance',
        'dur_walking',
        'dur_cycling',
        'dur_pt_access',
        'dur_pt_rail',
        'dur_pt_bus',
        'dur_pt_int',
        'dur_driving',
        'cost_transit',
        'cost_driving_fuel',
        'driving_traffic_percent',
    ]
else:
    # Fail here with a clear message instead of a NameError on the first
    # later use of `continuous_columns`.
    raise ValueError(
        "Unknown dataset {!r}; expected 'Chicago' or 'LPMC'".format(dataset)
    )

In [8]:
# Hand-specified directed graph over the dataset's columns; it is passed to
# `datgan.fit` together with the data. Each tuple is one directed edge
# (source, target).
graph = nx.DiGraph()

# Strings are compared by value with `==`: `is` tests object identity, which
# only matches when the interpreter happens to intern both strings, and it
# triggers a SyntaxWarning on Python 3.8+.
if dataset == 'Chicago':
    graph.add_edges_from([
        ("age", "license"),
        ("age", "education_level"),
        ("gender", "work_status"),
        ("education_level", "work_status"),
        ("education_level", "hh_income"),
        ("work_status", "hh_income"),
        ("hh_income", "hh_descr"),
        ("hh_income", "hh_size"),
        ("hh_size", "hh_vehicles"),
        ("hh_size", "hh_bikes"),
        ("work_status", "trip_purpose"),
        ("trip_purpose", "departure_time"),
        ("trip_purpose", "distance"),
        ("travel_dow", "choice"),
        ("distance", "choice"),
        ("departure_time", "choice"),
        ("hh_vehicles", "choice"),
        ("hh_bikes", "choice"),
        ("license", "choice"),
        # Non necessary links
        ("education_level", "hh_size"),
        ("work_status", "hh_descr"),
        ("work_status", "hh_size"),
        ("hh_income", "hh_bikes"),
        ("hh_income", "hh_vehicles"),
        ("trip_purpose", "choice")
    ])
elif dataset == 'LPMC':
    graph.add_edges_from([
        ("travel_year", "survey_year"),
        ("travel_date", "day_of_week"),
        ("day_of_week", "purpose"),
        ("purpose", "start_time_linear"),
        ("purpose", "cost_driving_con_charge"),
        ("purpose", "distance"),
        ("day_of_week", "driving_traffic_percent"),
        ("day_of_week", "cost_driving_con_charge"),
        ("start_time_linear", "driving_traffic_percent"),
        ("start_time_linear", "cost_driving_con_charge"),
        ("driving_traffic_percent", "cost_driving_con_charge"),
        ("female", "driving_license"),
        ("age", "bus_scale"),
        ("age", "car_ownership"),
        ("age", "driving_license"),
        ("age", "faretype"),
        ("driving_license", "car_ownership"),
        ("car_ownership", "fueltype"),
        ("fueltype", "cost_driving_con_charge"),
        ("fueltype", "cost_driving_fuel"),
        ("distance", "cost_driving_fuel"),
        ("distance", "dur_driving"),
        ("distance", "dur_walking"),
        ("distance", "dur_cycling"),
        ("distance", "dur_pt_access"),
        ("distance", "dur_pt_rail"),
        ("distance", "dur_pt_bus"),
        ("distance", "dur_pt_int"),
        ("dur_pt_bus", "cost_transit"),
        ("dur_pt_rail", "cost_transit"),
        ("pt_n_interchanges", "dur_pt_int"),
        ("pt_n_interchanges", "cost_transit"),
        ("faretype", "cost_transit"),
        ("bus_scale", "cost_transit"),
        ("car_ownership", "travel_mode"),
        ("age", "travel_mode"),
        ("cost_driving_con_charge", "travel_mode"),
        ("driving_traffic_percent", "travel_mode"),
        ("female", "travel_mode"),
        ("purpose", "travel_mode"),
        ("cost_transit", "travel_mode"),
        ("cost_driving_fuel", "travel_mode"),
        ("dur_driving", "travel_mode"),
        ("dur_walking", "travel_mode"),
        ("dur_cycling", "travel_mode"),
        ("dur_pt_access", "travel_mode"),
        ("dur_pt_rail", "travel_mode"),
        ("dur_pt_bus", "travel_mode"),
        ("dur_pt_int", "travel_mode")
    ])
    # "travel_month" has no edges, so it is added explicitly as an isolated
    # node to keep it in the graph.
    graph.add_node("travel_month")
else:
    # Fail fast instead of silently continuing with an empty graph.
    raise ValueError(
        "Unknown dataset {!r}; expected 'Chicago' or 'LPMC'".format(dataset)
    )

In [9]:
# Per-dataset folder passed to DATWGAN as its `output` argument.
output_folder = ''.join(['../output/', dataset, '/TEST1/'])

In [10]:
# Configure the DATWGAN model: the continuous column names go in positionally,
# the remaining settings are passed by keyword.
datgan = DATWGAN(
    continuous_columns,
    max_epoch=1000,
    batch_size=500,
    output=output_folder,
    gpu=0,
    learning_rate=2e-4,
)  # option left disabled by the original author: noisy_training='OR'

In [11]:
# Fit the model on `df` using the dependency graph built earlier. The recorded
# log shows this starts by preprocessing/encoding each variable.
# NOTE(review): the stored output ends in a KeyboardInterrupt, so the recorded
# run never finished.
test = datgan.fit(df, graph)

[32m[1119 20:45:26 @DATSGAN.py:169][0m Preprocessing the data!
[32m[1119 20:45:26 @data.py:391][0m Encoding categorical variable "travel_year"...
[32m[1119 20:45:26 @data.py:391][0m Encoding categorical variable "travel_date"...
[32m[1119 20:45:26 @data.py:391][0m Encoding categorical variable "female"...
[32m[1119 20:45:26 @data.py:378][0m Encoding continuous variable "age"...
[32m[1119 20:45:26 @data.py:240][0m   Fitting model with 10 components
[32m[1119 20:45:36 @data.py:267][0m   Predictions were done on 8 components => Fit with 8 components!
[32m[1119 20:45:44 @data.py:267][0m   Predictions were done on 6 components => Fit with 6 components!
[32m[1119 20:45:51 @data.py:272][0m   Predictions were done on 6 components => FINISHED!
[32m[1119 20:45:51 @data.py:275][0m   Train VGM with full data
[32m[1119 20:46:04 @data.py:391][0m Encoding categorical variable "pt_n_interchanges"...
[32m[1119 20:46:04 @data.py:391][0m Encoding categorical variable "travel_month

KeyboardInterrupt: 

In [None]:
# Save the model under the name 'trained'.
# NOTE(review): force=True presumably overwrites an existing save — confirm
# against the DATWGAN implementation.
datgan.save('trained', force=True)

In [None]:
# Play sound number 6 once execution reaches this cell — presumably an audible
# "finished" notification after the long-running cells; confirm the sound id
# against the beepy documentation.
beepy.beep(6)