# Translate demand files format: from .dat to flow.csv

In [3]:
import sys
import pandas as pd
import xml.etree.ElementTree as ET
import datetime
import re
import nltk
import numpy
import os

from IPython.display import display, HTML
low_memory=False

In [4]:
PATH="data/OD_MADRID_v2"
SCENARIO="madrid_barrio_salamanca_od"
SCENARIO="madrid_las_tablas_od"
SCENARIO="madrid_retiro_od"

file_nodes="{}/{}_nodes.csv".format(PATH,SCENARIO)
file_edges="{}/{}_edgeids.csv".format(PATH,SCENARIO)
file_weights="{}/{}_weights.csv".format(PATH,SCENARIO)
file_speeds="{}/{}_speeds.csv".format(PATH,SCENARIO)
file_lengths="{}/{}_lengths.csv".format(PATH,SCENARIO)
file_demands="{}/{}_source_demand.dat".format(PATH,SCENARIO)

In [5]:
DEST_DIR=PATH
out_file_flows="{}/{}_flows_NEW.csv".format(DEST_DIR,SCENARIO)

In [6]:
def isNaN(num):
    return num != num

In [163]:
# https://www.saltycrane.com/blog/2008/01/how-to-invert-dict-in-python/
def invert_dict(d):
    return dict([(value, key) for key, value in d.items()]) 

In [18]:
def demand_dat_2_csv( file_nodes, file_demands, out_file_flows ):

    low_memory=False

    sep=","
    demsep=";"
  
    #---------------------------------------------------
    # Load sources
    #---------------------------------------------------
    # Load nodes
    print("Reading nodes file: "+file_nodes)
    dfn = pd.read_csv(file_nodes, encoding='utf-8', sep=sep )
    dfn.columns.values[0] = "node_name"
    
    # Load DEMANDS
    print("Reading DEMAND file: "+file_demands)
    dfd = pd.read_csv(file_demands, encoding='utf-8', sep=demsep )

    #---------------------------------------------------
    # Generating flows file
    #---------------------------------------------------
    print("Generating flows file: "+out_file_flows)
    df2 = dfn[['node_name']]
    for col in dfn['node_name'].tolist():
        df2[str(col)]=0

    nr = len(df2.columns)
    nnodes={}
    inodes={}
    for x in range(1,nr):
       nname='{}'.format(df2.columns[x])
       nnodes[nname]=x-1
       inodes[x-1]=nname
        
    for x, rowData in dfd.iterrows():
        nfrom = str(rowData[0])
        if( nfrom[0] == 'N'):
            nfrom  = nfrom[1:]

        nto = str(rowData[1])
        if( nto[0] == 'N'):
            nto  = nto[1:]

        demand = rowData[2]
        df2.iloc[nnodes[nfrom],nnodes[nto]]=demand

    df2.to_csv(out_file_flows, encoding='utf-8', index=False, sep=sep)
    return df2

In [19]:
df= demand_dat_2_csv( file_nodes, file_demands, out_file_flows)

Reading nodes file: data/OD_MADRID_v2/madrid_retiro_od_nodes.csv
Reading DEMAND file: data/OD_MADRID_v2/madrid_retiro_od_source_demand.dat
Generating flows file: data/OD_MADRID_v2/madrid_retiro_od_flows_NEW.csv


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


# BACKUP

In [13]:
sep=','
demsep=';'
dfd = pd.read_csv(file_demands, encoding='utf-8', sep=demsep )
dfn = pd.read_csv(file_nodes, encoding='utf-8', sep=sep)
dfn.columns.values[0] = "node_name"
df2 = dfn[['node_name']]
dfd.head()

Unnamed: 0,origin,dest,demand
0,1760085654,1493669240,200
1,1760085654,1950481127,200
2,1760085654,1950481126,200
3,1760085654,1950481115,200
4,1760085654,1505184332,200


In [9]:
for col in dfn['node_name'].tolist():
    df2[str(col)]=0

def cast(x):
  str(x)

df2['node_name']=df2['node_name'].apply(str)
df2.head()


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


Unnamed: 0,node_name,1209330272,1209330640,1209330887,1209332387,1278838932,1278838937,1278838975,1278838976,1278838994,...,4576234828,554166728,942080445,942080448,942080449,942080459,942080469,942080477,996183822,996184088
0,1209330272,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,1209330640,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1209330887,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,1209332387,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,1278838932,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [10]:
nr = len(df2.columns)
nnodes={}
inodes={}
for x in range(1,nr):
   nname='{}'.format(df2.columns[x])
   nnodes[nname]=x-1
   inodes[x-1]=nname
nnodes['1209332387']

3

In [11]:
df2.iloc[nnodes['1209330272'],nnodes['1209332387']]=999
df2

Unnamed: 0,node_name,1209330272,1209330640,1209330887,1209332387,1278838932,1278838937,1278838975,1278838976,1278838994,...,4576234828,554166728,942080445,942080448,942080449,942080459,942080469,942080477,996183822,996184088
0,1209330272,0,0,999,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,1209330640,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1209330887,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,1209332387,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,1278838932,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
5,1278838937,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
6,1278838975,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
7,1278838976,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
8,1278838994,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
9,1278839005,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [267]:
df2.loc['1209330272','1209332387']=999
df2

Unnamed: 0,node_name,1209330272,1209330640,1209330887,1209332387,1278838932,1278838937,1278838975,1278838976,1278838994,...,4576234828,554166728,942080445,942080448,942080449,942080459,942080469,942080477,996183822,996184088
0,1209330272,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,1209330640,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,1209330887,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,1209332387,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,1278838932,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,1278838937,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,1278838975,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,1278838976,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,1278838994,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,1278839005,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [227]:
dfn['node_name'].tolist()

[1209330272,
 1209330640,
 1209330887,
 1209332387,
 1278838932,
 1278838937,
 1278838975,
 1278838976,
 1278838994,
 1278839005,
 1278839019,
 1278839025,
 1278839038,
 1278839042,
 1278839084,
 1278839092,
 1278839131,
 1278839146,
 1493669240,
 1505184312,
 1505184313,
 1505184314,
 1505184315,
 1505184316,
 1505184318,
 1505184320,
 1505184321,
 1505184322,
 1505184323,
 1505184324,
 1505184326,
 1505184327,
 1505184328,
 1505184329,
 1505184330,
 1505184331,
 1505184332,
 1505184334,
 1555729951,
 1555729961,
 1555729967,
 1555729985,
 171946,
 1760085654,
 1854720781,
 1900998825,
 1900998827,
 1915012227,
 1915012228,
 1915012230,
 1950481105,
 1950481115,
 1950481116,
 1950481121,
 1950481125,
 1950481126,
 1950481127,
 1957490899,
 1963430768,
 1963430771,
 1973690386,
 1973690392,
 1973690393,
 1981925766,
 1981925767,
 1981925770,
 2005647264,
 20953243,
 20953244,
 21527321,
 21627170,
 21627306,
 2377018002,
 2383701843,
 2383701844,
 2383701845,
 2383701846,
 2383701847,


In [131]:
rows = invert_dict(dfw[dfw.columns[0]].apply(str).to_dict())
"SI" if '1209330272' in rows else "NO"
#rows[0]
#rows


'SI'

In [140]:
dfw.head()

Unnamed: 0.1,Unnamed: 0,1209330272,1209330640,1209330887,1209332387,1278838932,1278838937,1278838975,1278838976,1278838994,...,942080469,942080477,996183822,996184088,1,139,144,171,57,98
0,1209330000.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,,,,,,
1,1209331000.0,7.420446,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,,,,,,
2,1209331000.0,0.0,0.0,0.0,5.940965,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,,,,,,
3,1209332000.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,,,,,,
4,1278839000.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,,,,,,


In [143]:
for col in dfw.columns[1:]:
    dfw[col].values[:] = 0

In [198]:
dfw[dfw.columns[0:3]].head().to_csv

Unnamed: 0,1209330272,1209330640
0,0.0,0.0
1,7.420446,0.0
2,0.0,0.0
3,0.0,0.0
4,0.0,0.0


In [137]:
cols = dfw.columns
for x, rowData in dfd.iterrows():
    if( rowData[0][0] == 'N'):
        nfrom  = rowData[0][1:]
    else:
        nfrom  = rowData[0]

    if( rowData[1][0] == 'N'):
        nto  = rowData[1][1:]
    else:
        nto  = rowData[1]
    nto_idx = rows[nto]
    
    # display(HTML("nfrom={} >>> {}".format(nfrom,"True" if nfrom in cols else "False")))
    # display(HTML("nto  ={} >>> {}".format(nto,"True" if nto in rows else "False")))
    prev   = dfw.loc[nto_idx,nfrom]
    demand = numpy.float64(rowData[2])
    dfw.loc[nfrom,nto_idx]  = demand
    display(HTML("[{},{}] >>> {} >>> {}".format(nfrom,nto,prev,demand)))
    # prev= dfw.loc[nfrom,nto_idx]
    # demand = numpy.float64(777.0)
    # dfw.loc[nfrom,nto_idx] = demand
    # display(HTML("[{},{}] >>> {} >>> {}".format(nfrom,nto,prev,demand)))

In [111]:
dfw.head()

Unnamed: 0.1,Unnamed: 0,1209330272,1209330640,1209330887,1209332387,1278838932,1278838937,1278838975,1278838976,1278838994,...,554166728,942080445,942080448,942080449,942080459,942080469,942080477,996183822,996184088,98
0,1209330000.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
1,1209331000.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
2,1209331000.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
3,1209332000.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
4,1278839000.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
