In [5]:
# Langzeitmodul - Bevölkerungssynthese
import os.path
import pandas as pd
from os.path import join
import matplotlib.style

# Result folder of the long-term module
_path = r"C:\Users\devin.feng\Desktop\Git\mobitopp-rastatt\output\rastatt\demand-data"

### Household analysis
_household_file = join(_path, "household.csv")
_HH = pd.read_csv(_household_file, sep=";")

# Household attributes (the column names of the household table)
household_attributes = _HH.columns.tolist()
df = pd.DataFrame(household_attributes)

# Number of rows in the household table
num_of_HH = _HH.shape[0]

# Households per zone: how often each homeZone value occurs
HH_per_zone = _HH["homeZone"].value_counts()

# Distribution of the number of cars per household
cars_per_household = _HH["totalNumberOfCars"].value_counts()

# Optional console summary (disabled):
# print("HH Attributes Spalten: ", household_attributes)
# print("Num. HHs: ", num_of_HH)
# print("Avg. HH pro Zelle: ", HH_per_zone.mean())
# print("Anzahl Zellen: ", len(HH_per_zone))
# print("Anzahl der Pkw pro Haushalt: ", cars_per_household)

### Person analysis
_persons = pd.read_csv(join(_path, "person.csv"), sep=";")

# All statistics below are computed on the freshly loaded `_persons` table.
# This cell does not define `per_suffix`; relying on it only worked with
# leftover kernel state and fails after "Restart & Run All".
person_attributes = list(_persons.columns)  # person attributes (column names)
num_of_persons = len(_persons)  # number of rows in the person table
mean_age = _persons.age.mean()  # mean of the age column
gender_distr = _persons.gender.value_counts(normalize=True)  # relative gender shares
employment_distr = _persons.employment.value_counts()  # employment categories
# Rows per householdId, then how often each household size occurs
household_size_distr = _persons.groupby("householdId").size().value_counts()

### Mobility tools
ticket_distr = _persons.hasCommuterTicket.value_counts()  # public-transport season ticket
license_distr = _persons.hasLicense.value_counts()  # driving licence

### Join the person table with the household table
# Columns that exist in both tables receive the '_per' / '_hh' suffix
# (the original note mentions income as an example); the join key
# householdId appears only once in the result.
_merged = pd.merge(_persons, _HH, suffixes=('_per', '_hh'), on="householdId")
_age = _merged.age  # shortcut for the 'age' attribute

# Persons per zone, overall and split into minors (< 18) and adults (>= 18)
persons_per_zone = _merged.homeZone.value_counts()
minors_per_zone = _merged[_age < 18].homeZone.value_counts()
adults_per_zone = _merged[_age >= 18].homeZone.value_counts()

# Optional console summary (disabled):
# print("Person Attributes Spalten: ", person_attributes)
# print("Anzahl Personen: ", num_of_persons)
# print("Durchschnittsalter: ", mean_age)

# Export the first 20 rows of each table as samples.
# The target folder is created first so to_csv does not fail on a fresh checkout.
_sample_dir = 'output/generated_csvs'
os.makedirs(_sample_dir, exist_ok=True)
_persons.head(20).to_csv(os.path.join(_sample_dir, 'persons_df.csv'), sep=';', index=False)
_HH.head(20).to_csv(os.path.join(_sample_dir, 'hh_df.csv'), sep=';', index=False)
_merged.head(20).to_csv(os.path.join(_sample_dir, 'merged_df.csv'), sep=';', index=False)

In [9]:
### Strukturgrößen: attributes (Name, Type, Min-Max, Uniques)
import pandas as pd

# Suffixed copies of both tables so every attribute name shows its source
hh_suffix = _HH.add_suffix('_hh')
per_suffix = _persons.add_suffix('_per')

# Combined column list: household attributes first, then person attributes
cols = list(hh_suffix.columns) + list(per_suffix.columns)

# Attributes with fewer distinct values than this get their values listed;
# for all others only the number of distinct values is stored.
# NOTE(review): the CSV header says ">20", but the cut-off is "< 20" as in the
# original code, so exactly 20 distinct values are reported as a count.
_MAX_LISTED_UNIQUES = 20

# Build one metadata record per attribute. The source label comes from the
# frame the column belongs to, so a person attribute whose name happens to
# contain "_hh" is no longer labelled as a household attribute.
metadata = []
for _source, _frame in (('HH', hh_suffix), ('Per', per_suffix)):
    for col in _frame.columns:
        series = _frame[col]
        # nunique() ignores missing values, unique() keeps them
        _n_unique = series.nunique()
        if _n_unique < _MAX_LISTED_UNIQUES:
            uniques = series.unique()
        else:
            uniques = _n_unique
        metadata.append({
            'Attribut': col,
            'Quelle': _source,
            'Datentyp': series.dtype,
            'Untergrenze': series.min(),
            'Obergrenze': series.max(),
            'Eindeutig (# wenn >20 Kategorien)': uniques,
        })

# Datatype names in the same order as `cols` (kept for interactive inspection)
col_dtypes = [str(_record['Datentyp']) for _record in metadata]

# Collect the records in a table and export it; create the folder if missing
df_meta = pd.DataFrame(metadata)
os.makedirs('output/generated_csvs', exist_ok=True)
df_meta.to_csv(os.path.join('output/generated_csvs', 'df_meta.csv'), sep=';', index=False)


In [None]:
### Wege (Kurzzeit-Modul)
import pandas as pd
from os.path import join
import matplotlib.style
import matplotlib.pyplot as plt

matplotlib.style.use('fivethirtyeight')

_path = r"C:\Users\devin.feng\Desktop\Git\mobitopp-rastatt\output"
# Must be 'rastatt/results/simulation' (the manual is wrong here)
_short_term_path = join(_path, 'rastatt/results/simulation')
_long_term_path = join(_path, 'rastatt/demand-data')

### Getting started (overview of the data) ###
_trip_file = join(_short_term_path, 'demandsimulationResult.csv')
_trips = pd.read_csv(_trip_file, sep=";")

# Attributes (columns) and head of the trip table
trip_attributes = _trips.columns
trips_head = _trips.head()

# Number of rows in the trip table
num_of_trips = _trips.shape[0]

# Distribution of trips over persons: rows per personOid
_trips_per_person = _trips.groupby('personOid').size()

# Histogram plot, written to a PNG file and closed afterwards
_hist_ax = _trips_per_person.hist(bins=10, rwidth=0.9)
_hist_ax.set_xlabel('Anzahl Wege in der Woche (?)')
_hist_ax.set_ylabel('Personen')
plt.savefig('Wege pro Woche.png', bbox_inches='tight')
plt.close()

# How many persons have each trip count
num_of_trips_distr = _trips_per_person.value_counts()


In [None]:
### Modal Split (Kurzzeit-Modul), Wegbezogen ####

# Simple calculation: absolute and relative frequency of each tripMode value
modal_split_abs = _trips["tripMode"].value_counts()
modal_split_rel = _trips["tripMode"].value_counts(normalize=True)
# Helper for computing the modal split
# Purpose: keeps the code that follows easier to read
def modal_split(trip_df):
    """Return the relative frequency of each value in the ``tripMode`` column.

    Parameters
    ----------
    trip_df : pandas.DataFrame
        Trip table that contains a ``tripMode`` column.

    Returns
    -------
    pandas.Series
        Share of rows per ``tripMode`` value, as produced by
        ``value_counts(normalize=True)``.
    """
    mode_column = trip_df["tripMode"]
    return mode_column.value_counts(normalize=True)
# Modal split over all trips via the helper function
test_modal_split_funtion = modal_split(_trips)

# Modal split for trips within Rastatt
# (origin and destination in zones numbered below 6200)
_sourceZone = _trips.sourceZone
_targetZone = _trips.targetZone
_starts_in_rastatt = _sourceZone < 6200
_ends_in_rastatt = _targetZone < 6200
modal_split_in_rastatt = modal_split(_trips[_starts_in_rastatt & _ends_in_rastatt])

# Modal split on work trips (activityType code 1, per the original note)
_activityType = _trips.activityType
_is_work_trip = _activityType == 1
modal_split_work = modal_split(_trips[_is_work_trip])
