In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

### Split a scenario's .csv file into one .csv file per segment (age class)

In [2]:
def get_age_class(age):
    """
    Returns the age class of the given age

    Ages below 80 are grouped by decade ('0 - 9', '10 - 19', ..., '70 - 79');
    ages of 80 and above share the single open-ended class '80+'.

    Parameters
    ----------
    age : int or float
        Age of the individual. Float values (e.g. 45.0) are bucketed by
        decade and labelled with integer bounds.
    Returns
    -------
    str or None
        Age class of the individual, or None when the age is missing
        (None / NaN)
    """

    # A missing age has no class; make that outcome explicit instead of
    # silently falling off the end of the function.
    if pd.isna(age):
        return None

    if age >= 80:
        return '80+'

    # Floor to the start of the decade and force an int so that float ages
    # yield '70 - 79' rather than '70.0 - 79.0'.
    min_range = int(age // 10) * 10
    max_range = min_range + 9
    return str(min_range) + ' - ' + str(max_range)

def get_segmentation_data(path, output_file):
    """
    Load and preprocess segmentation data into age classes and save it to a file

    One CSV file is written per age class, named
    ``<output_file>_<age_class>.csv.gz`` (for example
    ``..._10 - 19.csv.gz``). Rows for which ``get_age_class`` returns None
    (missing age) are not written to any file.

    Parameters
    ----------
    path : str
        Path to the segmentation data (a CSV file with an ``age`` column)
    output_file : str
        Path prefix of the output files; its parent directory is created
        if it does not exist yet
    Returns
    -------
    None
    """
    import os  # local import: only needed to prepare the output directory

    # Load data
    df = pd.read_csv(path)
    df['age_class'] = df['age'].apply(get_age_class)

    # Make sure the destination directory exists before writing anything.
    output_dir = os.path.dirname(output_file)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # A single groupby pass partitions the rows, instead of re-filtering the
    # whole frame once per class. groupby drops missing keys by default, so a
    # None age class can no longer break the file-name concatenation.
    for age_class, segment in df.groupby('age_class'):
        segment.to_csv(output_file + '_' + age_class + '.csv.gz', index=False)

    return None

In [11]:
# Split the prepared Vaud period-activities table into one file per age class
# under the scenario_1 folder.
get_segmentation_data(
    path='../data/abm/vaud/prepared/vaud_period_activities.csv.gz',
    output_file='../data/abm/vaud/prepared/scenarios/scenario_1/vaud_period_activities',
)

               id  type     facility  age               period age_class
0         1069770  home   home480932   85  1900-01-01 00:00:00       80+
1         1069770  home   home480932   85  1900-01-01 01:00:00       80+
2         1069770  home   home480932   85  1900-01-01 02:00:00       80+
3         1069770  home   home480932   85  1900-01-01 03:00:00       80+
4         1069770  home   home480932   85  1900-01-01 04:00:00       80+
...           ...   ...          ...  ...                  ...       ...
21355560  7134593  home  home3128753   14  1900-01-01 19:00:00   10 - 19
21355561  7134593  home  home3128753   14  1900-01-01 20:00:00   10 - 19
21355562  7134593  home  home3128753   14  1900-01-01 21:00:00   10 - 19
21355563  7134593  home  home3128753   14  1900-01-01 22:00:00   10 - 19
21355564  7134593  home  home3128753   14  1900-01-01 23:00:00   10 - 19

[21355565 rows x 6 columns]
