In [1]:
import os
import re

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

In [2]:
# File containing the activities as extracted from MATSim
_EXTRACTED_ACTIVITIES_PATH_ = "../data/abm/vaud/extracted/vaud_activities.csv.gz"
# Scenario folder; its file names are cleaned up by rename_files() at the end of the notebook
_PERIOD_ACTIVITIES_REP_ = "../data/abm/vaud/prepared/scenarios/scenario_4"
# Excel workbook with the scenario's policies (columns used below: Type, Age_class, Reduction)
_POLICIES_PATH_ = '../data/abm/vaud/prepared/scenarios/scenario_4/scenario_4.xlsx'
# CSV the policy-restricted activities are written to, and later read back from for the segmentation
_PERIOD_PATH_ = '../data/abm/vaud/prepared/scenarios/scenario_4/vaud_period_activities_restricted.csv.gz'
# File-name prefix of the per-age-class files ('<prefix>_<age class>.csv.gz')
_PERIOD_ACTIVITIES_ = '../data/abm/vaud/prepared/scenarios/scenario_4/vaud_period_activities'

# Files where the ID-to-index translations should be saved
_AGENTS_ID_TRANSLATIONS_FILE_ = "../data/abm/vaud/prepared/vaud_agents_id_translations.csv.gz"
_FACILITIES_ID_TRANSLATIONS_FILE_ = "../data/abm/vaud/prepared/vaud_facilities_id_translations.csv.gz"


In [3]:
def get_age_class(age):
    """Return the ten-year age-class label of ``age``.

    Ages below 80 map to ``'<decade> - <decade + 9>'`` (e.g. 34 -> '30 - 39');
    ages of 80 and above map to ``'80+'``.

    The bounds are always formatted as integers, so a float age such as 34.0
    yields '30 - 39' rather than '30.0 - 39.0'. This keeps the labels
    identical to the ones used in the policy table whatever the dtype of the
    age column is.

    Returns None when ``age`` is NaN, since no class can be derived from it.
    """
    if age != age:  # NaN is the only value that is not equal to itself
        return None
    if age >= 80:
        return '80+'
    # Floor to the decade first, then force an integer so no '.0' is printed
    decade_start = int(age // 10) * 10
    return str(decade_start) + ' - ' + str(decade_start + 9)
    

def get_house_id(df):
    """Map every agent to the facility of its first 'home' activity.

    Parameters
    ----------
    df : pandas.DataFrame
        Activities with at least the columns 'id', 'type' and 'facility'.

    Returns
    -------
    dict
        ``{'facility': {agent_id: facility}}``. Only agents with at least one
        'home' row appear; when an agent has several, the first one is kept.
    """
    home_rows = df.loc[df['type'] == 'home']
    first_home_per_agent = home_rows.drop_duplicates(subset=["id"]).set_index('id')
    return first_home_per_agent.loc[:, ['facility']].to_dict()

## Open Activity Data

In [4]:
# Read the activities of all periods into one dataframe (might be very large!)
period_activities = pd.read_csv(
    '../data/abm/vaud/prepared/vaud_period_activities.csv.gz'
)
# Alternative input (test workbook):
# period_activities = pd.read_excel('../data/abm/vaud/20230318_Activities_test_v001.xlsx')

# Tag every activity with the ten-year age class of its agent
period_activities['age_class'] = period_activities['age'].apply(get_age_class)

period_activities.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 21355565 entries, 0 to 21355564
Data columns (total 6 columns):
 #   Column     Dtype 
---  ------     ----- 
 0   id         int64 
 1   type       object
 2   facility   object
 3   age        int64 
 4   period     object
 5   age_class  object
dtypes: int64(2), object(4)
memory usage: 977.6+ MB


We'll process the periods successively: for each period, we'll isolate the activities that occurred during that time; then translate the facility and agent; and finally save the result to a specific file.  

## Open policies

In [5]:
# Read the policy table of the scenario
policies = pd.read_excel(_POLICIES_PATH_)

# Distinct facility types and age-class labels covered by the policies
facility_types = policies['Type'].unique()
age_groups = policies['Age_class'].unique()

print(age_groups)



['0 - 9' '10 - 19' '20 - 29' '30 - 39' '40 - 49' '50 - 59' '60 - 69'
 '70 - 79' '80+']


In [6]:
def apply_restricting_policies(activities, policies):
    """Replace a random share of the restricted activities by staying at home.

    For every (age class, facility type) pair found in ``policies``, the
    matching activities that fall into a random sample of the frame are
    re-typed as 'home' and moved to the home facility of their agent.

    Parameters
    ----------
    activities : pandas.DataFrame
        Activities with the columns 'id', 'type', 'facility' and 'age_class'.
        The frame itself is not modified.
    policies : pandas.DataFrame
        Policies with the columns 'Age_class', 'Type' and 'Reduction'.
        'Reduction' is passed as ``frac`` to ``DataFrame.sample``.

    Returns
    -------
    pandas.DataFrame
        A copy of ``activities`` with the policies applied.

    Notes
    -----
    * A pair without a policy row is reported with a printed message and
      skipped.
    * A reduction of 0 skips that pair only; the remaining facility types of
      the age class are still processed.
    * Activities of agents that have no 'home' activity are left untouched,
      because there is no facility to move them to.
    * Changes are written with ``.loc`` on the selected rows only, so the
      columns that are not modified (e.g. 'id', 'age') keep their dtype.
    """
    # TODO: change the dividing times as parameter of the simulation (t1, t2)
    # TODO: do the reduction on the whole 30 days dataframe

    facility_types = policies.Type.unique()  # e.g. ['shop', 'leisure']
    age_groups = policies.Age_class.unique()  # e.g. ['0 - 9', ..., '80+']
    temp_activities = activities.copy()
    # {agent id: home facility}, taken from the unrestricted activities
    home_by_agent = get_house_id(activities)['facility']

    for age in age_groups:
        for facility in facility_types:
            reductions = policies.loc[
                (policies.Age_class == age) & (policies.Type == facility),
                'Reduction',
            ]
            if reductions.empty:
                print(f"No policy found for facility {facility}, age {age}")
                continue

            theta = reductions.iloc[0]  # first matching row wins
            if theta == 0:
                # Nothing to restrict for this pair; move on to the next one
                continue

            cond_activities = (temp_activities.type == facility) & (temp_activities.age_class == age)
            # The draw is taken over the whole frame and then filtered, so the
            # share of matching activities that is replaced is theta in
            # expectation (fixed seed -> reproducible draw).
            sampled = temp_activities.sample(frac=theta, replace=False, random_state=2)
            restricted_agents = sampled.loc[cond_activities, 'id']

            # Home facility of each restricted activity; agents without a
            # known home are dropped and therefore keep their activity.
            new_facility = restricted_agents.map(home_by_agent).dropna()
            if new_facility.empty:
                continue

            temp_activities.loc[new_facility.index, 'type'] = 'home'
            temp_activities.loc[new_facility.index, 'facility'] = new_facility

    return temp_activities


# Apply the scenario's policies to the full activity table
new_activities = apply_restricting_policies(period_activities, policies)

# Save the restricted activities; this file is the input of the segmentation step
new_activities.to_csv(_PERIOD_PATH_, index=False)

# Preview the result; kept as the last expression so the notebook displays it
new_activities.head(20)


In [4]:
def get_age_class(age):
    """
    Returns the ten-year age class of the given age
    Parameters
    ----------
    age : int or float
        Age of the individual
    Returns
    -------
    str or None
        '<decade> - <decade + 9>' for ages below 80 (e.g. 34 -> '30 - 39'),
        '80+' for ages of 80 and above, None when the age is NaN.
        The bounds are always formatted as integers, so a float age such as
        34.0 gives '30 - 39' and not '30.0 - 39.0'.
    """

    if age != age:  # NaN is the only value that is not equal to itself
        return None
    if age >= 80:
        return '80+'
    # Floor to the decade first, then force an integer so no '.0' is printed
    decade_start = int(age // 10) * 10
    return str(decade_start) + ' - ' + str(decade_start + 9)

def get_segmentation_data(path, output_file):
    """
    Split an activity file into one gzip-compressed CSV per age class
    Parameters
    ----------
    path : str
        Path of the CSV to read; it must contain an 'age' column
    output_file : str
        Prefix of the files to write; each age class goes to
        '<output_file>_<age class>.csv.gz'
    Returns
    -------
    None
    """
    # Read the activities and derive the age class of every row
    activities = pd.read_csv(path)
    activities['age_class'] = activities['age'].apply(get_age_class)

    # One output file per distinct age class
    labels = activities['age_class'].unique()
    for label in labels:
        members = activities.loc[activities['age_class'] == label]
        target = output_file + '_' + label + '.csv.gz'
        members.to_csv(target, index=False)
    # rename_files(output_file)
    return None


In [5]:
# Split the restricted activities into one file per age class
get_segmentation_data(path=_PERIOD_PATH_, output_file=_PERIOD_ACTIVITIES_)

In [6]:
def rename_files(folder):
    """Remove float artefacts ('.0' right after a number) from file names.

    A name such as 'x_70.0 - 79.0.csv.gz' becomes 'x_70 - 79.csv.gz'.
    Only a '.0' that directly follows a digit and is not followed by another
    digit is removed, so real decimals (e.g. 'theta_0.05.csv') are preserved.

    Parameters
    ----------
    folder : str
        Directory whose entries are renamed in place.
    """
    for filename in os.listdir(folder):
        name = re.sub(r'(?<=\d)\.0(?!\d)', '', filename)
        if name == filename:
            # Nothing to strip: leave the entry alone
            continue
        os.rename(os.path.join(folder, filename), os.path.join(folder, name))

# Strip the '.0' fragments from the file names in the scenario folder
rename_files(folder=_PERIOD_ACTIVITIES_REP_)