### Get average long traffic for Wednesday and Sunday

In [13]:
import io
import pandas as pd
import numpy as np
import random
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages
import os

In [14]:
def _split_total_into_cases(total, num_cases, std_dev=0.5):
    """Randomly split ``total`` into ``num_cases`` non-negative integers.

    Weights are drawn from a normal distribution centred on
    ``total / num_cases`` and clipped at zero, then turned into integer
    counts with largest-remainder rounding so the counts always sum to
    ``round(total)``.

    Args:
        total: Positive amount to distribute.
        num_cases: Number of buckets to distribute over.
        std_dev: Standard deviation of the normal draws (spread of the split).

    Returns:
        Integer NumPy array of length ``num_cases`` summing to ``round(total)``.
    """
    target = int(round(total))
    draws = np.random.normal(loc=total / num_cases, scale=std_dev, size=num_cases)
    weights = np.clip(draws, 0, None)  # negative arrivals make no sense
    weight_sum = weights.sum()
    if weight_sum <= 0:
        # Every draw was clipped to zero (likely for small totals); an even
        # split avoids dividing by zero when normalising.
        weights = np.ones(num_cases)
        weight_sum = float(num_cases)

    shares = weights * (target / weight_sum)
    counts = np.floor(shares).astype(int)

    # Independent rounding can drift away from the target, so hand the units
    # lost by flooring to the buckets with the largest fractional parts.
    shortfall = target - int(counts.sum())
    if shortfall > 0:
        largest_fraction_first = np.argsort(counts - shares)
        counts[largest_fraction_first[:shortfall]] += 1
    return counts


def arrivals_each_case(site, day, p_HGV=0.4, p_eHGV=0.2, p_eHGV_stop=0.2, num_cases=9, target_column='long_eHGV_stop'):
    """Build per-case arrival counts from a site's averaged long-traffic data.

    Reads ``site{site}_day{day}_long_traffic.csv`` (first column used as the
    index), averages all columns row by row, derives the ``long_HGV``,
    ``long_eHGV`` and ``long_eHGV_stop`` columns by successive multiplication
    and rounding, and randomly distributes each row's ``target_column`` value
    over ``case1`` .. ``case{num_cases}``. The result is also written to
    ``site{site}_day{day}_arrivals.csv``.

    Args:
        site: Site identifier used in the input/output file names.
        day: Day identifier used in the input/output file names.
        p_HGV: Factor applied to ``avg_long`` to obtain ``long_HGV``.
        p_eHGV: Factor applied to ``long_HGV`` to obtain ``long_eHGV``.
        p_eHGV_stop: Factor applied to ``long_eHGV`` to obtain ``long_eHGV_stop``.
        num_cases: Number of case columns to generate.
        target_column: Column whose value is distributed across the cases.

    Returns:
        DataFrame with ``avg_long``, the three derived columns and the case
        columns. Each row's case columns sum to the (rounded) target value;
        rows whose target is not positive keep all-zero cases.
    """
    df = pd.read_csv(f"site{site}_day{day}_long_traffic.csv", index_col=0)

    # Row-wise mean over all columns of the input file.
    df_avg = df.mean(axis=1).to_frame(name='avg_long')

    df_avg['long_HGV'] = (df_avg['avg_long'] * p_HGV).round()
    df_avg['long_eHGV'] = (df_avg['long_HGV'] * p_eHGV).round()
    df_avg['long_eHGV_stop'] = (df_avg['long_eHGV'] * p_eHGV_stop).round()

    case_matrix = np.zeros((len(df_avg), num_cases), dtype=int)

    # Iterate by position: the index read from the CSV holds labels that are
    # not guaranteed to be 0..n-1, so they must not be used as matrix rows.
    for row_pos, total_value in enumerate(df_avg[target_column].to_numpy()):
        if total_value > 0:
            case_matrix[row_pos, :] = _split_total_into_cases(total_value, num_cases)

    case_columns = ['case' + str(k) for k in range(1, num_cases + 1)]
    df_avg[case_columns] = case_matrix
    df_avg.to_csv(f"site{site}_day{day}_arrivals.csv")

    return df_avg

    

### Average arrivals for each case (Wednesday)

In [15]:
# Wednesday (day 3), site 0: build the arrival table, then export everything
# from the fifth column onward (the case columns) without a header row.
df_avg_site0_day3 = arrivals_each_case(site=0, day=3)
wednesday_cases = df_avg_site0_day3.iloc[:, 4:]
wednesday_cases.to_csv("../arrivals/default_arrivals_day3.csv", header=False)

### Average arrivals for each case (Sunday)

In [16]:
# Sunday (day 0), site 0: build the arrival table, then export everything
# from the fifth column onward (the case columns) without a header row.
df_avg_site0_day0 = arrivals_each_case(site=0, day=0)
sunday_cases = df_avg_site0_day0.iloc[:, 4:]
sunday_cases.to_csv("../arrivals/default_arrivals_day0.csv", header=False)