Reading Task Schedule

In [2]:
import pandas as pd

# Read CSV files
tasks_df = pd.read_csv('data/tasks.csv')
change_orders_df = pd.read_csv('data/change_orders.csv')
risk_events_df = pd.read_csv('data/risk_events.csv')
inspections_df = pd.read_csv('data/inspection_records.csv')
weather_history_df = pd.read_csv('data/weather_history.csv')
weather_history_2024_df = pd.read_csv('data/weather_history_2024.csv')
# Display the first few rows of each for confirmation
# print("Tasks Data:")
# print(tasks_df.head(), "\n")

# print("Change Orders Data:")
# print(change_orders_df.head(), "\n")

# print("Risk Events Data:")
# print(risk_events_df.head(), "\n")

# print("Inspection Records Data:")
# print(inspections_df.head(), "\n") 

# print("Weather History Data:")
# print(weather_history_df.head(), "\n")


In [3]:
# Compute priority score
tasks_df['priority_score'] = tasks_df['duration_days'] + tasks_df['type_weight'] + tasks_df['weather_weight']

# Sort tasks by priority (descending)
# df_sorted = tasks_df.sort_values(by='priority_score', ascending=False)

# Show the priority list
print(tasks_df[['task_id', 'task_name', 'priority_score']])

  task_id               task_name  priority_score
0      T1              Excavation              15
1      T2         Soil Compaction              13
2      T3         Foundation Pour              18
3      T4  Basement Waterproofing              12
4      T5             Backfilling              11
5      T6            Slab Casting              13
6      T7     Ground Floor Column               8
7      T8      Ground Floor Walls               9
8      T9               Roof Slab              10


In [4]:
from datetime import timedelta

# Set project_start if not already defined
project_start = pd.to_datetime(tasks_df['start_date']).min()

# Step 1: Load CSVs
changes_df = change_orders_df

# Step 2: Apply changes based on change orders
for _, change in changes_df.iterrows():
    task_id = change['task_id']
    added_duration = change['added_duration']

    # Check if task exists
    if task_id in tasks_df['task_id'].values:
        # Update the duration
        tasks_df.loc[tasks_df['task_id'] == task_id, 'duration_days'] += added_duration

        # Update the cost impact
        tasks_df.loc[tasks_df['task_id'] == task_id, 'actual_cost'] += change['cost_impact']

        # Optional: mark that a change has been applied
        tasks_df.loc[tasks_df['task_id'] == task_id, 'change_applied'] = change['change_id']
        print(f"✅ Change order {change['change_id']} applied to task {task_id}. Duration increased by {added_duration} days.")
    else:
        print(f"Warning: Task {task_id} in change order not found in tasks list.")
#
#
#
#
#
#
# Function to compute start and end dates considering dependencies
def compute_schedule(df, start_date):
    schedule = {}
    for idx, row in df.iterrows():
        dep = row['depends_on']
        duration = row['duration_days']
        risk_score = None

        print(f"Processing task {row['task_id']} with duration {duration} days and dependency {dep}")
        
        # Apply risk score impact
        risk = risk_events_df[risk_events_df['task_id'] == row['task_id']]
        if not risk.empty:
            risk_score = float(risk.iloc[0]['risk_score'])
            if risk_score >= 0.7:
                duration += 3
            elif risk_score >= 0.5:
                duration += 2
            elif risk_score >= 0.3:
                duration += 1

        print(f"Adjusted duration for task {row['task_id']} is now {duration} days due to risk score impact of {risk_score}")

        # Apply inspection failures
        inspection = inspections_df[(inspections_df['task_id'] == row['task_id']) & (inspections_df['passed'] == 0)]
        if not inspection.empty:
            duration += 2  # inspection failed

        print(f"Adjusted duration for task {row['task_id']} is now {duration} days due to inspection failures")
        print("-------------------------------------")

        if pd.isna(dep):
            s_date = start_date
        else:
            s_date = schedule[dep]['end_date'] + timedelta(days=1)
        e_date = s_date + timedelta(days=duration - 1)
        schedule[row['task_id']] = {
            'start_date': s_date,
            'end_date': e_date,
            'risk_score': risk_score,
            'adjusted_duration': duration
        }
    return schedule
#
#
#
#
#
#
#
schedule_map = compute_schedule(tasks_df, project_start)
print("Schedule computed successfully.", schedule_map)

# Add schedule to tasks_df
tasks_df['start_date'] = tasks_df['task_id'].apply(lambda x: schedule_map[x]['start_date'])
tasks_df['end_date'] = tasks_df['task_id'].apply(lambda x: schedule_map[x]['end_date'])
tasks_df['risk_score'] = tasks_df['task_id'].apply(lambda x: schedule_map[x]['risk_score'])
tasks_df['adjusted_duration'] = tasks_df['task_id'].apply(lambda x: schedule_map[x]['adjusted_duration'])

# Step 3: Fill NaN in 'change_applied' with 'None'
tasks_df['change_applied'] = tasks_df['change_applied'].fillna('None')

# Step 4: Save updated tasks
tasks_df.to_csv('result/updated_tasks.csv', index=False)

# Step 5: Print result
print("✅ Updated task list with change orders applied:\n")
# print(tasks_df)


✅ Change order C1 applied to task T3. Duration increased by 2 days.
✅ Change order C2 applied to task T6. Duration increased by 1 days.
✅ Change order C3 applied to task T9. Duration increased by 2 days.
Processing task T1 with duration 7 days and dependency nan
Adjusted duration for task T1 is now 10 days due to risk score impact of 0.7
Adjusted duration for task T1 is now 10 days due to inspection failures
-------------------------------------
Processing task T2 with duration 6 days and dependency T1
Adjusted duration for task T2 is now 6 days due to risk score impact of None
Adjusted duration for task T2 is now 6 days due to inspection failures
-------------------------------------
Processing task T3 with duration 11 days and dependency T2
Adjusted duration for task T3 is now 13 days due to risk score impact of 0.6
Adjusted duration for task T3 is now 15 days due to inspection failures
-------------------------------------
Processing task T4 with duration 5 days and dependency T3
Ad

In [5]:
# Ensure date columns are datetime
weather_df = weather_history_2024_df
weather_df['date'] = pd.to_datetime(weather_df['time'])
# print(tasks_df['start_date'], weather_df['date'])

# Step 2: Aggregate by date
daily_df = weather_df.groupby(weather_df['date'].dt.date).agg({
    'temperature_2m (°C)': 'mean',
    'weather_code (wmo code)': lambda x: x.mode()[0] if not x.mode().empty else x.iloc[0],
    'rain (mm)': 'sum',
    # 'snowfall (cm)': 'sum',
    'precipitation_mm': 'mean',
    # 'apparent_temperature (°C)': 'mean',
    # 'relative_humidity_2m (%)': 'mean',
    # 'precipitation_probability (%)': 'max',
    # 'showers (mm)': 'sum',
    # 'snow_depth (m)': 'max',
    # 'pressure_msl (hPa)': 'mean',
    # 'surface_pressure (hPa)': 'mean',
    # 'visibility (m)': 'min',
    # 'evapotranspiration (mm)': 'sum',
    # 'et0_fao_evapotranspiration (mm)': 'sum',
    'wind_speed_10m (km/h)': 'mean',
    # 'wind_speed_80m (km/h)': 'mean',
    # 'wind_speed_120m (km/h)': 'mean',
    # 'wind_speed_180m (km/h)': 'mean',
    # 'wind_gusts_10m (km/h)': 'mean',
    # 'temperature_80m (°C)': 'mean',
    # 'temperature_120m (°C)': 'mean',
    # 'temperature_180m (°C)': 'mean',
    # 'soil_temperature_0cm (°C)': 'mean',
    # 'soil_temperature_6cm (°C)': 'mean',
    # 'soil_temperature_18cm (°C)': 'mean',
    # 'soil_moisture_0_to_1cm (m³/m³)': 'mean',
    # 'soil_moisture_1_to_3cm (m³/m³)': 'mean',
    # 'soil_moisture_3_to_9cm (m³/m³)': 'mean',
    # 'cloud_cover (%)': 'mean',
    # 'soil_moisture_9_to_27cm (m³/m³)': 'mean'
}).reset_index().rename(columns={'date': 'date'})


# Rename columns to match the aggregation results
daily_df = daily_df.rename(columns={
    'temperature_2m (°C)': 'temp_mean',
    'weather_code (wmo code)': 'weather_code_mode',
    'rain (mm)': 'rain_sum',
    # 'snowfall (cm)': 'snowfall_sum',
    'precipitation_mm': 'precipitation_mm',
    # 'apparent_temperature (°C)': 'apparent_temp_mean',
    # 'relative_humidity_2m (%)': 'humidity_mean',
    # 'precipitation_probability (%)': 'precip_prob_max',
    # 'showers (mm)': 'showers_sum',
    # 'snow_depth (m)': 'snow_depth_max',
    # 'pressure_msl (hPa)': 'pressure_msl_mean',
    # 'surface_pressure (hPa)': 'surface_pressure_mean',
    # 'visibility (m)': 'visibility_min',
    # 'evapotranspiration (mm)': 'evapotranspiration_sum',
    # 'et0_fao_evapotranspiration (mm)': 'et0_evapotranspiration_sum',
    'wind_speed_10m (km/h)': 'wind_mean',
    # 'wind_speed_80m (km/h)': 'wind_80m_mean',
    # 'wind_speed_120m (km/h)': 'wind_120m_mean',
    # 'wind_speed_180m (km/h)': 'wind_180m_mean',
    # 'wind_gusts_10m (km/h)': 'wind_gusts_mean',
    # 'temperature_80m (°C)': 'temp_80m_mean',
    # 'temperature_120m (°C)': 'temp_120m_mean',
    # 'temperature_180m (°C)': 'temp_180m_mean',
    # 'soil_temperature_0cm (°C)': 'soil_temp_0cm_mean',
    # 'soil_temperature_6cm (°C)': 'soil_temp_6cm_mean',
    # 'soil_temperature_18cm (°C)': 'soil_temp_18cm_mean',
    # 'soil_moisture_0_to_1cm (m³/m³)': 'soil_moisture_0_1_mean',
    # 'soil_moisture_1_to_3cm (m³/m³)': 'soil_moisture_1_3_mean',
    # 'soil_moisture_3_to_9cm (m³/m³)': 'soil_moisture_3_9_mean',
    # 'cloud_cover (%)': 'cloud_cover_mean',
    # 'soil_moisture_9_to_27cm (m³/m³)': 'soil_moisture_9_27_mean'
})

# In real use, replace this with actual delay data
# Let's simulate a delay if rain > 10mm or wind > 15km/h
# Add more weather parameters that might cause construction delays
# Delay if: rain_sum > 10mm, wind_mean > 15km/h, precipitation_mm > 0.5,
# temp_mean < 2°C or temp_mean > 35°C, or weather_code_mode in severe codes (e.g., 80, 95, 99)
severe_weather_codes = [80, 95, 99]  # heavy rain, thunderstorms, etc.

daily_df['delay'] = (
    (daily_df['rain_sum'] > 10) |
    (daily_df['wind_mean'] > 15) |
    (daily_df['precipitation_mm'] > 0.5) |
    (daily_df['temp_mean'] < 2) |
    (daily_df['temp_mean'] > 35) |
    (daily_df['weather_code_mode'].isin(severe_weather_codes))
).astype(int)

print("Daily Weather Data with Delays:")
print(daily_df.head())

daily_df.to_csv('result/daily_weather_aggregated.csv', index=False)
#
#
#
#

Daily Weather Data with Delays:
         date  temp_mean  weather_code_mode  rain_sum  precipitation_mm  \
0  2024-01-01   6.216667                  3       0.4          0.025000   
1  2024-01-02   4.804167                 61       7.6          0.316667   
2  2024-01-03   9.212500                  3       4.1          0.175000   
3  2024-01-04   4.170833                  3       2.1          0.095833   
4  2024-01-05   0.541667                  3       1.3          0.120833   

   wind_mean  delay  
0   9.695833      0  
1   9.716667      0  
2  14.883333      0  
3  14.062500      0  
4  11.166667      1  


In [6]:
# Ensure date columns are datetime
# tasks_df = pd.read_csv('data/tasks.csv')
tasks_df = pd.read_csv('result/updated_tasks.csv')
daily_weather_df = pd.read_csv('result/daily_weather_aggregated.csv')

tasks_df['start_date'] = pd.to_datetime(tasks_df['start_date'])
tasks_df['end_date'] = pd.to_datetime(tasks_df['end_date'])
weather_df = weather_history_2024_df
weather_df['date'] = pd.to_datetime(weather_df['time'])
daily_df['date'] = pd.to_datetime(weather_df['time'])
# Ensure daily_weather_df['date'] is datetime for comparison
daily_weather_df['date'] = pd.to_datetime(daily_weather_df['date'])
# print(tasks_df['start_date'], weather_df['date'])

#
#
#
#
# Function to predict weather delays and adjust task durations based on weather conditions
def predict_weather_delays(tasks_df, daily_weather_df):
    tasks_df['weather_delay_days'] = 0
    for index, task in tasks_df.iterrows():
        if task['weather_sensitive'] == 1:
            task_weather = daily_weather_df[(daily_weather_df['date'] >= task['start_date']) & 
                                      (daily_weather_df['date'] <= task['end_date'])]
            # print(task_weather)
            # delay_days = (((task_weather['precipitation_mm'] >= 2).sum()) / task_weather['precipitation_mm'].count())
    #         print("hi",(task_weather['precipitation_mm'] >= 2).sum(),task_weather['precipitation_mm'].count())
            delay_days = task_weather['delay'].sum()
            print(f"Task {task['task_id']} weather delay: {delay_days} days due to precipitation, Rain, and Wind.")
            tasks_df.at[index, 'weather_delay_days'] = delay_days
    tasks_df['new_duration'] = tasks_df['duration_days'] + tasks_df['weather_delay_days']
    return tasks_df

# Function to recalculate schedule
def recalculate_schedule(tasks_df):
    tasks_df['new_start_date'] = pd.NaT
    tasks_df['new_end_date'] = pd.NaT
    for index, task in tasks_df.iterrows():
        if pd.isna(task['depends_on']):
            tasks_df.at[index, 'new_start_date'] = task['start_date']
        else:
            predecessor_end = tasks_df[tasks_df['task_id'] == task['depends_on']]['new_end_date'].iloc[0]
            tasks_df.at[index, 'new_start_date'] = predecessor_end + timedelta(days=1)
        tasks_df.at[index, 'new_end_date'] = tasks_df.at[index, 'new_start_date'] + timedelta(days=task['new_duration'] - 1)
    return tasks_df


# Apply weather delays
tasks_df = predict_weather_delays(tasks_df, daily_weather_df)

# Recalculate schedule
tasks_df = recalculate_schedule(tasks_df)

# Format dates for output
tasks_df['new_start_date'] = tasks_df['new_start_date'].dt.strftime('%Y-%m-%d')
tasks_df['new_end_date'] = tasks_df['new_end_date'].dt.strftime('%Y-%m-%d')

# Print updated schedule
print("Updated Project Schedule with Weather Delays:")
print(tasks_df[['task_id', 'task_name', 'new_start_date', 'new_end_date', 'new_duration']])

# Calculate total project delay
original_end = tasks_df['end_date'].max()
new_end = pd.to_datetime(tasks_df['new_end_date'].max())
total_delay = (new_end - original_end).days
print(f"\nTotal Project Delay due to Weather: {total_delay} days")


Task T1 weather delay: 2 days due to precipitation, Rain, and Wind.
Task T2 weather delay: 2 days due to precipitation, Rain, and Wind.
Task T3 weather delay: 1 days due to precipitation, Rain, and Wind.
Task T4 weather delay: 0 days due to precipitation, Rain, and Wind.
Task T5 weather delay: 1 days due to precipitation, Rain, and Wind.
Task T6 weather delay: 0 days due to precipitation, Rain, and Wind.
Updated Project Schedule with Weather Delays:
  task_id               task_name new_start_date new_end_date  new_duration
0      T1              Excavation     2024-05-01   2024-05-09             9
1      T2         Soil Compaction     2024-05-10   2024-05-17             8
2      T3         Foundation Pour     2024-05-18   2024-05-29            12
3      T4  Basement Waterproofing     2024-05-30   2024-06-03             5
4      T5             Backfilling     2024-06-04   2024-06-08             5
5      T6            Slab Casting     2024-06-09   2024-06-15             7
6      T7     