In [1]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import networkx as nx
import matplotlib.pyplot as plt
import random

In [2]:
# Read the two input tables (example filenames) into pandas DataFrames:
# one with the activities and their dates, one with the dependencies.
ACTIVITIES_CSV = "./data/activities.csv"
DEPENDENCIES_CSV = "./data/dependencies.csv"

activities_df = pd.read_csv(ACTIVITIES_CSV)
dependencies_df = pd.read_csv(DEPENDENCIES_CSV)

In [3]:
# Count the missing values in each column of activities_df
activities_df.isna().sum()

Task        0
Activity    0
Start       1
End         1
dtype: int64

In [4]:
# Discard, in place, every activity row that lacks a Start or an End date
activities_df.dropna(axis=0, how='any', subset=['Start', 'End'], inplace=True)

# Confirm that no missing values remain
activities_df.isna().sum()

Task        0
Activity    0
Start       0
End         0
dtype: int64

In [5]:
# Preview the raw Start/End values (first five rows) before parsing them
activities_df.loc[:, ['Start', 'End']].head()

Unnamed: 0,Start,End
1,01/11/23,31/12/23
2,01/11/23,31/10/27
3,01/11/23,31/10/24
4,01/10/24,31/10/27
5,01/11/23,31/12/23


In [6]:
# Parse the Start/End columns of both tables into datetime64 values.
# The source data uses day/month/two-digit-year (e.g. 31/12/23).
DATE_FORMAT = '%d/%m/%y'
DATE_COLUMNS = ['Start', 'End']

for frame_name, frame in [('activities_df', activities_df),
                          ('dependencies_df', dependencies_df)]:
    for column in DATE_COLUMNS:
        parsed = pd.to_datetime(frame[column], format=DATE_FORMAT, errors='coerce')

        # errors='coerce' replaces unparseable values with NaT without saying
        # so. Count the values that were present before parsing but are NaT
        # afterwards and report them, so bad dates do not vanish silently.
        n_failed = int((parsed.isna() & frame[column].notna()).sum())
        if n_failed:
            print(f"Warning: {n_failed} value(s) in {frame_name}['{column}'] "
                  f"did not match {DATE_FORMAT} and were set to NaT")

        frame[column] = parsed

activities_df.head(5)

Unnamed: 0,Task,Activity,Start,End
1,Task 1.1 Improve and transform the governance ...,A.1.1.1 Introduce the new partners to the achi...,2023-11-01,2023-12-31
2,Task 1.1 Improve and transform the governance ...,A.1.1.2 Update and maintain the governance str...,2023-11-01,2027-10-31
3,Task 1.1 Improve and transform the governance ...,A.1.1.3 Establish the agenda of the new bodies...,2023-11-01,2024-10-31
4,Task 1.1 Improve and transform the governance ...,A.1.1.4 Coordinate the development of the UNIT...,2024-10-01,2027-10-31
5,Task 1.2 Manage and coordinate UNITA,A.1.2.1 Introduce the new partners to the achi...,2023-11-01,2023-12-31


## Visualizing Activities and Dependencies 

### Gantt Chart (Tasks and Activity Timeline)

In [7]:
# Keep only the columns the timeline needs
gantt_df = activities_df[['Task', 'Activity', 'Start', 'End']]

# One row per Task on the y-axis; each bar spans Start..End and is
# colored by Activity.
timeline_options = dict(
    x_start="Start",
    x_end="End",
    y="Task",
    color="Activity",
    title="Activity Timeline",
    labels={"Task": "Task", "Activity": "Activity"},
)
fig = px.timeline(gantt_df, **timeline_options)

# The legend is hidden; only the axis titles are customized.
layout_options = dict(
    showlegend=False,
    xaxis_title="Timeline",
    yaxis_title="Tasks",
)
fig.update_layout(**layout_options)
fig.show()

### Dependency Network Graph

In [10]:
# Build the directed dependency graph: one edge per row of dependencies_df,
# pointing from 'Activity A' to 'Activity B'. Nodes are created implicitly
# by the edges; the dependency type, status and comment are stored as edge
# attributes.
G = nx.DiGraph()
G.add_edges_from(
    (
        row['Activity A'],
        row['Activity B'],
        {
            'type': row['Type of dependency'],
            'status': row['Dependency status'],
            'comment': row['Comment'],
        },
    )
    for _, row in dependencies_df.iterrows()
)

# Compute a 2-D position for every node. The fixed seed keeps the layout
# identical from one run to the next.
pos = nx.spring_layout(G, seed=42)

# Node coordinates, in G.nodes() order
node_x = [pos[node][0] for node in G.nodes()]
node_y = [pos[node][1] for node in G.nodes()]

# Edge coordinates for a single 'lines' trace, plus a text label per edge.
# Each edge contributes its two endpoints followed by None: Plotly leaves a
# gap at None values, so every edge is drawn as its own segment. Without the
# separator the end of one edge is joined to the start of the next, drawing
# lines between nodes that have no dependency.
edge_x = []
edge_y = []
edge_labels = {}
for source, target, attrs in G.edges(data=True):
    x0, y0 = pos[source]
    x1, y1 = pos[target]
    edge_x.extend([x0, x1, None])
    edge_y.extend([y0, y1, None])
    # Label shown at the middle of the edge: "<type> - <status>"
    edge_labels[(source, target)] = f"{attrs['type']} - {attrs['status']}"

# Create Plotly figure for network
fig = go.Figure()

# Give every node its own light color. A dedicated, seeded generator makes
# the colors reproducible across runs (the layout is already seeded) and
# leaves the global `random` state untouched.
COLOR_SEED = 42
color_rng = random.Random(COLOR_SEED)
task_colors = {
    node: f'rgba({color_rng.randint(100, 255)}, {color_rng.randint(100, 255)}, {color_rng.randint(100, 255)}, 0.8)'
    for node in G.nodes()
}
node_color = [task_colors[node] for node in G.nodes()]

# Edges: one gray line trace. The trace carries no text, so hover is
# switched off explicitly instead of requesting a 'text' tooltip that has
# nothing to show.
fig.add_trace(go.Scatter(x=edge_x, y=edge_y,
                         line=dict(width=1, color='gray'),
                         hoverinfo='none',
                         mode='lines'))

# Nodes: the activity name is drawn below each marker and is also used as
# the hover tooltip.
fig.add_trace(go.Scatter(x=node_x, y=node_y,
                         mode='markers+text',
                         hoverinfo='text',
                         text=list(G.nodes()),
                         textposition='bottom center',
                         marker=dict(color=node_color, size=20, line=dict(width=2, color='black'))))

# Edge labels (dependency type and status), placed at the midpoint of each
# edge. All labels go into a single text trace rather than one trace per
# edge, which keeps the figure small when there are many dependencies.
label_x = []
label_y = []
label_text = []
for (source, target), label in edge_labels.items():
    x0, y0 = pos[source]
    x1, y1 = pos[target]
    label_x.append((x0 + x1) / 2)
    label_y.append((y0 + y1) / 2)
    label_text.append(label)

fig.add_trace(go.Scatter(x=label_x, y=label_y,
                         mode='text',
                         text=label_text,
                         showlegend=False))

# Set layout options for interactivity
fig.update_layout(
    title="Activity Dependency Network",
    title_x=0.5,
    showlegend=False,
    hovermode="closest",
    xaxis=dict(showgrid=False, zeroline=False),
    yaxis=dict(showgrid=False, zeroline=False),
    plot_bgcolor="white",
)

# Display the interactive plot
fig.show()
