<a href="https://colab.research.google.com/github/faisal-ba-systems/ML-course-documents/blob/main/EDA_on_Teams_APA_under_SBP_2025_phase2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Analysis on SBP 2025
### Number of Goals: 6
### Number of Targets: 35
### Number of Teams: 12


## Import Libraries

In [159]:
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.subplots as sp
import seaborn as sns

## Import Dataset

In [160]:
!pip install -q gdown

# original excel file
# !gdown 1Hdo9UyKUdRQXUsHlXTU8Usurwg_UKO0do1vE2q1OuvA

# demo excel file
!gdown 13cvXbMVCe9xCiEdRMNREIJ5Uo90hNF4O

Downloading...
From: https://drive.google.com/uc?id=13cvXbMVCe9xCiEdRMNREIJ5Uo90hNF4O
To: /content/demo SBP  Master Data.xlsx
  0% 0.00/236k [00:00<?, ?B/s]100% 236k/236k [00:00<00:00, 15.9MB/s]


In [161]:
excel_path ='/content/demo SBP  Master Data.xlsx'
APA_status_df = pd.read_excel(excel_path,sheet_name='Team APA Status')

print("Shape of dataset:", APA_status_df.shape)
APA_status_df

Shape of dataset: (12, 3)


Unnamed: 0,# SL No.,Name,Team APA Status
0,1,Project Operation,Verfied and Confirmed
1,2,Implementation & ITS,Verfied and Confirmed
2,3,Mobile Apps & Games,Verfied and Confirmed
3,4,Supply Chain,Verfied and Confirmed
4,5,Finance & Logistics,Verfied and Confirmed
5,6,Webcrafter,Progress Review
6,7,InnovX,Verfied and Confirmed
7,8,Application,Submitted
8,9,Business Development,Verfied and Confirmed
9,10,Industry 4.0,Verfied and Confirmed


## Statistical Dataset Analysis

In [162]:
def report_data_types_uniques_check(df):
    col = []
    d_type = []
    uniques = []
    n_uniques = []

    for i in df.columns:
        col.append(i)
        d_type.append(df[i].dtypes)
        uniques.append(df[i].unique()[:5])
        n_uniques.append(df[i].nunique())

    return pd.DataFrame({'Column': col, 'd_type': d_type, 'unique_sample': uniques, 'n_uniques': n_uniques})

report_data_types_uniques_check(APA_status_df)

Unnamed: 0,Column,d_type,unique_sample,n_uniques
0,# SL No.,int64,"[1, 2, 3, 4, 5]",12
1,Name,object,"[Project Operation, Implementation & ITS, Mobi...",12
2,Team APA Status,object,"[Verfied and Confirmed, Progress Review, Submi...",3


### SBP - Team APA Status

In [163]:
all_teams_business_automation = list(APA_status_df['Team APA Status'].unique())
print("Team APA Status Distribution:", len(all_teams_business_automation))

Team APA Status Distribution: 3


In [164]:
# Count the occurrences of each APA Status
status_counts = APA_status_df['Team APA Status'].value_counts().reset_index()
status_counts.columns = ['APA Status', 'Count']
APA_color_discrete_map={
                'Progress Review': 'green',
                'Verfied and Confirmed': 'gold',
                'Submitted': 'darkred',
                'Not Submitted': 'red'}

# Create a pie chart using Plotly
fig = px.pie(status_counts,
             names='APA Status',
             values='Count',
             color='APA Status',
             color_discrete_map=APA_color_discrete_map,
             title='Team APA Status Distribution')

fig.show()

In [165]:
df = pd.read_excel(excel_path,sheet_name='Final Master Data')

print("Shape of dataset:", df.shape)
# df.head()

Shape of dataset: (268, 9)


In [166]:
# Add a counter column (1) to use for count aggregation
APA_status_df['Count'] = 1

# Create bar chart
fig = px.bar(
    APA_status_df,
    x='Name',
    y='Count',
    color='Team APA Status',
    color_discrete_map=APA_color_discrete_map,
    title='APA Submission Status by Team',
    labels={'Count': 'Number of Entries'},
)

# Rotate x-axis labels and hide Y-axis
fig.update_layout(
    xaxis_title='Team Name',
    yaxis_title=None,
    xaxis_tickangle=-45,
    barmode='group',
    plot_bgcolor='white',
    yaxis=dict(showticklabels=False, showgrid=False, zeroline=False)  # Hide Y-axis labels and grid
)

fig.show()

### Use regex to separate the goal number and name

In [167]:
# Use regex to separate the goal number and name
df[['Goal', 'Goal Name']] = df['Goals'].str.extract(r'(Goal \d+):?\s*(.+)')
# Extract 'Target Number' and 'Target Name'
df[['Target', 'Target Name']] = df['Targets'].str.extract(r'(Target \d+\.\d+)\s*:?\s*(.+)')
# df.head()

## Number of Activities VS Team

In [168]:
# Group by team and count activities
activity_counts = df.groupby('Team').size().reset_index(name='Activity Count')

# Create interactive bar chart
fig = px.bar(
    activity_counts,
    x='Team',
    y='Activity Count',
    color='Activity Count',
    color_continuous_scale='RdYlGn',  # Red for lower, green for higher
    hover_data=['Activity Count'],
    labels={'Activity Count': 'Activity Count', 'Team': 'Team Name'},
    title='Total number of Activities per Team'
)

fig.show()

## Analysis Status Count

In [169]:
status_counts = df['Status'].value_counts().reset_index()
status_counts.columns = ['Status', 'Count']
status_color_map = {
    'Total': 'purple',
    'Done': 'green',
    'In-Progress': 'lightgreen',
    'To-Do': 'yellow',
    'Skipped': 'darkred',
}
# Plot using Plotly Express (horizontal bar)
fig = px.bar(
    status_counts,
    x='Count',
    y='Status',
    orientation='h',
    title='Activity Wise Status Distribution',
    color='Status',
    color_discrete_map=status_color_map
)

fig.update_layout(
    xaxis_title='Count',
    yaxis_title='Status',
    showlegend=False,
    plot_bgcolor='white'
)

fig.show()

## Analysis Deadline Distribution

In [170]:
# Assuming your 'Timeline' column is in datetime format, if not, you can convert it like:
df['Timeline'] = pd.to_datetime(df['Deadline'], errors='coerce')

# Extract the month from 'Timeline' column
df['Month'] = df['Timeline'].dt.month_name()  # Get month name (January, February, etc.)

# Group by month and count the occurrences
timeline_counts = df['Month'].value_counts().reset_index()
timeline_counts.columns = ['Month', 'Count']

# Sort the months in chronological order
month_order = ['January', 'February', 'March', 'April', 'May', 'June', 'July', 'August', 'September', 'October', 'November', 'December']
timeline_counts['Month'] = pd.Categorical(timeline_counts['Month'], categories=month_order, ordered=True)
timeline_counts = timeline_counts.sort_values('Month')

# Plot using Plotly Express (vertical bar with Month on x-axis and Count on y-axis)
fig = px.bar(
    timeline_counts,
    x='Month',
    y='Count',
    title='Activity Distribution by Month',
    color='Month',
    color_discrete_sequence=px.colors.qualitative.Dark2
)

fig.update_layout(
    xaxis_title='Month',
    yaxis_title='Activity Count',
    showlegend=False,
    plot_bgcolor='white'
)

fig.show()

## Activity Analysis With Team

### Activity Analysis Done With Team

In [171]:
import plotly.graph_objects as go

# Step 1: Aggregate data
# Total activities per team
total_counts = df.groupby('Team').size().reset_index(name='Total')

# Activities per team by status
status_counts = df.groupby(['Team', 'Status']).size().unstack(fill_value=0).reset_index()

# Merge total with status counts
merged = total_counts.merge(status_counts, on='Team', how='left')

# Step 2: Plot grouped bar chart with custom colors
fig = go.Figure(data=[
    go.Bar(name='Total', x=merged['Team'], y=merged['Total'], marker_color=status_color_map['Total']),
    go.Bar(name='Done', x=merged['Team'], y=merged.get('Done', [0]*len(merged)), marker_color=status_color_map['Done']),
    # go.Bar(name='In Progress', x=merged['Team'], y=merged.get('In-Progress', [0]*len(merged)), marker_color=status_color_map['In-Progress']),
    # go.Bar(name='To Do', x=merged['Team'], y=merged.get('To-Do', [0]*len(merged)), marker_color=status_color_map['To-Do'])
])

# Step 3: Customize layout
fig.update_layout(
    barmode='group',
    title='Activity Count per Team (Total & Done)',
    xaxis_title='Team',
    yaxis_title='Activity Count',
    xaxis_tickangle=-45
)

fig.show()


### Activity Analysis In Progress With Team

In [172]:
import plotly.graph_objects as go

# Step 1: Aggregate data
# Total activities per team
total_counts = df.groupby('Team').size().reset_index(name='Total')

# Activities per team by status
status_counts = df.groupby(['Team', 'Status']).size().unstack(fill_value=0).reset_index()

# Merge total with status counts
merged = total_counts.merge(status_counts, on='Team', how='left')

# Step 2: Plot grouped bar chart with custom colors
fig = go.Figure(data=[
    go.Bar(name='Total', x=merged['Team'], y=merged['Total'], marker_color=status_color_map['Total']),
    # go.Bar(name='Done', x=merged['Team'], y=merged.get('Done', [0]*len(merged)), marker_color=status_color_map['Done']),
    go.Bar(name='In Progress', x=merged['Team'], y=merged.get('In-Progress', [0]*len(merged)), marker_color=status_color_map['In-Progress']),
    # go.Bar(name='To Do', x=merged['Team'], y=merged.get('To-Do', [0]*len(merged)), marker_color=status_color_map['To-Do'])
])

# Step 3: Customize layout
fig.update_layout(
    barmode='group',
    title='Activity Count per Team (Total & In-Progress)',
    xaxis_title='Team',
    yaxis_title='Activity Count',
    xaxis_tickangle=-45
)

fig.show()


### Activity Analysis In Progress and Done With Team

In [173]:
import plotly.graph_objects as go

# Step 1: Aggregate data
# Total activities per team
total_counts = df.groupby('Team').size().reset_index(name='Total')

# Activities per team by status
status_counts = df.groupby(['Team', 'Status']).size().unstack(fill_value=0).reset_index()

# Merge total with status counts
merged = total_counts.merge(status_counts, on='Team', how='left')

# Step 2: Plot grouped bar chart with custom colors
fig = go.Figure(data=[
    go.Bar(name='Total', x=merged['Team'], y=merged['Total'], marker_color=status_color_map['Total']),
    go.Bar(name='Done', x=merged['Team'], y=merged.get('Done', [0]*len(merged)), marker_color=status_color_map['Done']),
    go.Bar(name='In Progress', x=merged['Team'], y=merged.get('In-Progress', [0]*len(merged)), marker_color=status_color_map['In-Progress']),
    # go.Bar(name='To Do', x=merged['Team'], y=merged.get('To-Do', [0]*len(merged)), marker_color=status_color_map['To-Do'])
])

# Step 3: Customize layout
fig.update_layout(
    barmode='group',
    title='Activity Count per Team (Total, Done & In-Progress)',
    xaxis_title='Team',
    yaxis_title='Activity Count',
    xaxis_tickangle=-45
)

fig.show()


# Team Progress Information

In [174]:
team_df = pd.read_excel(excel_path,sheet_name='Team Progress info')
team_df.head()


Unnamed: 0,# SL No.,Name,Task Progress/ Status Change,Deadline Extend,Task Modification,New Task Added
0,1,Project Operation,5,1,0,1
1,2,Implementation & ITS,3,2,0,2
2,3,Mobile Apps & Games,1,0,0,1
3,4,Supply Chain,0,5,0,1
4,5,Finance & Logistics,0,0,0,1


In [175]:
team_df = team_df.drop(columns=['# SL No.'], errors='ignore')
team_df.head()

Unnamed: 0,Name,Task Progress/ Status Change,Deadline Extend,Task Modification,New Task Added
0,Project Operation,5,1,0,1
1,Implementation & ITS,3,2,0,2
2,Mobile Apps & Games,1,0,0,1
3,Supply Chain,0,5,0,1
4,Finance & Logistics,0,0,0,1


In [176]:
total_sums = team_df.iloc[:, 1:].sum().reset_index()
total_sums.columns = ['Metric', 'Total']

fig = px.pie(
    total_sums,
    names='Metric',
    values='Total',
    title='Overall Distribution of Task Changes by Type'
)
fig.show()


In [178]:
# Chart 1: Name vs Task Progress/ Status Change
fig = px.bar(team_df,
    x='Name',
    y='Task Progress/ Status Change',
    color_continuous_scale='RdYlGn',
    color='Task Progress/ Status Change',
    title='Team Name vs Task Progress/ Status Change',
    labels={'Task Progress/ Status Change': 'Task Progress'}
)
fig.show()

In [179]:
# Chart 2: Name vs Deadline Extend
fig = px.bar(team_df,
    x='Name',
    y='Deadline Extend',
    color_continuous_scale='RdYlGn',
    color='Deadline Extend',
    title='Team Name vs Deadline Extend',
    labels={'Deadline Extend': 'Deadline Extensions'}
)
fig.show()

In [180]:
# Chart 3: Name vs Task Modification
fig = px.bar(team_df,
    x='Name',
    y='Task Modification',
    color_continuous_scale='RdYlGn',
    color='Task Modification',
    title='Team Name vs Task Modification',
    labels={'Task Modification': 'Task Modifications'}
)
fig.show()

In [181]:
# Chart 4: Name vs New Task Added
fig = px.bar(team_df,
    x='Name',
    y='New Task Added',
    color_continuous_scale='RdYlGn',
    color='New Task Added',
    title='Team Name vs New Task Added',
    labels={'New Task Added': 'New Tasks Added'}
)
fig.show()

In [182]:
df_long = team_df.melt(
    id_vars='Name',
    value_vars=['Task Progress/ Status Change', 'Deadline Extend'],
    var_name='Metric',
    value_name='Value'
)

fig = px.bar(
    df_long,
    x='Name',
    y='Value',
    color='Metric',
    barmode='group',
    title='Task Progress vs Deadline Extend per Team',
    labels={'Value': 'Count', 'Name': 'Team Name'}
)

fig.update_layout(xaxis_tickangle=-45)
fig.show()

In [183]:
df_long = team_df.melt(
    id_vars='Name',
    value_vars=['Task Progress/ Status Change', 'New Task Added'],
    var_name='Metric',
    value_name='Value'
)

fig = px.bar(
    df_long,
    x='Name',
    y='Value',
    color='Metric',
    barmode='group',
    title='Task Progress vs New Task Added per Team',
    labels={'Value': 'Count', 'Name': 'Team Name'}
)

fig.update_layout(xaxis_tickangle=-45)
fig.show()

In [184]:
df_long = team_df.melt(
    id_vars='Name',
    value_vars=['Task Progress/ Status Change', 'Deadline Extend','Task Modification', 'New Task Added'],
    var_name='Metric',
    value_name='Value'
)

fig = px.bar(
    df_long,
    x='Name',
    y='Value',
    color='Metric',
    barmode='group',
    title='Team Name vs All Metrics',
    labels={'Value': 'Count', 'Name': 'Team Name'}
)

fig.update_layout(xaxis_tickangle=-45)
fig.show()

In [185]:
import plotly.express as px

fig = px.scatter(
    team_df,
    x='Task Progress/ Status Change',
    y='Deadline Extend',
    text='Name',
    color='Task Progress/ Status Change',
    color_continuous_scale='Viridis',
    trendline='ols',
    title='Correlation: Task Progress vs Deadline Extend',
    labels={
        'Task Progress/ Status Change': 'Task Progress',
        'Deadline Extend': 'Deadline Extensions'
    }
)
fig.update_traces(marker=dict(size=12), textposition='top center')
fig.show()


### Parallel Categories: Team Activity Patterns

In [186]:
import plotly.express as px

fig = px.parallel_categories(
    team_df,
    dimensions=[
        'Name',
        'Task Progress/ Status Change',
        'Deadline Extend',
        'Task Modification',
        'New Task Added'
    ],
    color='Task Progress/ Status Change',
    # color_continuous_scale=px.colors.sequential.Inferno,
    labels={
        'Name': 'Team',
        'Task Progress/ Status Change': 'Progress',
        'Deadline Extend': 'Deadline',
        'Task Modification': 'Modification',
        'New Task Added': 'New Tasks'
    },
    title="Parallel Categories: Team Activity Patterns"
)
fig.show()


### Ratio: New Tasks vs Progress
- Idea: Who is getting more new tasks than they finish?

- Ratio > 1 → Getting more new work than they progress → Risk of backlog.

- Ratio < 1 → Keeping up well.

In [187]:
team_df['New_vs_Progress_Ratio'] = team_df['New Task Added'] / team_df['Task Progress/ Status Change'].replace(0, 1)

fig = px.bar(
    team_df,
    x='Name',

    y='New_vs_Progress_Ratio',
    color='New_vs_Progress_Ratio',
    color_continuous_scale='Bluered',
    title='Ratio: New Tasks Added vs Task Progress',
    labels={'Name': 'Team Name', 'New_vs_Progress_Ratio': 'New Tasks / Progress Ratio'}
)
fig.update_layout(xaxis_tickangle=-45)
fig.show()


## Bubble Chart: Progress vs Deadline Extend with New Task Added

In [188]:
fig = px.scatter(
    team_df,
    x='Task Progress/ Status Change',
    y='Deadline Extend',
    size='New Task Added',
    text='Name',
    color='New Task Added',
    color_continuous_scale='Plasma',
    title='Progress vs Deadline Extend (Bubble Size = New Task Added)',
    labels={
        'Task Progress/ Status Change': 'Task Progress',
        'Deadline Extend': 'Deadline Extend'
    }
)
fig.update_traces(textposition='top center')
fig.show()


In [189]:
import pandas as pd
import plotly.express as px

# Reshape DataFrame to long format
df_long = team_df.melt(
    id_vars='Name',
    value_vars=[
        'Task Progress/ Status Change',
        'Deadline Extend',
        'Task Modification',
        'New Task Added'
    ],
    var_name='Metric',
    value_name='Value'
)


## Which teams are working well?

In [190]:
fig = px.bar(
    team_df.sort_values('Task Progress/ Status Change', ascending=False),
    x='Name',
    y='Task Progress/ Status Change',
    title='Teams Performing Well (High Task Progress)',
    labels={'Task Progress/ Status Change': 'Progress Count'},
    color='Task Progress/ Status Change'
)
fig.update_layout(xaxis_tickangle=-45)
fig.show()


##  Which teams are more delayed?

In [191]:
fig = px.bar(
    team_df.sort_values('Deadline Extend', ascending=False),
    x='Name',
    y='Deadline Extend',
    title='Teams with Most Deadline Extensions',
    labels={'Deadline Extend': 'Extended Deadlines'},
    color='Deadline Extend'
)
fig.update_layout(xaxis_tickangle=-45)
fig.show()


## Which teams frequently modify tasks?

In [192]:
fig = px.bar(
    team_df.sort_values('Task Modification', ascending=False),
    x='Name',
    y='Task Modification',
    title='Teams with Frequent Task Modifications',
    labels={'Task Modification': 'Modifications'},
    color='Task Modification'
)
fig.update_layout(xaxis_tickangle=-45)
fig.show()


## Which teams have many new tasks added?

In [193]:
fig = px.bar(
    team_df.sort_values('New Task Added', ascending=False),
    x='Name',
    y='New Task Added',
    title='Teams with Most New Tasks Added',
    labels={'New Task Added': 'New Tasks'},
    color='New Task Added'
)
fig.update_layout(xaxis_tickangle=-45)
fig.show()


In [194]:
fig = px.bar(
    team_df.sort_values('Task Progress/ Status Change', ascending=True),
    x='Name',
    y='Task Progress/ Status Change',
    title='Teams Performing Well (High Task Progress)',
    labels={'Task Progress/ Status Change': 'Progress Count'},
    color='Task Progress/ Status Change'
)
fig.update_layout(xaxis_tickangle=-45)
fig.show()


## Which teams are only receiving new tasks but not progressing?

In [195]:
filtered = team_df[
    (team_df['New Task Added'] > 1) &
    (team_df['Task Progress/ Status Change'] <= 1)
]

fig = px.bar(
    filtered,
    x='Name',
    y='New Task Added',
    title='Teams Receiving New Tasks but Not Progressing',
    color='New Task Added'
)
fig.update_layout(xaxis_tickangle=-45)
fig.show()


## Which teams are the most reactive (many modifications or deadline extensions)?

In [196]:
team_df['Reactivity'] = team_df['Task Modification'] + team_df['Deadline Extend']

fig = px.bar(
    team_df.sort_values('Reactivity', ascending=False),
    x='Name',
    y='Reactivity',
    title='Most Reactive Teams (Modifications + Deadline Extensions)',
    color='Reactivity'
)
fig.update_layout(xaxis_tickangle=-45)
fig.show()


## Which teams are highly efficient?

Efficiency = Progress / (Total - Progress)

Efficiency → High means more task progress relative to interruptions


In [197]:
team_df['Efficiency'] = team_df['Task Progress/ Status Change'] / (
    team_df['Deadline Extend'] + team_df['Task Modification'] + team_df['New Task Added'] + 1e-5)

fig = px.bar(
    team_df.sort_values('Efficiency', ascending=False),
    x='Name',
    y='Efficiency',
    title='Most Efficient Teams (Progress vs Interruption)',
    color='Efficiency'
)
fig.update_layout(xaxis_tickangle=-45)
fig.show()


## Who always extends deadlines with little progress?

Logic: Deadline Extend > Task Progress/Status Change

In [198]:
extenders = team_df[team_df['Deadline Extend'] > team_df['Task Progress/ Status Change']]

fig = px.bar(
    extenders,
    x='Name',
    y='Deadline Extend',
    color='Deadline Extend',
    title='Teams Extending Deadlines More Than Progressing',
    labels={'Deadline Extend': 'Deadline Extensions'}
)
fig.update_layout(xaxis_tickangle=-45)
fig.show()


## Which teams are newly active (only new tasks)?

Logic: New Task Added > 0 and other 3 metrics are 0

In [199]:
newly_active = team_df[
    (team_df['New Task Added'] > 0) &
    (team_df[['Task Progress/ Status Change', 'Deadline Extend', 'Task Modification']].sum(axis=1) == 0)
]

fig = px.bar(
    newly_active,
    x='Name',
    y='New Task Added',
    color='New Task Added',
    title='Newly Active Teams (Only New Tasks, No Other Activity)',
    labels={'New Task Added': 'New Tasks'}
)
fig.update_layout(xaxis_tickangle=-45)
fig.show()


## Possibly Struggling Teams (Delays > Progress)
Logic: High Deadline Extend + Low Task Progress

In [200]:
team_df['Struggle Score'] = team_df['Deadline Extend'] - team_df['Task Progress/ Status Change']

struggling = team_df.sort_values('Struggle Score', ascending=False).head(5)

fig = px.bar(
    struggling,
    x='Name',
    y='Struggle Score',
    color='Struggle Score',
    title='Possibly Struggling Teams (Delays > Progress)',
    labels={'Struggle Score': 'Delay Minus Progress'}
)
fig.update_layout(xaxis_tickangle=-45)
fig.show()


## Which teams are proactive (progress without new tasks)?
Logic: High Task Progress/Status Change with zero New Task Added

In [201]:
proactive = team_df[
    (team_df['Task Progress/ Status Change'] > 0) &
    (team_df['New Task Added'] == 0)
]
fig = px.bar(proactive, x='Name',
             y='Task Progress/ Status Change',
             color='Task Progress/ Status Change',
             title='Proactive Teams (Progress without New Tasks)')
fig.update_layout(xaxis_tickangle=-45)
fig.show()

## Which teams only react (modify or extend but no progress)?
Logic: Zero Task Progress/Status Change, non-zero Deadline Extend or Task Modification

In [202]:
reactive_only = team_df[
    (team_df['Task Progress/ Status Change'] == 0) &
    ((team_df['Deadline Extend'] > 0) | (team_df['Task Modification'] > 0))
]
fig = px.bar(reactive_only.melt(id_vars='Name',
                                value_vars=['Deadline Extend', 'Task Modification']),
                                x='Name',
                                y='value',
                                color='variable',
                                barmode='group',
                                title='Reactive-Only Teams (Deadline or Modifications without Progress)',
                                labels={'value': 'Count', 'variable': 'Change Type'})
fig.update_layout(xaxis_tickangle=-45)
fig.show()

## Which teams are frequently interrupted (many deadline + mod)?
Logic: High Deadline Extend + Task Modification

In [203]:
team_df['Interruption Score'] = team_df['Deadline Extend'] + team_df['Task Modification']
interrupted = team_df.sort_values('Interruption Score', ascending=False).head(5)
fig = px.bar(interrupted,
             x='Name',
             y='Interruption Score',
             color='Interruption Score',
              title='Most Interrupted Teams (Modifications + Deadline Extensions)')
fig.update_layout(xaxis_tickangle=-45)
fig.show()

## Which teams receive a lot of new tasks but don’t modify them?
Logic: High New Task Added, zero Task Modification

In [204]:
task_receivers = team_df[(team_df['New Task Added'] > 0) & (team_df['Task Modification'] == 0)]
fig = px.bar(task_receivers,
             x='Name',
             y='New Task Added',
             color='New Task Added',
             title='Teams Receiving Tasks but Not Modifying')
fig.update_layout(xaxis_tickangle=-45)
fig.show()

## Which teams make frequent modifications but don’t deliver?
Logic: High Task Modification, zero Task Progress/Status Change

In [205]:
modifiers_only = team_df[(team_df['Task Modification'] > 0) & (team_df['Task Progress/ Status Change'] == 0)]
fig = px.bar(modifiers_only.melt(id_vars='Name',
                                 value_vars=['Task Modification', 'Task Progress/ Status Change']),
                                 x='Name',
                                 y='value',
                                 color='variable',
                                 title='Teams Modifying Tasks but Showing No Progress')
fig.update_layout(xaxis_tickangle=-45)
fig.show()

# Team Wise Filtering

In [206]:
# Import required libraries
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from IPython.display import display, HTML


#####################################
# SECTION 1: TEAM ANALYSIS SECTION
#####################################
print("\n" + "="*50)
print("SECTION 1: TEAM ANALYSIS")
print("="*50)

# Display available teams for reference
# team_names = sorted(df['Team'].unique().tolist())
team_names = df['Team'].unique().tolist()
print(f"\nAvailable teams: {team_names}")

# Team selection dropdown
selected_team = 'HR,Admin & GSD' # @param ["", "Application", "Business Development", "CIRT & Infra", "HR,Admin & GSD", "Implementation & ITS", "Industry 4.o", "InnovX", "Mobile Apps & Games", "Project Operation", "Webcrafter"] {type:"string"}

# Function to analyze team-specific data with interactive plots
def analyze_team(team_name):
    if not team_name:
        print("\nNo team selected. Please select a team from the dropdown.")
        return None

    try:
        team_data = df[df['Team'] == team_name]
        if len(team_data) == 0:
            print(f"\nTeam '{team_name}' not found in the dataset.")
            return None

        print(f"\n=== {team_name} Analysis ===")
        print(f"Total activities: {len(team_data)}")

        # Status breakdown
        status_counts = team_data['Status'].value_counts()
        print("\nStatus distribution:")
        for status, count in status_counts.items():
            print(f"{status}: {count} ({count/len(team_data)*100:.1f}%)")

        # Goal focus
        goal_counts = team_data['Goal'].value_counts()
        print("\nGoal distribution:")
        for goal, count in goal_counts.items():
            print(f"{goal}: {count} ({count/len(goal_counts)*100:.1f}%)")

        # Timeline distribution
        timeline_counts = team_data['Timeline'].value_counts().sort_index()
        print("\nTimeline distribution:")
        for timeline, count in timeline_counts.items():
            print(f"{timeline}: {count}")

        # Create interactive visualizations for the selected team

        # 1. Status distribution pie chart
        status_df = pd.DataFrame({'Status': status_counts.index, 'Count': status_counts.values})
        fig_status = px.pie(
            status_df,
            values='Count',
            names='Status',
            color='Status',
            title=f'Status Distribution for {team_name}',
            # color_discrete_sequence=px.colors.qualitative.Set3,
            color_discrete_map=status_color_map,
            hole=0.3
        )
        fig_status.update_traces(textposition='inside', textinfo='percent+label')
        fig_status.show()

        # 2. Goal distribution bar chart
        goal_df = pd.DataFrame({'Goal': goal_counts.index, 'Count': goal_counts.values})
        goal_df = goal_df.sort_values('Count', ascending=False)
        fig_goals = px.bar(
            goal_df,
            x='Goal',
            y='Count',
            title=f'Goal Distribution for {team_name}',
            color='Goal',
            text='Count'
        )
        fig_goals.update_layout(xaxis_title='Goal', yaxis_title='Number of Activities')
        fig_goals.show()

        # 3. Timeline activity count
        timeline_df = pd.DataFrame({'Timeline': timeline_counts.index, 'Count': timeline_counts.values})
        fig_timeline = px.line(
            timeline_df,
            x='Timeline',
            y='Count',
            title=f'Timeline Activity Count for {team_name}',
            markers=True
        )
        fig_timeline.update_layout(xaxis_title='Timeline', yaxis_title='Number of Activities')
        fig_timeline.update_traces(line=dict(width=3))
        fig_timeline.show()

        # 4. Status by Goal heatmap
        status_by_goal = pd.crosstab(team_data['Goal'], team_data['Status'])
        fig_heatmap = px.imshow(
            status_by_goal,
            text_auto=True,
            aspect="auto",
            title=f'Status by Goal for {team_name}',
            labels=dict(x="Status", y="Goal", color="Count"),
            color_continuous_scale="YlGnBu"
        )
        fig_heatmap.update_layout(height=400)
        fig_heatmap.show()

        # Return data for potential further analysis
        return team_data

    except Exception as e:
        print(f"Error analyzing team: {e}")
        return None

# Run team analysis if a team is selected
if selected_team:
    team_data = analyze_team(selected_team)



SECTION 1: TEAM ANALYSIS

Available teams: ['Project Operation', 'Webcrafter', 'Implementation & ITS', 'Mobile Apps & Games', 'InnovX', 'Application', 'Supply Chain', 'Finance & Logistics', 'Business Development', 'CIRT & Infra', 'Industry 4.o', 'HR,Admin & GSD']

=== HR,Admin & GSD Analysis ===
Total activities: 27

Status distribution:
To-Do: 18 (66.7%)
In-Progress: 6 (22.2%)
Done: 3 (11.1%)

Goal distribution:
Goal 2: 8 (133.3%)
Goal 6: 8 (133.3%)
Goal 1: 5 (83.3%)
Goal 4: 3 (50.0%)
Goal 5: 2 (33.3%)
Goal 3: 1 (16.7%)

Timeline distribution:
2025-01-25 00:00:00: 1
2025-03-25 00:00:00: 1
2025-04-25 00:00:00: 1
2025-05-25 00:00:00: 1
2025-06-25 00:00:00: 2
2025-07-25 00:00:00: 1
2025-09-25 00:00:00: 3
2025-10-25 00:00:00: 2
2025-11-25 00:00:00: 5
2025-12-25 00:00:00: 10


# Goal Wise Filtering

In [207]:
#####################################
# SECTION 2: GOAL ANALYSIS SECTION
#####################################
print("\n" + "="*50)
print("SECTION 2: GOAL ANALYSIS")
print("="*50)

# Display available goals for reference
# goal_names = sorted(df['Goal'].unique().tolist())
goal_names = df['Goal'].unique().tolist()
print(f"\nAvailable goals: {goal_names}")

# Goal selection dropdown
selected_goal = 'Goal 4' # @param ["", "Goal 1", "Goal 2", "Goal 3", "Goal 4", "Goal 5", "Goal 6"] {type:"string"}

# Function to analyze goal-specific data with interactive plots
def analyze_goal(goal_name):
    if not goal_name:
        print("\nNo goal selected. Please select a goal from the dropdown.")
        return None

    try:
        goal_data = df[df['Goal'] == goal_name]
        if len(goal_data) == 0:
            print(f"\nGoal '{goal_name}' not found in the dataset.")
            return None

        print(f"\n=== {goal_name} Analysis ===")
        print(f"Total activities: {len(goal_data)}")

        # Status breakdown
        status_counts = goal_data['Status'].value_counts()
        print("\nStatus distribution:")
        for status, count in status_counts.items():
            print(f"{status}: {count} ({count/len(goal_data)*100:.1f}%)")

        # Team contribution
        team_counts = goal_data['Team'].value_counts()
        print("\nTeam contribution:")
        for team, count in team_counts.items():
            print(f"{team}: {count} ({count/len(goal_data)*100:.1f}%)")

        # Timeline distribution
        timeline_counts = goal_data['Timeline'].value_counts().sort_index()
        print("\nTimeline distribution:")
        for timeline, count in timeline_counts.items():
            print(f"{timeline}: {count}")

        # Create interactive visualizations for the selected goal

        # 1. Status distribution pie chart
        status_df = pd.DataFrame({'Status': status_counts.index, 'Count': status_counts.values})
        fig_status = px.pie(
            status_df,
            values='Count',
            names='Status',
            color='Status',
            title=f'Status Distribution for {goal_name}',
            # color_discrete_sequence=px.colors.qualitative.Pastel,
            color_discrete_map=status_color_map,
            hole=0.3
        )
        fig_status.update_traces(textposition='inside', textinfo='percent+label')
        fig_status.show()

        # 2. Team contribution bar chart
        team_df = pd.DataFrame({'Team': team_counts.index, 'Count': team_counts.values})
        team_df = team_df.sort_values('Count', ascending=False)
        fig_teams = px.bar(
            team_df,
            x='Team',
            y='Count',
            title=f'Team Contribution for {goal_name}',
            color='Team',
            text='Count'
        )
        fig_teams.update_layout(xaxis_title='Team', yaxis_title='Number of Activities')
        fig_teams.update_xaxes(tickangle=45)
        fig_teams.show()

        # 3. Timeline activity count
        timeline_df = pd.DataFrame({'Timeline': timeline_counts.index, 'Count': timeline_counts.values})
        fig_timeline = px.line(
            timeline_df,
            x='Timeline',
            y='Count',
            title=f'Timeline Activity Count for {goal_name}',
            markers=True
        )
        fig_timeline.update_layout(xaxis_title='Timeline', yaxis_title='Number of Activities')
        fig_timeline.update_traces(line=dict(width=3))
        fig_timeline.show()

        # 4. Status by Team heatmap
        status_by_team = pd.crosstab(goal_data['Team'], goal_data['Status'])
        fig_heatmap = px.imshow(
            status_by_team,
            text_auto=True,
            aspect="auto",
            title=f'Status by Team for {goal_name}',
            labels=dict(x="Status", y="Team", color="Count"),
            color_continuous_scale="Viridis"
        )
        fig_heatmap.update_layout(height=400)
        fig_heatmap.show()

        # Return data for potential further analysis
        return goal_data

    except Exception as e:
        print(f"Error analyzing goal: {e}")
        return None

# Run goal analysis if a goal is selected
if selected_goal:
    goal_data = analyze_goal(selected_goal)






SECTION 2: GOAL ANALYSIS

Available goals: ['Goal 1', 'Goal 2', 'Goal 3', 'Goal 4', 'Goal 5', 'Goal 6']

=== Goal 4 Analysis ===
Total activities: 37

Status distribution:
To-Do: 23 (62.2%)
In-Progress: 13 (35.1%)
Done: 1 (2.7%)

Team contribution:
Project Operation: 5 (13.5%)
Webcrafter: 5 (13.5%)
Business Development: 4 (10.8%)
Application: 4 (10.8%)
Industry 4.o: 4 (10.8%)
Mobile Apps & Games: 3 (8.1%)
HR,Admin & GSD: 3 (8.1%)
CIRT & Infra: 3 (8.1%)
Supply Chain: 2 (5.4%)
InnovX: 2 (5.4%)
Implementation & ITS: 1 (2.7%)
Finance & Logistics: 1 (2.7%)

Timeline distribution:
2025-04-25 00:00:00: 1
2025-05-25 00:00:00: 2
2025-06-25 00:00:00: 2
2025-07-25 00:00:00: 1
2025-08-25 00:00:00: 2
2025-09-25 00:00:00: 2
2025-10-25 00:00:00: 6
2025-11-25 00:00:00: 4
2025-12-25 00:00:00: 17


# Team and Goal Combined Filtering

In [208]:
#####################################
# SECTION 3: COMBINED INSIGHTS
#####################################
print("\n" + "="*50)
print("SECTION 3: COMBINED INSIGHTS")
print("="*50)

# Team selection dropdown
selected_team = 'Webcrafter' # @param ["", "Application", "Business Development", "CIRT & Infra", "HR,Admin & GSD", "Implementation & ITS", "Industry 4.o", "InnovX", "Mobile Apps & Games", "Project Operation", "Webcrafter"] {type:"string"}
# Goal selection dropdown
selected_goal = 'Goal 1' # @param ["", "Goal 1", "Goal 2", "Goal 3", "Goal 4", "Goal 5", "Goal 6"] {type:"string"}
# Only run this section if both team and goal are selected
if selected_team and selected_goal:
    # Filter data for the selected team and goal
    combined_data = df[(df['Team'] == selected_team) & (df['Goal'] == selected_goal)]

    if len(combined_data) > 0:
        print(f"\n=== Combined Analysis for Team '{selected_team}' and Goal '{selected_goal}' ===")
        print(f"Total activities: {len(combined_data)}")

        # Status breakdown
        status_counts = combined_data['Status'].value_counts()
        print("\nStatus distribution:")
        for status, count in status_counts.items():
            print(f"{status}: {count} ({count/len(combined_data)*100:.1f}%)")

        # Timeline distribution
        timeline_counts = combined_data['Timeline'].value_counts().sort_index()
        print("\nTimeline distribution:")
        for timeline, count in timeline_counts.items():
            print(f"{timeline}: {count}")

        # Create interactive visualizations for the combined data

        # 1. Status distribution pie chart
        status_df = pd.DataFrame({'Status': status_counts.index, 'Count': status_counts.values})
        fig_combined_status = px.pie(
            status_df,
            values='Count',
            names='Status',
            color='Status',
            title=f'Status Distribution for {selected_team} on {selected_goal}',
            # color_discrete_sequence=px.colors.qualitative.Bold,
            color_discrete_map=status_color_map,
            hole=0.4
        )
        fig_combined_status.update_traces(textposition='inside', textinfo='percent+label')
        fig_combined_status.show()

        # 2. Timeline activity count
        if len(timeline_counts) > 1:  # Only show if there's more than one timeline point
            timeline_df = pd.DataFrame({'Timeline': timeline_counts.index, 'Count': timeline_counts.values})
            fig_combined_timeline = px.bar(
                timeline_df,
                x='Timeline',
                y='Count',
                title=f'Timeline Activities for {selected_team} on {selected_goal}',
                color='Count',
                text='Count'
            )
            fig_combined_timeline.update_layout(xaxis_title='Timeline', yaxis_title='Number of Activities')
            fig_combined_timeline.show()

        # Manually map status labels to colors
        colors = [status_color_map.get(status, 'gray') for status in status_df['Status']]

        # 3. Combined summary in a single view
        fig_summary = make_subplots(
            rows=1, cols=2,
            specs=[[{"type": "pie"}, {"type": "bar"}]],
            subplot_titles=(f"Status Distribution", f"Activity Timeline"),
            horizontal_spacing=0.1
        )

        # Add pie chart
        for i, status in enumerate(status_df['Status']):
            fig_summary.add_trace(
                go.Pie(
                    labels=status_df['Status'],
                    values=status_df['Count'],
                    name="Status",
                    marker=dict(colors=colors),
                    hole=0.4,
                    textinfo='percent+label'
                ),
                row=1, col=1
            )

        # Add timeline bar chart if there's more than one timeline point
        if len(timeline_counts) > 1:
            fig_summary.add_trace(
                go.Bar(
                    x=timeline_df['Timeline'],
                    y=timeline_df['Count'],
                    name="Activities",
                    text=timeline_df['Count'],
                    textposition='auto'
                ),
                row=1, col=2
            )

        fig_summary.update_layout(
            title_text=f"Summary for {selected_team} on {selected_goal}",
            height=500,
            showlegend=False
        )
        fig_summary.show()
    else:
        print(f"\nNo activities found for Team '{selected_team}' working on Goal '{selected_goal}'.")

print("\nAnalysis complete! Review the interactive visualizations above for insights.")


SECTION 3: COMBINED INSIGHTS

=== Combined Analysis for Team 'Webcrafter' and Goal 'Goal 1' ===
Total activities: 7

Status distribution:
In-Progress: 4 (57.1%)
To-Do: 2 (28.6%)
Done: 1 (14.3%)

Timeline distribution:
2025-04-25 00:00:00: 1
2025-06-25 00:00:00: 1
2025-07-25 00:00:00: 1
2025-09-25 00:00:00: 1
2025-10-25 00:00:00: 1
2025-11-25 00:00:00: 2



Analysis complete! Review the interactive visualizations above for insights.
