# Debug Notebook for GitHub Copilot Usage

This notebook demonstrates how to instantiate and invoke methods from your local exporter scripts. You can use it to debug the data extraction and adaptation processes for GitHub Copilot usage data.

In [4]:
# Expected project layout (this notebook sits in notebooks/):
# PROJECT_ROOT/
#   |-- notebooks/debug_copilot_usage.ipynb
#   |-- src/
#        |-- extractor/
#             |-- github_exporter.py
#             |-- github_copilot_exporter.py
#        |-- common/
#             |-- common.py
#        |-- ...
# The parent of the working directory is added to sys.path so that the
# `src` package can be imported by the following cells.

import os
import sys

# Parent of the current working directory; adjust if the notebook is started
# from a different location.
project_root = os.path.abspath(os.pardir)
if sys.path.count(project_root) == 0:
    sys.path.append(project_root)

# Local modules can be imported from here on.


## Prepare a Configuration Dictionary
Below is a sample configuration dictionary. Adjust the values according to your project’s needs or how you manage configs (e.g., from a .ini or .env file).

In [5]:

import os

from src.extractor.github_copilot_exporter import GithubCopilotExporter
from src.transformer.copilot_transformer import CopilotTransformer
from src.loader.csv_loader import CsvLoader

import pandas as pd

# Turns off pandas' chained-assignment warning for the whole kernel session.
# Handy while debugging, but be aware that it can hide genuine assignment bugs.
pd.options.mode.chained_assignment = None

# The token is taken from the GITHUB_TOKEN environment variable so that a real
# secret never has to be typed into (and saved with) the notebook. When the
# variable is not set the value is the empty string, exactly as before.
config = {
    "GITHUB": {
        "github_url": "https://api.github.com",
        "github_org": "gologic-partner",     # Replace with your organization
        "github_token": os.environ.get("GITHUB_TOKEN", "")
    }
}

# Instantiate the exporter and hand it the GitHub configuration
copilot_exporter = GithubCopilotExporter()
copilot_exporter.initialize_data(config)

# The transformer receives the same configuration dictionary
copilot_transformer = CopilotTransformer()
copilot_transformer.initialize_data(config)

# The CSV loader gets its own, separate configuration section
csv_loader = CsvLoader()
csv_loader.initialize_data({
    "CSV": {
        "csv_filename_prefix": "copilot"  # This will create files like copilot_metrics_chat_global.csv
    }
})

## Call the Copilot Usage Endpoint
The cells below use the exporter to list the organization’s teams and then fetch Copilot metrics, both per team and globally. Adjust the query parameters (e.g., `since`, `until`) as needed.

For reference, see: [Copilot Usage Docs](https://docs.github.com/en/rest/copilot/copilot-usage?apiVersion=2022-11-28).

In [None]:
# Ask the exporter for the teams and print one line per team.
# `teams` is passed to extract_metrics_per_team() in the next cell.
teams = copilot_exporter.extract_teams()
print("\nTeams found:")
for team in teams:
    print(f"- {team['name']} (slug: {team['slug']})")

In [None]:
# Fetch the metrics for every team listed above.
# The bare name on the last line echoes the result as the cell output.
metrics_per_team = copilot_exporter.extract_metrics_per_team(teams)
metrics_per_team


In [None]:
# Run the exporter's per-team chat adapter on the raw per-team metrics and echo the result.
adapted_metrics_chat_per_team = copilot_exporter.adapt_metrics_chat_team(metrics_per_team)
adapted_metrics_chat_per_team

In [None]:
# Run the exporter's per-team completions adapter on the raw per-team metrics and echo the result.
# NOTE: the variable name is spelled "completition" (sic). The transformer cell
# further down reads it under this exact spelling, so rename both together.
adapted_metrics_completition_per_team = copilot_exporter.adapt_metrics_completions_team(metrics_per_team)
adapted_metrics_completition_per_team

In [None]:
# Fetch the global (not per-team) metrics; the following cells flatten and adapt this value.
global_metrics = copilot_exporter.extract_metrics_global()
global_metrics

In [None]:
# Flatten the nested `copilot_ide_chat` part of the global metrics into one row
# per (date, editor, model) -- or per (date, editor, model, language) when a
# model reports languages -- and display the result as a DataFrame.
import pandas as pd
import json
# `pandas.io.json.json_normalize` has been deprecated since pandas 1.0 and can no
# longer be imported from that path on pandas 2.x; the top-level name works on both.
from pandas import json_normalize

flattened_metrics = []
for metric in global_metrics:
    # Fields shared by every row generated for this entry
    flat_metric = {
        'date': metric['date']
    }

    # NOTE(review): entries are assumed to sometimes lack `copilot_ide_chat`,
    # `editors` or `models` (e.g. days without chat activity) -- confirm against
    # the API. Such entries contribute no rows instead of raising KeyError.
    chat_section = metric.get('copilot_ide_chat') or {}
    for editor in chat_section.get('editors') or []:
        for model in editor.get('models') or []:
            # Row carrying the editor and model level values
            record = flat_metric.copy()
            record.update({
                'editor_name': editor['name'],
                'model_name': model['name'],
                'total_engaged_users': model.get('total_engaged_users', 0),
                'total_chats': model.get('total_chats', 0)
            })

            languages = model.get('languages')
            if languages:
                # One row per language, each repeating the model-level values
                for lang in languages:
                    lang_record = record.copy()
                    lang_record.update({
                        'language': lang['name'],
                        'language_engaged_users': lang.get('total_engaged_users', 0)
                    })
                    flattened_metrics.append(lang_record)
            else:
                # Missing or empty language list: keep the model-level row so
                # the model does not silently disappear from the table.
                flattened_metrics.append(record)

# Build the DataFrame once from the collected records
df_global_metrics = pd.DataFrame(flattened_metrics)
display(df_global_metrics)


In [None]:
# Run the exporter's global chat adapter on the raw global metrics and echo the result.
# Despite the generic name, this value is what the global chat transformer cell consumes.
adapted_global_metrics = copilot_exporter.adapt_metrics_chat_global(global_metrics)
adapted_global_metrics


In [None]:
# Run the exporter's global completions adapter on the raw global metrics and echo the result.
adapted_metrics_completion_global = copilot_exporter.adapt_metrics_completions_global(global_metrics)
adapted_metrics_completion_global


In [None]:
# Transform the adapted per-team chat metrics; the result is displayed here and
# reused by the CSV export and plotting cells below.
transformed_metrics_chat = copilot_transformer.transform_chat_metrics_team(adapted_metrics_chat_per_team)
print("Transformed Chat Metrics per Team:")
display(transformed_metrics_chat)

In [None]:
# Transform the adapted per-team completion metrics (the input variable keeps the
# "completition" spelling used where it is defined); the result is displayed here
# and reused by the CSV export and plotting cells below.
transformed_completion_metrics = copilot_transformer.transform_completion_metrics_team(adapted_metrics_completition_per_team)
print("Transformed Completion Metrics per Team:")
display(transformed_completion_metrics)


In [None]:
# Transform the adapted global chat metrics; the result is displayed here and
# reused by the CSV export and plotting cells below.
transformed_global_chat = copilot_transformer.transform_chat_metrics_global(adapted_global_metrics)
print("Transformed Global Chat Metrics:")
display(transformed_global_chat)

In [None]:
# Transform the adapted global completion metrics; the result is displayed here
# and reused by the CSV export and plotting cells below.
transformed_global_completion = copilot_transformer.transform_completion_metrics_global(adapted_metrics_completion_global)
print("Transformed Global Completion Metrics:")
display(transformed_global_completion)

In [None]:
# Bundle the four transformed results under fixed keys and hand them to the CSV loader.
# NOTE: the name `transformed_data` is assigned again further down with the
# result of copilot_transformer.transform_data(), which replaces this dict.
transformed_data = {
    "metrics_chat_global": transformed_global_chat,
    "metrics_chat_team": transformed_metrics_chat,
    "metrics_completion_global": transformed_global_completion,
    "metrics_completion_team": transformed_completion_metrics
}

csv_loader.load_data(transformed_data)

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Apply seaborn's theme and the "husl" palette to the plots below
sns.set_theme()
sns.set_palette("husl")

# 2x2 grid: global metrics on the top row, per-team metrics on the bottom row
fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(20, 15))

# Plot 1: Global Chat Metrics Over Time
transformed_global_chat.plot(x='date', y=['chat_per_user', 'chat_acceptance_rate'], ax=ax1)
ax1.set_title('Global Chat Metrics Over Time')
ax1.set_xlabel('Date')
# Judging by their names the two series are a per-user value and a rate, so a
# neutral axis label is used rather than "Count".
ax1.set_ylabel('Value')
ax1.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
ax1.tick_params(axis='x', rotation=45)

# Plot 2: Global Completion Metrics Over Time (mean acceptance rate per date)
mean_acceptance = transformed_global_completion.groupby('date')['completion_acceptance_rate'].mean()
mean_acceptance.plot(ax=ax2)
ax2.set_title('Average Code Completion Acceptance Rate Over Time')
ax2.set_xlabel('Date')
ax2.set_ylabel('Acceptance Rate')
ax2.legend(['Mean Acceptance Rate'], bbox_to_anchor=(1.05, 1), loc='upper left')
ax2.tick_params(axis='x', rotation=45)

# Plot 3: Chat acceptance rate by team over time.
# The plotted column is `chat_acceptance_rate`, so title and axis label say so.
lines = []
for team in transformed_metrics_chat['team'].unique():
    team_data = transformed_metrics_chat[transformed_metrics_chat['team'] == team]
    line, = ax3.plot(team_data['date'], team_data['chat_acceptance_rate'], label=team)
    lines.append(line)
ax3.set_title('Chat Acceptance Rate by Team Over Time')
ax3.set_xlabel('Date')
ax3.set_ylabel('Acceptance Rate')
ax3.legend(handles=lines, bbox_to_anchor=(1.05, 1), loc='upper left')
ax3.tick_params(axis='x', rotation=45)

# Plot 4: Code completion acceptance rate by team over time.
# The plotted column is `completion_acceptance_rate`, so title and axis label say so.
lines = []
for team in transformed_completion_metrics['team'].unique():
    team_data = transformed_completion_metrics[transformed_completion_metrics['team'] == team]
    line, = ax4.plot(team_data['date'], team_data['completion_acceptance_rate'], label=team)
    lines.append(line)
ax4.set_title('Code Completion Acceptance Rate by Team Over Time')
ax4.set_xlabel('Date')
ax4.set_ylabel('Acceptance Rate')
ax4.legend(handles=lines, bbox_to_anchor=(1.05, 1), loc='upper left')
ax4.tick_params(axis='x', rotation=45)

# Adjust layout to prevent overlap
plt.tight_layout()
plt.show()


## Placeholder export and transform for active users, average active users, inactive seats and added seats this billing period

**TODO:** Refactor with `mappings.json` to use `global_metrics`, and rework the adapt and transform steps accordingly.

In [None]:
# Extract: daily active users, plus a pair of values unpacked into
# `new_seats_added` and `inactive_users`.
daily_active_users = copilot_exporter.extract_daily_active_users()
new_seats_added, inactive_users = copilot_exporter.extract_seats_information()

# Adapt the raw values through the exporter
df_daily_active_users = copilot_exporter.adapt_daily_active_users(daily_active_users)
df_seats = copilot_exporter.adapt_seats_information(new_seats_added, inactive_users)

# Inputs for the transformer, keyed by name
adapted_data = {
    "df_daily_active_users": df_daily_active_users,
    "df_seats": df_seats
}

# NOTE: this rebinds `transformed_data`, replacing the dict that was passed to
# csv_loader.load_data() in the earlier cell. The charting cell below reads this value.
transformed_data = copilot_transformer.transform_data(adapted_data)
# TODO: load the result to CSV (this cell does not call the loader)

In [None]:
import matplotlib.pyplot as plt
from datetime import datetime
import pandas as pd

def create_charts(transformed_data):
    """Draw, save and show three charts built from ``transformed_data``.

    Reads the 'df_daily_active_users', 'df_average_active_users' and
    'df_seats' entries of ``transformed_data``. Each chart is written as a
    PNG using a relative file name ('daily_active_users.png',
    'average_active_users.png', 'seats_distribution.png') and then shown.
    Returns None.
    """

    def _finish(png_name):
        # Shared tail of every chart: fit the layout, save, then render.
        plt.tight_layout()
        plt.savefig(png_name)
        plt.show()

    # 1. Daily active users (line chart)
    _, daily_ax = plt.subplots(figsize=(12, 6))
    daily_frame = transformed_data['df_daily_active_users']

    # Dates are plotted as-is (assumed to already be datetime-like -- TODO confirm)
    daily_ax.plot(
        daily_frame['date'],
        daily_frame['active_users'],
        marker='o',
        linestyle='-',
        linewidth=2,
    )
    daily_ax.set(
        title='Daily Active Users Over Time',
        xlabel='Date',
        ylabel='Number of Active Users',
    )
    daily_ax.grid(True)

    # Tilt the date labels on the x-axis
    plt.xticks(rotation=45)
    _finish('daily_active_users.png')

    # 2. Average active users (single bar, first row of the frame)
    _, average_ax = plt.subplots(figsize=(6, 4))
    average_frame = transformed_data['df_average_active_users']
    average_value = average_frame['average_active_users'].iloc[0]
    average_ax.bar(['Average'], [average_value], color='skyblue')
    average_ax.set(title='Average Active Users', ylabel='Number of Users')
    _finish('average_active_users.png')

    # 3. Seats distribution (inactive seats stacked on top of added seats)
    _, seats_ax = plt.subplots(figsize=(8, 5))
    seats_frame = transformed_data['df_seats']
    bar_labels = ['Seats']
    added_seats = seats_frame['added_seats']
    inactive_seats = seats_frame['inactive_seats']

    seats_ax.bar(bar_labels, added_seats, label='Added Seats', color='lightgreen')
    seats_ax.bar(
        bar_labels,
        inactive_seats,
        bottom=added_seats,
        label='Inactive Seats',
        color='lightcoral',
    )
    seats_ax.set(title='Seats Distribution', ylabel='Number of Seats')
    seats_ax.legend()
    _finish('seats_distribution.png')

# Draw the charts. `transformed_data` must be the result of
# copilot_transformer.transform_data() from the previous cell, not the metrics
# dict that was passed to csv_loader.load_data() earlier in the notebook.
create_charts(transformed_data)

## Conclusion
You can now inspect these DataFrames, observe schema changes, and debug any issues with your transformation logic or with the GitHub API data itself. Use additional cells as needed to explore data, handle errors, etc.