In [1]:
# M Science Analysis - Part 2B: GitHub Developer Intelligence
# Real Alternative Data from GitHub API - Developer Adoption Signals
# Predicting Software Company Performance Through Developer Community Health

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import requests
import json
from datetime import datetime, timedelta
import time
import warnings
# Suppress every warning category so library chatter stays out of cell output.
warnings.filterwarnings(action='ignore')

# Plot defaults: the 'seaborn-v0_8' style sheet and a (14, 8) default figure size.
plt.style.use('seaborn-v0_8')
plt.rcParams.update({'figure.figsize': (14, 8)})

# Title banner. One call writes all four lines; end="\n\n" leaves the same
# trailing blank line that a bare print() would.
print(
    "👨‍💻 GITHUB DEVELOPER INTELLIGENCE ANALYSIS",
    "=" * 50,
    "Using real GitHub API data to track developer adoption signals",
    "Alternative data approach for predicting software company performance",
    sep="\n",
    end="\n\n",
)

👨‍💻 GITHUB DEVELOPER INTELLIGENCE ANALYSIS
Using real GitHub API data to track developer adoption signals
Alternative data approach for predicting software company performance



In [None]:
# =============================================================================
# 1. GITHUB API SETUP & COMPANY MAPPING
# =============================================================================

# Charles Rogers' coverage universe, keyed by stock ticker.
# Every entry carries the same six fields, in this order:
#   name             - company display name
#   ticker           - same string as the outer key
#   main_repo        - primary repository as an "owner/repo" slug
#   additional_repos - list of further "owner/repo" slugs
#   org              - organization name (matches the owner of main_repo)
#   category         - short product-category label
COMPANY_GITHUB_MAPPING = {
    # Cloud Infrastructure & DevTools
    'MDB': dict(
        name='MongoDB',
        ticker='MDB',
        main_repo='mongodb/mongo',
        additional_repos=['mongodb/mongoid', 'mongodb/node-mongodb-native', 'mongodb/motor'],
        org='mongodb',
        category='Database',
    ),
    'NET': dict(
        name='Cloudflare',
        ticker='NET',
        main_repo='cloudflare/workers-sdk',
        additional_repos=['cloudflare/cloudflare-go', 'cloudflare/terraform-provider-cloudflare', 'cloudflare/wrangler'],
        org='cloudflare',
        category='CDN/Edge',
    ),
    # NOTE(review): `org` is 'gitlabhq' but both additional repos sit under
    # 'gitlab-org' - confirm these slugs actually resolve on GitHub.
    'GTLB': dict(
        name='GitLab',
        ticker='GTLB',
        main_repo='gitlabhq/gitlabhq',
        additional_repos=['gitlab-org/gitlab-runner', 'gitlab-org/gitlab-foss'],
        org='gitlabhq',
        category='DevOps',
    ),
    'DOCN': dict(
        name='DigitalOcean',
        ticker='DOCN',
        main_repo='digitalocean/doctl',
        additional_repos=['digitalocean/terraform-provider-digitalocean', 'digitalocean/sample-django'],
        org='digitalocean',
        category='Cloud Platform',
    ),
    'TEAM': dict(
        name='Atlassian',
        ticker='TEAM',
        main_repo='atlassian/react-beautiful-dnd',
        additional_repos=['atlassian/atlaskit-mk-2', 'atlassian/design-system'],
        org='atlassian',
        category='DevTools',
    ),
    'AKAM': dict(
        name='Akamai',
        ticker='AKAM',
        main_repo='akamai/cli',
        additional_repos=['akamai/terraform-provider-akamai', 'akamai/boomerang'],
        org='akamai',
        category='CDN',
    ),
    'FSLY': dict(
        name='Fastly',
        ticker='FSLY',
        main_repo='fastly/cli',
        additional_repos=['fastly/terraform-provider-fastly', 'fastly/compute-starter-kit-javascript-default'],
        org='fastly',
        category='CDN/Edge',
    ),
}

# GitHub API configuration
GITHUB_API_BASE = 'https://api.github.com'
# Optional authentication: define a module-level GITHUB_TOKEN before calling
# get_github_data() and it is sent automatically (5000 requests/hour with a
# token vs 60 without). Load the value from an environment variable or
# getpass.getpass() - never commit a literal token to the notebook.
# GITHUB_TOKEN = 'your_token_here'

def get_github_data(endpoint, params=None, timeout=10, rate_limit_wait=60):
    """
    Fetch one GitHub REST API endpoint and return its decoded JSON body.

    Args:
        endpoint: Path below GITHUB_API_BASE, e.g. 'repos/mongodb/mongo'.
            A leading '/' is tolerated and stripped.
        params: Optional dict of query-string parameters.
        timeout: Seconds passed to requests as the per-request timeout, so a
            stalled connection cannot hang the notebook indefinitely.
        rate_limit_wait: Seconds to sleep before the single retry when the
            response does not carry a numeric Retry-After header.

    Returns:
        The decoded JSON payload, or None when the request fails, the retry
        is still rejected, or the body is not valid JSON. Failures are
        printed rather than raised.
    """
    url = f"{GITHUB_API_BASE}/{endpoint.lstrip('/')}"
    headers = {
        'Accept': 'application/vnd.github.v3+json',
        'User-Agent': 'M-Science-Analysis'
    }

    # Authenticate only when the notebook has defined a non-empty GITHUB_TOKEN.
    token = globals().get('GITHUB_TOKEN')
    if token:
        headers['Authorization'] = f'token {token}'

    try:
        response = requests.get(url, headers=headers, params=params, timeout=timeout)

        # GitHub reports rate limiting with either 403 or 429. Retry once,
        # preferring the server-provided Retry-After delay when it is a plain
        # number of seconds and falling back to rate_limit_wait otherwise.
        if response.status_code in (403, 429):
            wait_seconds = rate_limit_wait
            retry_after = response.headers.get('Retry-After')
            if retry_after is not None and str(retry_after).isdigit():
                wait_seconds = int(retry_after)
            print(f"⚠️  Rate limited. Waiting {wait_seconds} seconds...")
            time.sleep(wait_seconds)
            response = requests.get(url, headers=headers, params=params, timeout=timeout)

        response.raise_for_status()
        return response.json()

    # ValueError covers a non-JSON body on requests releases whose JSON
    # decode error does not derive from RequestException.
    except (requests.exceptions.RequestException, ValueError) as e:
        print(f"❌ Error fetching {endpoint}: {e}")
        return None

# Status banner for the API setup step, written with a single print call
# (one argument per output line).
print(
    "🔧 Setting up GitHub API connection...",
    "📝 Note: Using unauthenticated API (60 requests/hour limit)",
    "   For production: Add GitHub token for 5000 requests/hour",
    sep="\n",
)