In [9]:
import time
import csv
import os
from github import Github
from github.GithubException import RateLimitExceededException

# --- CONFIGURATION ---
# Module-level settings read by save_to_csv() and fetch_issues() below.

# Value handed to the Github(...) client constructor in fetch_issues().
# NOTE(review): avoid committing a real token to version control.
GITHUB_TOKEN = ""  # Replace with your actual token

# Path of the CSV file that save_to_csv() opens in append mode, so
# successive runs accumulate rows in the same file.
OUTPUT_FILE = "all_issues.csv"

# Repository identifier passed to get_repo() in fetch_issues().
TARGET_REPO = "PySimpleGUI/PySimpleGUI"      # Change this for different runs!

# Upper bound applied separately to each of the two collection loops in
# fetch_issues() (labelled and unlabelled issues).
BATCH_SIZE = 50                          # We will fetch 50 Positive + 50 Negative = 100 Total

def save_to_csv(issues_list, output_file=None):
    """Append issue records to a CSV file, writing the header when needed.

    Args:
        issues_list: Iterable-with-length of dicts, each providing the keys
            'repo', 'title', 'body', 'labels' and 'label_class'.
        output_file: Destination path. When omitted (None), the module-level
            OUTPUT_FILE is used, which matches the previous behaviour.

    Raises:
        KeyError: If a record lacks one of the required keys.
        OSError: If the destination cannot be opened for appending.

    Values are written as the csv module renders them: None becomes an
    empty field and a list of labels is written as its str() form.
    """
    target = OUTPUT_FILE if output_file is None else output_file

    # A header is needed for a brand-new file AND for a file that already
    # exists but is empty; checking existence alone would leave the latter
    # without column names forever.
    needs_header = not os.path.isfile(target) or os.path.getsize(target) == 0

    # newline='' lets the csv module control line endings itself.
    with open(target, mode='a', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        if needs_header:
            writer.writerow(["repo", "title", "body", "labels", "is_beginner_friendly"])

        writer.writerows(
            [
                issue['repo'],
                issue['title'],
                issue['body'],
                issue['labels'],
                issue['label_class'],
            ]
            for issue in issues_list
        )
    print(f"‚úÖ Saved {len(issues_list)} issues to {target}")

def fetch_issues():
    """Collect labelled and unlabelled open issues from TARGET_REPO and save them.

    Two passes are made over the repository's open issues, each capped at
    BATCH_SIZE accepted records and each skipping pull requests:

    1. issues requested with the 'good first issue' label, stored with
       label_class 1;
    2. all open issues, keeping only those whose labels (compared in lower
       case) do not include 'good first issue', stored with label_class 0.

    The combined records are handed to save_to_csv() in one call at the end,
    after which the function pauses for five seconds.
    """

    def as_record(item, label_class):
        # Shape expected by save_to_csv(): one dict per issue.
        return {
            'repo': TARGET_REPO,
            'title': item.title,
            'body': item.body,
            'labels': [lbl.name for lbl in item.labels],
            'label_class': label_class,
        }

    client = Github(GITHUB_TOKEN)
    repository = client.get_repo(TARGET_REPO)

    records = []

    print(f"üöÄ Connecting to {TARGET_REPO}...")

    # --- Pass 1: positive samples (label_class 1) ---
    print(f"   Searching for 'good first issue' (Limit: {BATCH_SIZE})...")
    accepted = 0
    for item in repository.get_issues(state='open', labels=['good first issue']):
        if accepted >= BATCH_SIZE:
            break
        if not item.pull_request:  # pull requests are not wanted, only issues
            records.append(as_record(item, 1))
            accepted += 1
            print(f"   [+] Found GFI: {item.title[:30]}...")

    time.sleep(2)  # short pause between the two queries

    # --- Pass 2: negative samples (label_class 0) ---
    print(f"   Searching for 'complex' issues (Limit: {BATCH_SIZE})...")
    accepted = 0
    for item in repository.get_issues(state='open'):
        if accepted >= BATCH_SIZE:
            break
        if item.pull_request:
            continue

        # Reject anything carrying the beginner label, ignoring case.
        lowered_names = [lbl.name.lower() for lbl in item.labels]
        if 'good first issue' in lowered_names:
            continue

        records.append(as_record(item, 0))
        accepted += 1
        print(f"   [-] Found Normal: {item.title[:30]}...")

    # --- Persist everything gathered in this run ---
    save_to_csv(records)
    print("üéâ Run Complete. Resting for 5 seconds...")
    time.sleep(5)

if __name__ == "__main__":
    # Script entry point: run one collection pass, retrying a bounded number
    # of times when GitHub reports that the rate limit was exceeded.
    #
    # Previously the script slept for 60 seconds after a rate-limit error and
    # then simply exited, so the wait achieved nothing. Retrying is safe
    # because fetch_issues() writes the CSV only as its final step: a run
    # interrupted by a rate-limit error has not saved any rows yet.
    _MAX_ATTEMPTS = 3
    for _attempt in range(1, _MAX_ATTEMPTS + 1):
        try:
            fetch_issues()
        except RateLimitExceededException:
            if _attempt == _MAX_ATTEMPTS:
                # No further attempt will follow, so do not wait again.
                print(f"   Giving up after {_MAX_ATTEMPTS} rate-limited attempts.")
                break
            print("‚ùå Rate Limit Hit! Waiting 60 seconds...")
            time.sleep(60)
        except Exception as e:
            # Top-level boundary: report any other failure and stop.
            print(f"‚ùå Error: {e}")
            break
        else:
            break  # completed successfully

  g = Github(GITHUB_TOKEN)


üöÄ Connecting to PySimpleGUI/PySimpleGUI...
   Searching for 'good first issue' (Limit: 50)...
   Searching for 'complex' issues (Limit: 50)...
   [-] Found Normal: [Bug?] [Solved] Menu not gener...
   [-] Found Normal: [ Bug ]  License Key Validatio...
   [-] Found Normal: [Question] Verticalseperator b...
   [-] Found Normal: [Error]  Website typos...
   [-] Found Normal: [ Bug]  TclError with PySimple...
   [-] Found Normal: [Bug]  Crash with PySimpleGUI ...
   [-] Found Normal: [Bug] Keyboard spontaneously s...
   [-] Found Normal: [Bug] Titlebars Cannot be Disa...
   [-] Found Normal: [ Enhancement/Bug/Question]  N...
   [-] Found Normal: [Bug] Custom Titlebar window f...
   [-] Found Normal: [Enhancement] Udemy Course Enh...
   [-] Found Normal: [Question] PySide cannot accep...
   [-] Found Normal: Bug: Error dialog when program...
   [-] Found Normal: [ Bug ]  Problem with HID devi...
   [-] Found Normal: [Bug] Event handling is broken...
   [-] Found Normal: [Error Message] 