In [1]:
import requests
import csv
import json

In [2]:
# Método para obtener los metadatos generales de un repositorio de GitHub mediante su API:
def getMetadata(owner, repo):
    """Fetch general metadata for a GitHub repository through the REST API.

    Args:
        owner: Account or organization that owns the repository.
        repo: Repository name.

    Returns:
        A dict with the selected repository fields (``license`` holds the
        license name, or ``None`` when the payload carries no license), or
        ``None`` when the API answers with a status other than 200; in that
        case the status code is printed.
    """
    response = requests.get(f"https://api.github.com/repos/{owner}/{repo}")

    # Guard clause: anything but a 200 is reported and yields no metadata.
    if response.status_code != 200:
        print(f"Error: {response.status_code}")
        return None

    payload = response.json()

    # Fields copied verbatim from the payload, listed in output order.
    copied_fields = (
        "full_name",
        "description",
        "clone_url",
        "stargazers_count",
        "forks_count",
        "open_issues_count",
        "default_branch",
        "created_at",
        "updated_at",
        "pushed_at",
        "language",
    )
    metadata = {field: payload.get(field) for field in copied_fields}

    # The license entry is a nested object; only its name is kept.
    license_info = payload.get("license")
    metadata["license"] = license_info["name"] if license_info else None
    return metadata

In [None]:
# Ejemplo de uso:
if __name__ == "__main__":
    # Hard-coded alternative to the interactive prompts below:
    # owner = "giosicolo"
    # repo = "api-backend-ecokit"

    # Read the repository coordinates interactively. The values are placed
    # directly into the request URL, so stray whitespace from a paste is
    # stripped first.
    owner = input("Ingrese el propietario del repositorio (owner): ").strip()
    repo = input("Ingrese el nombre del repositorio (repo): ").strip()

    meta = getMetadata(owner, repo)
    # getMetadata returns None on an API error (it prints the status itself).
    if meta:
        print("\n Los metadatos del repositorio son: \n")
        for key, value in meta.items():
            print(f"{key}: {value}")

In [8]:
def getAllIssues(owner, repo, state, timeout=30):
    """Fetch every issue of a GitHub repository that matches ``state``.

    Pagination follows the ``next`` URL that the API returns in the ``Link``
    response header (exposed by requests as ``response.links``) instead of
    guessing page numbers, so no extra request is spent probing for an empty
    page.

    Args:
        owner: Account or organization that owns the repository.
        repo: Repository name.
        state: Value sent as the ``state`` query parameter (the notebook
            uses ``"closed"`` and ``"all"``).
        timeout: Seconds to wait for each HTTP request before requests
            raises a timeout error.

    Returns:
        A list with the issue dictionaries of every page fetched. If a
        request answers with a status other than 200, the status code is
        printed and the issues collected so far are returned.
    """
    all_issues = []
    next_url = f"https://api.github.com/repos/{owner}/{repo}/issues"
    # Only the first request needs explicit query parameters (100 is the
    # page size this notebook asks for); every `next` URL handed back by the
    # API already carries its own query string.
    params = {"state": state, "per_page": 100}

    while next_url:
        response = requests.get(next_url, params=params, timeout=timeout)

        if response.status_code != 200:
            print(f"Error: {response.status_code}")
            break

        all_issues.extend(response.json())

        # No `next` relation means this was the last page.
        next_url = response.links.get("next", {}).get("url")
        params = None

    return all_issues

In [9]:
def getAllIssuesWithComments(owner, repo, state, timeout=30):
    """Fetch every issue matching ``state`` and attach its comments.

    Each returned issue dictionary gains an ``all_comments`` key holding the
    list of its comment dictionaries. Pull requests (entries carrying a
    ``pull_request`` key) and issues whose ``comments`` count is 0 get an
    empty list without any extra request. Both the issue listing and each
    comment listing follow the ``next`` URL from the ``Link`` response
    header, so comment threads longer than one page are fetched completely.

    Args:
        owner: Account or organization that owns the repository.
        repo: Repository name.
        state: Value sent as the ``state`` query parameter.
        timeout: Seconds to wait for each HTTP request before requests
            raises a timeout error.

    Returns:
        A list of issue dictionaries. If an issue-listing request answers
        with a status other than 200, the status code is printed and the
        issues collected so far are returned. If a comment request fails,
        a message is printed and that issue gets an empty ``all_comments``.
    """
    all_issues = []
    issues_url = f"https://api.github.com/repos/{owner}/{repo}/issues"
    next_url = issues_url
    # Only the first request needs explicit query parameters; every `next`
    # URL handed back by the API already carries its own query string.
    params = {"state": state, "per_page": 100}

    while next_url:
        response = requests.get(next_url, params=params, timeout=timeout)
        if response.status_code != 200:
            print(f"Error: {response.status_code}")
            break

        issues = response.json()

        for issue in issues:
            issue_number = issue.get("number")
            # Pull requests are skipped, and an issue that reports zero
            # comments needs no request at all.
            if "pull_request" in issue or issue.get("comments") == 0:
                issue["all_comments"] = []
                continue

            comments, failed_status = _fetchAllComments(
                f"{issues_url}/{issue_number}/comments", timeout
            )
            if failed_status is not None:
                print(f"Error al obtener comentarios de la issue {issue_number}: {failed_status}")
            issue["all_comments"] = comments

        all_issues.extend(issues)

        # No `next` relation means this was the last page of issues.
        next_url = response.links.get("next", {}).get("url")
        params = None

    return all_issues


def _fetchAllComments(comments_url, timeout):
    """Collect every page of one issue's comments.

    Returns:
        ``(comments, None)`` on success, or ``([], status_code)`` as soon as
        any page answers with a status other than 200.
    """
    comments = []
    params = {"per_page": 100}

    while comments_url:
        response = requests.get(comments_url, params=params, timeout=timeout)
        if response.status_code != 200:
            return [], response.status_code

        comments.extend(response.json())
        comments_url = response.links.get("next", {}).get("url")
        params = None

    return comments, None

In [10]:
def printIssue(issue):
    """Print a human-readable summary of one GitHub issue to stdout.

    Args:
        issue: Issue dictionary as decoded from the GitHub API JSON. Missing
            or null ``user``, ``labels``, ``assignee`` and ``milestone``
            entries are tolerated.
    """
    # `dict.get(key, {})` only covers a missing key; a key that is present
    # with a null value still yields None, so `or` supplies the fallback.
    user = issue.get('user') or {}
    assignee = issue.get('assignee') or {}
    milestone = issue.get('milestone') or {}
    label_names = [label['name'] for label in issue.get('labels') or []]
    separator = '=' * 80

    print(separator)
    print(f"Issue #{issue.get('number')}: {issue.get('title')}")
    print(f"Estado: {issue.get('state')}")
    print(f"URL: {issue.get('html_url')}")
    print(f"Usuario: {user.get('login')}")
    print(f"Etiquetas: {label_names}")
    print(f"Asignado a: {assignee.get('login') if assignee else 'Nadie'}")
    # `comments` holds the comment count of the issue.
    print(f"Participantes: {issue.get('comments')} comentario(s)")
    print(f"Fecha de creación: {issue.get('created_at')}")
    print(f"Última actualización: {issue.get('updated_at')}")
    print(f"Cerrado en: {issue.get('closed_at')}")
    print(f"Milestone: {milestone.get('title') if milestone else 'Ninguno'}")
    print(f"Es pull request: {'Sí' if 'pull_request' in issue else 'No'}")
    print("\nDescripción:")
    print(issue.get('body') if issue.get('body') else "(Sin descripción)")
    print(f"{separator}\n")

In [11]:
def saveIssuesCSV(issues, filename):
    """Write one CSV row per issue to ``filename`` (UTF-8, with header).

    Args:
        issues: Iterable of issue dictionaries as decoded from the GitHub
            API JSON. Missing or null ``user``, ``labels``, ``assignee`` and
            ``milestone`` entries are written as empty cells.
        filename: Path of the CSV file to create or overwrite.
    """
    # Columns to store, in output order.
    fieldnames = [
        'number',
        'title',
        'state',
        'html_url',
        'user',
        'labels',
        'assignee',
        'comments',
        'created_at',
        'updated_at',
        'closed_at',
        'milestone',
        'is_pull_request',
        'body'
    ]

    with open(filename, mode='w', newline='', encoding='utf-8') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()

        for issue in issues:
            # `dict.get(key, {})` only covers a missing key; a key present
            # with a null value still yields None, so `or` supplies the
            # empty fallback.
            user = issue.get('user') or {}
            assignee = issue.get('assignee') or {}
            milestone = issue.get('milestone') or {}

            writer.writerow({
                'number': issue.get('number'),
                'title': issue.get('title'),
                'state': issue.get('state'),
                'html_url': issue.get('html_url'),
                'user': user.get('login'),
                'labels': ','.join(label['name'] for label in issue.get('labels') or []),
                'assignee': assignee.get('login') if assignee else '',
                'comments': issue.get('comments'),
                'created_at': issue.get('created_at'),
                'updated_at': issue.get('updated_at'),
                'closed_at': issue.get('closed_at'),
                'milestone': milestone.get('title') if milestone else '',
                'is_pull_request': 'Sí' if 'pull_request' in issue else 'No',
                # Line breaks are flattened so each issue body stays on one line.
                'body': (issue.get('body') or '').replace('\n', ' ').replace('\r', ' ')
            })

In [12]:
def saveIssuesAndCommentsCSV(issues, filename):
    """Write one CSV row per issue, including its flattened comments.

    The ``comments_text`` column joins every entry of the issue's
    ``all_comments`` list as ``[author]: text`` separated by ``" | "``.
    Line breaks inside comment and issue bodies are replaced by spaces.

    Args:
        issues: Iterable of issue dictionaries, optionally carrying an
            ``all_comments`` list of comment dictionaries. Missing or null
            ``user``, ``labels``, ``assignee`` and ``milestone`` entries are
            written as empty cells; a comment without an author is rendered
            as ``[None]``.
        filename: Path of the CSV file to create or overwrite.
    """
    fieldnames = [
        'number',
        'title',
        'state',
        'html_url',
        'user',
        'labels',
        'assignee',
        'comments_count',
        'comments_text',
        'created_at',
        'updated_at',
        'closed_at',
        'milestone',
        'is_pull_request',
        'body'
    ]

    with open(filename, mode='w', newline='', encoding='utf-8') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()

        for issue in issues:
            # Text of all comments. The pieces are built outside the f-string
            # because backslashes inside an f-string replacement field are a
            # syntax error before Python 3.12.
            comment_parts = []
            for comment in issue.get('all_comments') or []:
                author = (comment.get('user') or {}).get('login')
                text = (comment.get('body') or '').replace('\n', ' ').replace('\r', ' ')
                comment_parts.append(f"[{author}]: {text}")
            comments_text = " | ".join(comment_parts)

            # `dict.get(key, {})` only covers a missing key; a key present
            # with a null value still yields None, so `or` supplies the
            # empty fallback.
            user = issue.get('user') or {}
            assignee = issue.get('assignee') or {}
            milestone = issue.get('milestone') or {}

            writer.writerow({
                'number': issue.get('number'),
                'title': issue.get('title'),
                'state': issue.get('state'),
                'html_url': issue.get('html_url'),
                'user': user.get('login'),
                'labels': ','.join(label['name'] for label in issue.get('labels') or []),
                'assignee': assignee.get('login') if assignee else '',
                # Number of comments as reported in the issue payload.
                'comments_count': issue.get('comments'),
                'comments_text': comments_text,
                'created_at': issue.get('created_at'),
                'updated_at': issue.get('updated_at'),
                'closed_at': issue.get('closed_at'),
                'milestone': milestone.get('title') if milestone else '',
                'is_pull_request': 'Sí' if 'pull_request' in issue else 'No',
                'body': (issue.get('body') or '').replace('\n', ' ').replace('\r', ' ')
            })


In [13]:

def saveIssuesJSON(issues, filename):
    """Serialize a simplified view of each issue to a JSON file.

    Every issue becomes an object holding scalar fields, a list of label
    names and a ``comments`` list built from the issue's ``all_comments``
    entries (author login, creation date and body). Line breaks inside
    bodies are replaced by spaces. A confirmation line is printed once the
    file has been written.

    Args:
        issues: Iterable of issue dictionaries, optionally carrying an
            ``all_comments`` list of comment dictionaries. Missing or null
            ``user``, ``labels``, ``assignee`` and ``milestone`` entries are
            tolerated, as are comments without an author.
        filename: Path of the JSON file to create or overwrite.
    """
    # List that will hold every processed issue.
    processed_issues = []

    for issue in issues:
        # `dict.get(key, {})` only covers a missing key; a key present with
        # a null value still yields None, so `or` supplies the fallback.
        user = issue.get('user') or {}
        assignee = issue.get('assignee') or {}
        milestone = issue.get('milestone') or {}

        # Detailed comments, when the issue carries them.
        comments_list = [
            {
                'user': (comment.get('user') or {}).get('login'),
                'created_at': comment.get('created_at'),
                'body': (comment.get('body') or '').replace('\n', ' ').replace('\r', ' ')
            }
            for comment in issue.get('all_comments') or []
        ]

        processed_issues.append({
            'number': issue.get('number'),
            'title': issue.get('title'),
            'state': issue.get('state'),
            'html_url': issue.get('html_url'),
            'user': user.get('login'),
            'labels': [label['name'] for label in issue.get('labels') or []],
            'assignee': assignee.get('login') if assignee else '',
            'comments_count': issue.get('comments'),
            'comments': comments_list,
            'created_at': issue.get('created_at'),
            'updated_at': issue.get('updated_at'),
            'closed_at': issue.get('closed_at'),
            'milestone': milestone.get('title') if milestone else '',
            # An entry counts as a pull request when it has this key.
            'is_pull_request': 'pull_request' in issue,
            'body': (issue.get('body') or '').replace('\n', ' ').replace('\r', ' ')
        })

    # Write everything as JSON, keeping non-ASCII characters readable.
    with open(filename, 'w', encoding='utf-8') as f:
        json.dump(processed_issues, f, ensure_ascii=False, indent=2)

    print(f"Archivo JSON generado correctamente: {filename}")

In [15]:
    # Hard-coded alternative to the interactive prompts below:
    # owner = "giosicolo"
    # repo = "api-backend-ecokit"

    # Read the repository coordinates interactively. The values are placed
    # directly into the request URLs, so stray whitespace from a paste is
    # stripped first.
    owner = input("Ingrese el propietario del repositorio (owner): ").strip()
    repo = input("Ingrese el nombre del repositorio (repo): ").strip()

    # Closed issues go to the plain CSV; issues of every state, together
    # with their comments, feed the comments CSV and the JSON export.
    issues = getAllIssues(owner, repo, state="closed")
    issuescomments = getAllIssuesWithComments(owner, repo, state="all")
    saveIssuesCSV(issues, "issues.csv")
    saveIssuesAndCommentsCSV(issuescomments, "issuesComments.csv")
    saveIssuesJSON(issuescomments, "issuesComments.json")

    # saveIssuesJSON prints its own confirmation; report both CSV files here.
    print("Archivos 'issues.csv' e 'issuesComments.csv' creados correctamente.")

    # To inspect the downloaded issues (pull requests excluded) on screen:
    # for issue in issues:
    #     if "pull_request" not in issue:
    #         printIssue(issue)
               
       
    

Ingrese el propietario del repositorio (owner):  GraphiteEditor
Ingrese el nombre del repositorio (repo):  Graphite


Error: 422
Error al obtener comentarios de la issue 2740: 403
Error al obtener comentarios de la issue 2738: 403
Error: 403
Archivo JSON generado correctamente: issuesComments.json
Archivos 'issues.csv' creado correctamente.
