# Proyectos Bitbucket

## 1. Repositorios

### Consultar

In [25]:
%pip install python-dotenv

import requests
import pandas as pd
from dotenv import load_dotenv
import os
from IPython.display import display


# Credentials and workspace are read from the environment; load_dotenv()
# copies the entries of a local .env file into the process environment first.
load_dotenv()

username = os.getenv("BITBUCKET_USERNAME")
app_password = os.getenv("BITBUCKET_APP_PASSWORD")
workspace = os.getenv("BITBUCKET_WORKSPACE")


url_repos = f"https://api.bitbucket.org/2.0/repositories/{workspace}"

# Repositories that must not take part in the analysis below.
REPOS_EXCLUIDOS = ["pgp", "Pruebas_erp", "Inventario", "b2c", "efi"]

# Defined before the request so the rest of the cell (and later cells) still
# work when the API call fails; previously a failure left this name undefined.
repos_data = []

# The endpoint is paginated: keep following the "next" link so workspaces with
# more repositories than one page holds are listed completely.
next_url = url_repos
first_page = True
while next_url:
    response = requests.get(next_url, auth=(username, app_password), timeout=30)
    if response.status_code != 200:
        print("Error al obtener repositorios:", response.status_code, response.text)
        break
    if first_page:
        print(f"Repositorios en {workspace}:")
        first_page = False
    repos_page = response.json()
    for repo in repos_page.get("values", []):
        repos_data.append(
            {
                "Nombre": repo.get("name"),
                "URL": repo.get("links", {}).get("html", {}).get("href"),
            }
        )
    next_url = repos_page.get("next")

# Explicit columns keep sort_values() valid even when nothing was returned.
repos_df = pd.DataFrame(repos_data, columns=["Nombre", "URL"]).sort_values(
    by="Nombre"
)  # ascending by repository name

# Repository names kept for later use.
# NOTE(review): these are display names ("name"), not slugs; confirm they are
# valid in API URLs before iterating over them as repo_slug.
repositorios = [
    repo["Nombre"] for repo in repos_data if repo["Nombre"] not in REPOS_EXCLUIDOS
]



[notice] A new release of pip is available: 24.3.1 -> 25.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


Note: you may need to restart the kernel to use updated packages.
Repositorios en itmz:


### Imprimir los datos

In [26]:
# Show the kept repository names as a one-column table.
repos_df = pd.DataFrame(data=repositorios, columns=["Repositorio"])
display(repos_df)

Unnamed: 0,Repositorio
0,api_it_management
1,b2b
2,ApiEmpleados
3,fip
4,api_data_ERP


## 2. Funciones

In [27]:
def get_all_items(url, params=None, max_pages=1, timeout=30):
    """Fetch the "values" entries of a paginated Bitbucket API endpoint.

    Args:
        url: Endpoint URL of the first page to request.
        params: Optional query parameters, sent with the first request only;
            the "next" link is requested as returned by the API.
        max_pages: Maximum number of pages to read. The default of 1 keeps the
            original single-request behaviour; pass None to follow "next"
            links until the API stops returning one.
        timeout: Seconds to wait for each HTTP response, so a stalled
            connection cannot block the notebook indefinitely.

    Returns:
        A list with the items collected so far. On a non-200 response the
        error is printed and the items gathered up to that point are returned.
    """
    items = []
    pages_read = 0
    while url and (max_pages is None or pages_read < max_pages):
        response = requests.get(
            url, params=params, auth=(username, app_password), timeout=timeout
        )
        if response.status_code != 200:
            print("Error:", response.status_code, response.text)
            return items
        data = response.json()
        items.extend(data.get("values", []))
        pages_read += 1
        # Continue from the link supplied by the API; drop params so they are
        # not appended a second time to that URL.
        url = data.get("next")
        params = None
    return items


## 3. Pull requests

#### Consultar

- Se consultan los PR en todos sus estados
- 50 registros por página, 3 páginas por repositorio

In [28]:
cantidad_paginas = 3
registros_por_pagina = 50  # the author believes the API rejects more than 50 per page

# Collect pull requests of every state, page by page, for each repository.
pull_requests = []
# for repo_slug in repositorios:
for repo_slug in ["fip", "b2b"]:
    pr_url = f"https://api.bitbucket.org/2.0/repositories/{workspace}/{repo_slug}/pullrequests"
    for page in range(1, cantidad_paginas + 1):
        page_items = get_all_items(
            pr_url,
            params={"page": page, "pagelen": registros_por_pagina, "state": "ALL"},
        )
        if not page_items:
            # An empty page means the request failed (already reported by
            # get_all_items) or we ran past the last page; further pages for
            # this repository would only add useless requests.
            break
        pull_requests.extend(page_items)
        print(f"Obteniendo pull requests, repositorio: '{repo_slug}' - página: {page}")

print(f"\nTotal de pull requests: {len(pull_requests)}")

Obteniendo pull requests, repositorio: 'fip' - página: 1
Obteniendo pull requests, repositorio: 'fip' - página: 2
Obteniendo pull requests, repositorio: 'fip' - página: 3
Obteniendo pull requests, repositorio: 'b2b' - página: 1
Obteniendo pull requests, repositorio: 'b2b' - página: 2
Obteniendo pull requests, repositorio: 'b2b' - página: 3

Total de pull requests: 300


#### Filtrar registros: conservar solo los de los últimos 60 días

In [29]:
print(f"\nTotal de registros obtenidos: {len(pull_requests)}")

# Keep only the pull requests created within the last `cantidad_dias` days.
cantidad_dias = 60

fecha_limite = pd.Timestamp.now(tz="America/Bogota") - pd.Timedelta(days=cantidad_dias)
pull_requests = list(
    filter(
        lambda item: pd.to_datetime(item["created_on"]) >= fecha_limite,
        pull_requests,
    )
)

print(f"\nRegistros de los últimos {cantidad_dias} días: {len(pull_requests)}\n")


Total de registros obtenidos: 300

Registros de los últimos 60 días: 104



#### Limpiar los datos

- Agrega campos nuevos
- Elimina campos sin uso
- Ordena por created_on DESC

In [30]:
# Flatten the nested fields of every pull request in place and derive the
# extra columns (branch, type_branch, repository, days_open).
#
# Nested values are only unpacked when they are dicts. This keeps the cell
# safe to re-run (already-flattened fields are plain strings) and tolerant of
# fields the API returns as null.
now_bogota = pd.Timestamp.now(tz="America/Bogota")  # one reference time for all rows

for pr in pull_requests:
    # author -> nickname only
    author = pr.get("author")
    if isinstance(author, dict) and "nickname" in author:
        pr["author"] = author["nickname"]

    source = pr.get("source")
    if not isinstance(source, dict):
        source = {}

    # Source branch name and its prefix (text before the first "/").
    branch = source.get("branch")
    if isinstance(branch, dict) and "name" in branch:
        pr["branch"] = branch["name"]
        pr["type_branch"] = (
            pr["branch"].split("/")[0] if "/" in pr["branch"] else "unknown"
        )

    # Name of the repository the source branch lives in.
    repository = source.get("repository")
    if isinstance(repository, dict) and "name" in repository:
        pr["repository"] = repository["name"]

    # merge_commit -> hash only
    merge_commit = pr.get("merge_commit")
    if isinstance(merge_commit, dict) and "hash" in merge_commit:
        pr["merge_commit"] = merge_commit["hash"]

    # days_open depends on the PR state:
    #   OPEN   -> whole days between created_on and now
    #   MERGED -> whole days between created_on and updated_on
    #   other  -> 0
    if "state" in pr and "created_on" in pr:
        created_on = pd.to_datetime(pr["created_on"]).tz_convert("America/Bogota")
        if pr["state"] == "OPEN":
            days_open = (now_bogota - created_on).days
        elif pr["state"] == "MERGED" and "updated_on" in pr:
            updated_on = pd.to_datetime(pr["updated_on"]).tz_convert("America/Bogota")
            days_open = (updated_on - created_on).days
        else:
            days_open = 0
        pr["days_open"] = days_open


df_pr = pd.DataFrame(pull_requests)

# Most recent records first; skipped when there are no rows (and therefore no
# created_on column) so an empty result does not raise.
if "created_on" in df_pr.columns:
    df_pr = df_pr.sort_values(by="created_on", ascending=False)

# Drop the columns that are not used; errors="ignore" tolerates columns that
# are absent from the fetched data.
df_pr = df_pr.drop(
    columns=[
        "type",
        "title",
        "description",
        "reason",
        "destination",
        "summary",
        "closed_by",
        "links",
        "source",
    ],
    errors="ignore",
)

#### Imprimir los datos

In [31]:
# Print the container type, then render the cleaned pull-request table.
tipo_df_pr = type(df_pr)
print(tipo_df_pr)
display(df_pr)

<class 'pandas.core.frame.DataFrame'>


Unnamed: 0,comment_count,task_count,id,state,merge_commit,close_source_branch,author,created_on,updated_on,branch,type_branch,repository,days_open
42,0,0,626,OPEN,,True,Camilo Medina,2025-03-06T19:23:40.370395+00:00,2025-03-06T21:40:41.153762+00:00,feature/BTB-733,feature,b2b,0
44,0,0,625,OPEN,,True,Andrés Felipe Perdomo,2025-03-06T14:39:08.987795+00:00,2025-03-06T15:32:50.595847+00:00,bugfix/BTB-753,bugfix,b2b,0
43,0,0,624,OPEN,,True,Camilo Medina,2025-03-06T13:45:30.413244+00:00,2025-03-06T21:39:35.015917+00:00,feature/BTB-729,feature,b2b,0
1,0,0,770,OPEN,,False,Camila Castañeda,2025-03-06T13:43:17.593867+00:00,2025-03-06T21:40:26.604375+00:00,feature/CPK-915,feature,fip,0
0,0,0,769,MERGED,426776c620a3,True,Andrés Felipe Perdomo,2025-03-05T22:32:16.605845+00:00,2025-03-06T21:56:15.323635+00:00,feature/CPK-921,feature,fip,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
100,0,0,567,MERGED,1114ffbbc796,True,Andrés Felipe Perdomo,2025-01-08T20:50:11.996985+00:00,2025-01-13T16:21:15.706956+00:00,feature/BTB-670,feature,b2b,4
98,0,0,566,MERGED,728f69c04ee5,True,Camilo Medina,2025-01-08T19:21:47.356993+00:00,2025-01-15T02:38:09.650167+00:00,feature/BTB-633,feature,b2b,6
39,1,0,730,MERGED,bd5001183ac2,True,Andrés Felipe Perdomo,2025-01-08T14:57:40.958047+00:00,2025-01-15T02:42:02.274247+00:00,feature/CPK-888,feature,fip,6
102,0,0,565,MERGED,ba9327604d39,True,Camila Castañeda,2025-01-07T15:03:00.870845+00:00,2025-01-09T13:22:39.140108+00:00,feature/BTB-650,feature,b2b,1


## 4. Commits

In [32]:
# Obtener commits
# commits_url = (
#     f"https://api.bitbucket.org/2.0/repositories/{workspace}/{repo_slug}/commits"
# )
# commits = get_all_items(commits_url, params={"pagelen": 50})
# for pull_request in commits:
#     author = pull_request.get("author", {}).get("user", {}).get("nickname", None)
#     pull_request.pop("author", None)  # Eliminar la columna 'author'
#     pull_request["author"] = author
# df_commits = pd.DataFrame(commits)
# display(df_commits.head(2))

In [33]:
# Ejemplo: Contar la cantidad de commits por autor
# if not df_commits.empty and "author" in df_commits.columns:
#     commits_por_autor = df_commits.groupby("author").size().reset_index(name="commits")
#     print(commits_por_autor)

In [34]:
# Ejemplo: Calcular tiempo promedio en PR (si se cuenta con las columnas 'created_on' y 'updated_on')
# if not df_pr.empty and "created_on" in df_pr.columns and "updated_on" in df_pr.columns:
#     df_pr["created_on"] = pd.to_datetime(df_pr["created_on"])
#     df_pr["updated_on"] = pd.to_datetime(df_pr["updated_on"])
#     df_pr["tiempo_PR"] = (
#         df_pr["updated_on"] - df_pr["created_on"]
#     ).dt.total_seconds() / 3600  # en horas
#     promedio_tiempo = df_pr["tiempo_PR"].mean()
#     print("Tiempo promedio de PR (horas):", promedio_tiempo)

In [35]:
# Asegurarse de que existen las columnas necesarias
# if not df_pr.empty and "created_on" in df_pr.columns and "updated_on" in df_pr.columns:
#     # Convertir a formato datetime
#     df_pr["created_on"] = pd.to_datetime(df_pr["created_on"])
#     df_pr["updated_on"] = pd.to_datetime(df_pr["updated_on"])

#     # Calcular el tiempo de cada PR en horas
#     df_pr["tiempo_PR"] = (
#         df_pr["updated_on"] - df_pr["created_on"]
#     ).dt.total_seconds() / 3600  # en horas

#     # Imprimir el autor y el tiempo de cada PR
#     for index, row in df_pr.iterrows():
#         print(f"PR creado por {row['author']} tomó {row['tiempo_PR']:.2f} horas.")
# else:
#     print(
#         "No se encontraron datos de pull requests o las columnas necesarias están ausentes."
#     )