In [12]:
import json
from datetime import datetime, timezone

def analyze_repositories(data):
    repositories = json.loads(data)["repositories"]
    
    analyzed_repos = []

    for repo in repositories:
        total_commits = 0
        latest_commit_date = datetime.min.replace(tzinfo=timezone.utc)
        
        for env, details in repo["environs"].items():
            # Check if the environment details is a list (meaning no commits)
            if isinstance(details, list):
                continue  # Skip to the next environment
            
            # Check if details is a dictionary and process it
            if isinstance(details, dict) and details:
                commit_count = details.get("commit_count", 0)
                total_commits += commit_count
                
                if details.get("latest_commit_date") and details["latest_commit_date"] != "no commits found":
                    commit_date = datetime.strptime(details["latest_commit_date"], "%Y-%m-%dT%H:%M:%S.%f%z")
                    if commit_date > latest_commit_date:
                        latest_commit_date = commit_date

        analyzed_repos.append({
            "name": repo["name"],
            "total_commits": total_commits,
            "latest_commit_date": latest_commit_date if latest_commit_date != datetime.min.replace(tzinfo=timezone.utc) else "No commits"
        })

    analyzed_repos.sort(key=lambda x: (-x["total_commits"], -str(x["latest_commit_date"])))

    return analyzed_repos

# Example usage:
# result = analyze_repositories(json_data)
# for repo in result:
#     print


In [14]:
import json

with open('hr.json', 'r') as f:
    # analyze_repositories(f.read())
    data = json.loads(f.read())

# analyze_repositories(data)

In [22]:
d = []

for rep in data['repositories']:
    env = rep['environs']
    d.append({
            'repository': rep['name'],
            'group': rep['group'],
            'branch': env['master']['branch'],
            'latest_commit_date': env['master']['latest_commit_date'],
            'commit_count': env['master']['commit_count']
    })

In [25]:
df = pd.json_normalize(d,  errors='ignore')

In [36]:
pd.set_option('display.max_rows', 100)

In [37]:
import pandas as pd
from statics import HR_PORTAL_SERVICES, HR_PORTAL_OTHER, HR_PORTAL_TOOLS

# Assuming df is your DataFrame and it's already defined

# Combine the lists of repositories
repositories = HR_PORTAL_SERVICES

# Filter the DataFrame to include only the repositories listed
df_filtered = df[df['repository'].isin(repositories)]

# Sort the filtered DataFrame as requested
df_sorted = df_filtered.sort_values(by=['commit_count', 'latest_commit_date'], ascending=[False, True])

df_sorted

Unnamed: 0,repository,group,branch,latest_commit_date,commit_count
19,hr-employee-api,hr,master,2024-02-05T08:39:59.000+00:00,610
91,account-backend,hr,master,2024-02-07T10:28:15.000+04:00,443
69,authorization,hr,master,2024-02-06T07:34:00.000+00:00,377
98,hr-sms-sender,hr,master,2023-10-31T08:14:20.000+00:00,228
73,hr-worktime,hr,master,2023-10-11T07:38:36.000+00:00,208
9,bff,hr,master,2024-02-07T09:30:35.000+00:00,138
90,hr-extended-profile,hr,master,2023-08-09T13:02:49.000+00:00,130
53,enforcement-orders,hr,master,2023-11-07T16:01:34.000+00:00,90
82,hr-employees-history,hr,master,2023-11-10T15:23:47.000+06:00,67
35,hr-wbusers-sync,hr,master,2023-11-30T13:11:20.000+00:00,65


In [40]:

df[df['group'] == 'hr']

# df.columns
df_sorted = df.sort_values(by=['latest_commit_date', 'commit_count'], ascending=[False, False])
df_sorted

Unnamed: 0,repository,group,branch,latest_commit_date,commit_count
11,hr-landing,hr,master,no commits found,0
3,salary-sync,hr,master,2024-02-08T16:25:20.000+01:00,27
51,hr-frontends-monorepo,hr,master,2024-02-08T16:13:14.000+03:00,872
4,ListOfContracts,hr,master,2024-02-07T19:16:22.000+03:00,5
12,welcome-landing,hr,master,2024-02-07T18:27:03.000+03:00,76
18,pa-sync,hr,master,2024-02-07T14:44:27.000+00:00,142
91,account-backend,hr,master,2024-02-07T10:28:15.000+04:00,443
9,bff,hr,master,2024-02-07T09:30:35.000+00:00,138
22,hr-st,hr,master,2024-02-06T10:26:45.000+00:00,38
69,authorization,hr,master,2024-02-06T07:34:00.000+00:00,377


In [17]:
import csv
import json

# data = json.loads(json_data)

# Prepare CSV data
csv_data = [["Name", "Group", "Master Branch", "Master Latest Commit Date", "Master Commit Count", "Stage Branch", "Stage Commit Count", "Stage Offset With Master", "Stage Latest Commit Date", "Dev Branch", "Dev Commit Count", "Dev Latest Commit Date"]]

for repo in data["repositories"]:
    row = [
        repo["name"],
        repo["group"],
        repo["environs"]["master"].get("branch", ""),
        repo["environs"]["master"].get("latest_commit_date", ""),
        repo["environs"]["master"].get("commit_count", 0),
        repo["environs"]["stage"][0].get("branch", "") if repo["environs"]["stage"] else "",
        repo["environs"]["stage"][0].get("commit_count", 0) if repo["environs"]["stage"] else 0,
        repo["environs"]["stage"][0].get("offset_with_master", "") if repo["environs"]["stage"] else "",
        repo["environs"]["stage"][0].get("latest_commit_date", "") if repo["environs"]["stage"] else "",
        "",  # Dev environments are empty in this sample
        0,
        ""
    ]
    csv_data.append(row)

# Convert to CSV string (or write directly to a file)
csv_string = "\n".join([",".join(map(str, row)) for row in csv_data])
print(csv_string)

Name,Group,Master Branch,Master Latest Commit Date,Master Commit Count,Stage Branch,Stage Commit Count,Stage Offset With Master,Stage Latest Commit Date,Dev Branch,Dev Commit Count,Dev Latest Commit Date
salary-api,hr,master,2024-02-05T08:25:16.000+00:00,1,release-stage-dp,15,14,2024-02-08T15:40:43.000+00:00,,0,
Bingo,hr,master,2024-01-15T15:00:44.000+03:00,15,,0,,,,0,
Bingo Demo,hr,master,2024-01-10T14:22:23.000+00:00,1,,0,,,,0,
salary-sync,hr,master,2024-02-08T16:25:20.000+01:00,27,release-stage-dp,20,-7,2024-02-05T15:40:47.000+03:00,,0,
ListOfContracts,hr,master,2024-02-07T19:16:22.000+03:00,5,,0,,,,0,
zup-client,hr,master,2023-12-29T18:00:51.000+03:00,5,,0,,,,0,
employee-cohort,hr,master,2023-12-19T08:58:49.000+00:00,1,release-stage-dp,13,12,2024-01-24T12:54:45.000+03:00,,0,
Documents,hr,master,2023-11-07T19:27:14.000+05:00,3,,0,,,,0,
hr-employee-api-client,hr,master,2024-01-31T12:48:09.000+00:00,28,,0,,,,0,
bff,hr,master,2024-02-07T09:30:35.000+00:00,138,release-stage,133,-5,2024-

In [8]:
import pandas as pd

pd.read_json('hr.json')

Unnamed: 0,repositories
0,"{'environs': {'master': {'branch': 'master', '..."
1,"{'environs': {'master': {'branch': 'master', '..."
2,"{'environs': {'master': {'branch': 'master', '..."
3,"{'environs': {'master': {'branch': 'master', '..."
4,"{'environs': {'master': {'branch': 'master', '..."
...,...
95,"{'environs': {'master': {'branch': 'master', '..."
96,"{'environs': {'master': {'branch': 'master', '..."
97,"{'environs': {'master': {'branch': 'master', '..."
98,"{'environs': {'master': {'branch': 'master', '..."
