In [26]:
import json
from datetime import datetime, timezone
import pandas as pd

def analyze_repositories(data):
    repositories = json.loads(data)["repositories"]
    
    analyzed_repos = []

    for repo in repositories:
        total_commits = 0
        latest_commit_date = datetime.min.replace(tzinfo=timezone.utc)
        
        for env, details in repo["environs"].items():
            # Check if the environment details is a list (meaning no commits)
            if isinstance(details, list):
                continue  # Skip to the next environment
            
            # Check if details is a dictionary and process it
            if isinstance(details, dict) and details:
                commit_count = details.get("commit_count", 0)
                total_commits += commit_count
                
                if details.get("latest_commit_date") and details["latest_commit_date"] != "no commits found":
                    commit_date = datetime.strptime(details["latest_commit_date"], "%Y-%m-%dT%H:%M:%S.%f%z")
                    if commit_date > latest_commit_date:
                        latest_commit_date = commit_date

        analyzed_repos.append({
            "name": repo["name"],
            "total_commits": total_commits,
            "latest_commit_date": latest_commit_date if latest_commit_date != datetime.min.replace(tzinfo=timezone.utc) else "No commits"
        })

    analyzed_repos.sort(key=lambda x: (-x["total_commits"], -str(x["latest_commit_date"])))

    return analyzed_repos

# Example usage:
# result = analyze_repositories(json_data)
# for repo in result:
#     print


In [27]:
import json

with open('hr.json', 'r') as f:
    # analyze_repositories(f.read())
    data = json.loads(f.read())

# analyze_repositories(data)

In [28]:
d = []

for rep in data['repositories']:
    env = rep['environs']
    d.append({
            'repository': rep['name'],
            'group': rep['group'],
            'branch': env['master']['branch'],
            'latest_commit_date': env['master']['latest_commit_date'],
            'commit_count': env['master']['commit_count']
    })

In [29]:
df = pd.json_normalize(d,  errors='ignore')

In [12]:
pd.set_option('display.max_rows', 100)

In [33]:
import pandas as pd
from statics import HR_PORTAL_SERVICES, HR_PORTAL_OTHER, HR_PORTAL_TOOLS

# Assuming df is your DataFrame and it's already defined

# Combine the lists of repositories
repositories = HR_PORTAL_SERVICES

# Filter the DataFrame to include only the repositories listed
# df_filtered = df[df['repository'].isin(repositories)]

# Sort the filtered DataFrame as requested
# df_sorted = df_filtered.sort_values(by=['commit_count', 'latest_commit_date'], ascending=[False, True])
df_sorted = df.sort_values(by=['latest_commit_date'], ascending=[False])

# print(df_sorted['repository'].values.tolist())

df_sorted

Unnamed: 0,repository,group,branch,latest_commit_date,commit_count
11,hr-landing,hr,master,no commits found,0
3,salary-sync,hr,master,2024-02-08T16:25:20.000+01:00,27
51,hr-frontends-monorepo,hr,master,2024-02-08T16:13:14.000+03:00,872
4,ListOfContracts,hr,master,2024-02-07T19:16:22.000+03:00,5
12,welcome-landing,hr,master,2024-02-07T18:27:03.000+03:00,76
18,pa-sync,hr,master,2024-02-07T14:44:27.000+00:00,142
91,account-backend,hr,master,2024-02-07T10:28:15.000+04:00,443
9,bff,hr,master,2024-02-07T09:30:35.000+00:00,138
22,hr-st,hr,master,2024-02-06T10:26:45.000+00:00,38
69,authorization,hr,master,2024-02-06T07:34:00.000+00:00,377


In [20]:
# filtered_df = df_sorted[df_sorted['latest_commit_date'].dt.year == 2024]

df['Date'] = pd.to_datetime(df['latest_commit_date'], errors='coerce')

# Filter the DataFrame by year
filtered_df = df[df['Date'].dt.year == 2024]

filtered_df

  df['Date'] = pd.to_datetime(df['latest_commit_date'], errors='coerce')


AttributeError: Can only use .dt accessor with datetimelike values

In [16]:

df[df['group'] == 'hr']

# df.columns
df_sorted = df.sort_values(by=['commit_count', 'latest_commit_date'], ascending=[False, False])
df_sorted.to_csv('hr_portal_audit.csv')

In [8]:
import pandas as pd

pd.read_json('hr.json')

Unnamed: 0,repositories
0,"{'environs': {'master': {'branch': 'master', '..."
1,"{'environs': {'master': {'branch': 'master', '..."
2,"{'environs': {'master': {'branch': 'master', '..."
3,"{'environs': {'master': {'branch': 'master', '..."
4,"{'environs': {'master': {'branch': 'master', '..."
...,...
95,"{'environs': {'master': {'branch': 'master', '..."
96,"{'environs': {'master': {'branch': 'master', '..."
97,"{'environs': {'master': {'branch': 'master', '..."
98,"{'environs': {'master': {'branch': 'master', '..."
