### Size of the GitHub repository

In [1]:
import subprocess
import os
import pandas as pd

# Display settings: never truncate the text shown in DataFrame cells
pd.options.display.max_colwidth = None

In [2]:
# The URL of your specific repository
repo_url = 'https://github.com/guillermo-navas-palencia/optbinning.git'

# Local checkout directory: the last URL segment without a trailing '.git'.
# Only the suffix is stripped, so a name such as 'user.github.io' stays intact.
repo_name = repo_url.rstrip('/').split('/')[-1]
if repo_name.endswith('.git'):
    repo_name = repo_name[:-len('.git')]

# Clone on first run so the notebook works from a fresh kernel. Without the
# checkout, os.walk() silently yields nothing and the summary below fails.
if not os.path.isdir(repo_name):
    subprocess.run(['git', 'clone', repo_url, repo_name], check=True)


def _categorize(rel_path):
    """Return the category label for a file path relative to the repo root.

    A file is "Git-related" when any component of its path starts with
    '.git' (for example '.git', '.github' or '.gitignore'). Otherwise the
    file extension decides between "Python", "Jupyter Notebook" and "Other".
    """
    if any(part.startswith('.git') for part in rel_path.split(os.sep)):
        return "Git-related"
    if rel_path.endswith('.py'):
        return "Python"
    if rel_path.endswith('.ipynb'):
        return "Jupyter Notebook"
    return "Other"


# List to hold file information (one dict per file)
files_data = []

# Walk through the repository directory
for root, dirs, files in os.walk(repo_name):
    # Directory of the current files, relative to the repo root ('.' for the root)
    directory_name = os.path.relpath(root, repo_name)
    for file in files:
        file_path = os.path.join(root, file)
        try:
            # Every file is opened as text and undecodable bytes are dropped,
            # so binary files (pack files, images, ...) also get a "line
            # count"; for those the number is not a meaningful size measure.
            with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
                line_count = sum(1 for _ in f)
        except OSError as e:
            # Unreadable file: report it and leave it out of the statistics
            print(f"Error reading {file_path}: {e}")
            continue

        files_data.append({
            "Directory": directory_name,
            "File Name": file,
            "Line Count": line_count,
            "Category": _categorize(os.path.relpath(file_path, repo_name))
        })

# Create a pandas DataFrame; explicit columns keep the frame usable even
# when no file could be read
df = pd.DataFrame(
    files_data,
    columns=["Directory", "File Name", "Line Count", "Category"]
)

# Number of files and total line count per category
df.groupby('Category').agg({'File Name': ['count'],
                            'Line Count': ['sum']}).reset_index()

Unnamed: 0_level_0,Category,File Name,Line Count
Unnamed: 0_level_1,Unnamed: 1_level_1,count,sum
0,Git-related,28,86819
1,Jupyter Notebook,20,36464
2,Python,100,31174
3,Regular,70,20318


In [3]:
# The 20 files with the highest line count, largest first.
# reset_index() renumbers the rows and keeps the previous row labels as a column.
(
    df
    .sort_values(by='Line Count', ascending=False)
    .iloc[:20]
    .reset_index()
)

Unnamed: 0,Directory,File Name,Line Count,Category
30,.git\objects\pack,pack-75c2c9b8daa2b9bdb4a453998b20a9ac78ae3ecf.pack,84714,Git-related
84,doc\source\_images,binning_data_stream.gif,8737,Regular
63,doc\source\tutorials,tutorial_binning_2d.ipynb,4964,Jupyter Notebook
59,doc\source\tutorials,tutorial_binary.ipynb,3794,Jupyter Notebook
62,doc\source\tutorials,tutorial_binary_under_uncertainty.ipynb,3762,Jupyter Notebook
68,doc\source\tutorials,tutorial_continuous.ipynb,2882,Jupyter Notebook
70,doc\source\tutorials,tutorial_counterfactual_binary_target.ipynb,2358,Jupyter Notebook
77,doc\source\tutorials,tutorial_scorecard_monitoring.ipynb,2112,Jupyter Notebook
104,optbinning\binning,binning_statistics.py,2089,Python
73,doc\source\tutorials,tutorial_piecewise_binary.ipynb,2044,Jupyter Notebook
