In [None]:
import pandas as pd
import os

In [None]:
def scan_refusal_folders(base_path):
    """
    Scan each immediate subfolder of base_path for application/parallel PDFs.

    A subfolder is flagged as

    * IMI      -- it directly contains a ``.pdf`` file whose name includes
                  'application' and none of the excluded keywords
                  ('acknowledgement', 'withdrawal', 'bvb', 'passport',
                  'summary');
    * Parallel -- it directly contains a ``.pdf`` file whose name includes
                  'parallel'.

    All filename matching is case-insensitive, and only the direct children
    of each subfolder are inspected (no recursion). Non-directory entries of
    base_path are ignored.

    Parameters
    ----------
    base_path : str
        Directory whose immediate subfolders are scanned.

    Returns
    -------
    pandas.DataFrame
        One row per subfolder with columns Name (lowercased folder name),
        IMI and Parallel (each 'yes' or 'no'). Rows are sorted
        case-insensitively by folder name so repeated runs give the same
        output; the frame is empty (but keeps its columns) when base_path
        has no subfolders.

    Raises
    ------
    OSError
        Propagated from os.listdir when base_path or a subfolder cannot be
        listed.
    """
    # Loop-invariant: filenames containing any of these are not counted as
    # an application PDF.
    excludes = ('acknowledgement', 'withdrawal', 'bvb', 'passport', 'summary')

    records = []
    # os.listdir returns entries in arbitrary order; sort so that the
    # resulting DataFrame (and anything exported from it) is reproducible.
    entries = sorted(os.listdir(base_path), key=lambda e: (e.lower(), e))
    for entry in entries:
        folder = os.path.join(base_path, entry)
        if not os.path.isdir(folder):
            continue

        # Lowercased names of the PDFs sitting directly in this subfolder.
        pdf_names = [
            fn.lower() for fn in os.listdir(folder)
            if fn.lower().endswith('.pdf')
        ]
        has_imi = any(
            'application' in fn and not any(ex in fn for ex in excludes)
            for fn in pdf_names
        )
        has_parallel = any('parallel' in fn for fn in pdf_names)

        records.append({
            'Name': entry.lower(),  # normalize to lowercase
            'IMI': 'yes' if has_imi else 'no',
            'Parallel': 'yes' if has_parallel else 'no',
        })

    return pd.DataFrame(records, columns=['Name', 'IMI', 'Parallel'])

In [None]:
# Scan the Refusal tree, tag every row with its outcome, and remember the
# number of folders found for later percentage calculations.
base = 'data_all/Info Students/Refusal'
df_refusal = scan_refusal_folders(base).assign(type='Refusal')
ref_shape_r = len(df_refusal)

# Same again for the Grant tree.
base = 'data_all/Info Students/Grant'
df_grant = scan_refusal_folders(base).assign(type='Grant')
ref_shape_g = len(df_grant)



In [None]:
# Stack Grant rows on top of Refusal rows (original row indices are kept).
df_files = pd.concat(objs=[df_grant, df_refusal], axis=0)

In [None]:
# Persist the combined table without the index column.
df_files.to_csv(path_or_buf='files_grant_refusal.csv', index=False)

In [None]:
# Folders (Grant or Refusal) in which no application PDF was found.
df_files[df_files['IMI'] == 'no']

In [None]:
# Percentage of Refusal folders per Parallel flag value.
df_refusal['Parallel'].value_counts().div(ref_shape_r).mul(100)

In [None]:
# Number of Grant folders per IMI flag value.
df_grant['IMI'].value_counts()

In [None]:
# (rows, columns) of the Grant table built from 'data_all/Info Students/Grant'.
df_grant.shape

In [None]:
# Percentage of Refusal folders per Parallel flag value.
# Uses ref_shape_r (the Refusal row count captured when df_refusal was built);
# the previously referenced name `ref_shape` is not assigned in any visible
# cell and would raise NameError on a fresh run.
(df_refusal.Parallel.value_counts()/ref_shape_r)*100

In [None]:
# NOTE(review): this rebinds df_grant to a scan of 'data_all/Refusal_2', so
# the cells below no longer describe the Grant folder and the frame has no
# 'type' column — confirm the variable reuse is intentional.
base = 'data_all/Refusal_2'
df_grant = scan_refusal_folders(base_path=base)
grant_shape = len(df_grant)

In [None]:
# Folder count per IMI flag value in the current df_grant.
df_grant['IMI'].value_counts()

In [None]:
# Percentage of folders per IMI flag value in the current df_grant.
df_grant['IMI'].value_counts().div(grant_shape).mul(100)

In [None]:
# Same IMI percentage breakdown as the previous cell (re-run of that cell).
df_grant['IMI'].value_counts().div(grant_shape).mul(100)

In [None]:
# Percentage of folders per Parallel flag value in the current df_grant.
df_grant['Parallel'].value_counts().div(grant_shape).mul(100)

In [None]:
# (rows, columns) of the df_grant rows that have an application PDF.
df_grant[df_grant['IMI'] == 'yes'].shape

In [None]:
# (rows, columns) of the Refusal rows that have an application PDF.
df_refusal[df_refusal['IMI'] == 'yes'].shape

In [None]:
# (rows, columns) of the Refusal rows that lack an application PDF.
df_refusal[df_refusal['IMI'] == 'no'].shape

In [None]:
# NOTE(review): hard-coded figures — presumably counts copied by hand from
# earlier cell outputs; confirm their origin and derive them from the
# DataFrames instead of literals.
(625 + 115) / 5700 * 100