In [1]:
from utilities.utilities import load_data, get_records_by_region, create_column, finalize_dataframe, get_extreme_values, create_directory_structure, save_table, save_report, pd, assign_quartile
# --- Notebook-wide settings (read by the cells below) ---
category = 'axfr'                            # passed to save_table as `category`
table_name = report_name = 'axfr_by_region'  # table and report use the same base name
region_column_name = 'Region'
column_name_to_results_global = 'Global #'

# Project helper imported from utilities; presumably prepares the output
# folders used by save_table / save_report -- TODO confirm.
create_directory_structure()

# Load the 'axfr_checker' dataset; every later cell works on `source_df`.
source_df = load_data('axfr_checker')

In [2]:
# Print column names, non-null counts and dtypes of the loaded dataset
# (used to spot columns with missing values before the analysis).
source_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 458 entries, 0 to 457
Data columns (total 8 columns):
 #   Column            Non-Null Count  Dtype 
---  ------            --------------  ----- 
 0   id                458 non-null    int64 
 1   region            458 non-null    object
 2   name              458 non-null    object
 3   category          458 non-null    object
 4   url               458 non-null    object
 5   axfr_domain       458 non-null    object
 6   axfr_nameservers  458 non-null    object
 7   has_axfr          457 non-null    object
dtypes: int64(1), object(7)
memory usage: 28.8+ KB


In [3]:
# Dataset clean-up: a missing value in a flag column is stored as False,
# so the `has_axfr == True` / `has_axfr == False` filters cover every row.
columns_names = ['has_axfr']
for column in columns_names:
    missing_mask = source_df[column].isna()
    source_df.loc[missing_mask, column] = False

In [4]:
# Analysis of HEIs with / without AXFR (DNS zone transfer) enabled, by region,
# computed separately for public and private institutions (Pub/Pvt).


# settings
column_to_sort = 'Without axfr %'
sort_ascending = True
# One entry per HEI type: the name of the saved table and the `category`
# value used to filter the source records.
config = [
    {'table_name': 'axfr_by_region_public', 'hei_type': 'Public'},
    {'table_name': 'axfr_by_region_private', 'hei_type': 'Private'}
]
dfs = []  # finalized per-type tables, in the same order as `config`
for config_item in config:
    # Loop-local name on purpose: rebinding `table_name` here would silently
    # overwrite the notebook-level `table_name` setting for every later cell.
    hei_table_name = config_item['table_name']
    hei_type = config_item['hei_type']
    # Fresh list for every HEI type; it is handed to create_column and
    # finalize_dataframe (presumably extended by create_column -- TODO confirm).
    columns_to_display = [region_column_name.title(), column_name_to_results_global]
    analysis_df = get_records_by_region(source_df, hei_type=hei_type)
    # create columns
    # Column with the records of this HEI type where has_axfr is False, by region
    create_column(source_df=source_df, analysis_dataframe=analysis_df, column_name='Without axfr', criteria=f'has_axfr == False & category == "{hei_type}"', columns_to_display=columns_to_display)
    # Column with the records of this HEI type where has_axfr is True, by region
    create_column(source_df=source_df, analysis_dataframe=analysis_df, column_name='With axfr', criteria=f'has_axfr == True & category == "{hei_type}"', columns_to_display=columns_to_display)

    # Finalize dataframe (sorted by `column_to_sort`, ascending)
    analysis_df = finalize_dataframe(dataframe=analysis_df, column_to_sort=column_to_sort, ascending=sort_ascending, columns_to_display=columns_to_display)
    display(analysis_df)
    dfs.append(analysis_df)
    # save to csv
    save_table(analysis_df, category=category, table_name=hei_table_name)

# Combine the public and private tables into a single per-region comparison.
# `dfs` holds the finalized tables in `config` order: public first, then private.
# Every column gets a type suffix; the region key is renamed back so both
# frames can be merged on it.
df_public = dfs[0].add_suffix('(pub)').rename(columns={'Region(pub)': 'Region'})
df_private = dfs[1].add_suffix('(pvt)').rename(columns={'Region(pvt)': 'Region'})
# Outer merge keeps regions that appear in only one table; their gaps become 0.
df_combined = df_public.merge(df_private, on='Region', how='outer').fillna(0)

# Keep only the percentage columns: drop every count ('#') and every 'Global'
# column. Patterns are matched literally (regex=False).
combined_columns = df_combined.columns
is_dropped = (
    combined_columns.str.contains('#', regex=False)
    | combined_columns.str.contains('Global', regex=False)
)
# Explicit copy so the columns added below are written to an independent frame.
df_combined = df_combined.loc[:, ~is_dropped].copy()
# Remove the literal '%' from the remaining column names.
df_combined.columns = df_combined.columns.str.replace('%', '', regex=False)

ranks_columns = ['Rank']
# Temporary ranking key: public + private percentage of HEIs with AXFR enabled.
# NOTE(review): this puts the regions with the highest share of AXFR-enabled
# HEIs first (and therefore in the first quartile) -- confirm this is the
# intended direction of the ranking.
df_combined[ranks_columns[0]] = df_combined['With axfr (pub)'] + df_combined['With axfr (pvt)']

# Order by Rank, from highest to lowest.
df_combined = df_combined.sort_values(by=ranks_columns, ascending=False)
# Move the 'Total' row (if any) to the end, keeping the order of all other rows.
is_total = df_combined['Region'] == 'Total'
df_combined = pd.concat([df_combined[~is_total], df_combined[is_total]])
# Renumber the rows 0..n-1 and discard the temporary ranking key.
df_combined = df_combined.reset_index(drop=True).drop(columns=ranks_columns)

# Quartile of each row, derived from its position in the ranking. The reference
# size is the number of rows minus one, which is meant to exclude the 'Total' row.
# NOTE(review): assign_quartile is a project helper; its exact boundaries are
# not visible here.
ranked_rows = len(df_combined) - 1
quartiles = df_combined.index.map(lambda position: assign_quartile(position, ranked_rows))
# Place 'Quartile' directly in the second position, right after the first column.
df_combined.insert(1, 'Quartile', quartiles)


save_table(df_combined, category=category, table_name='axfr_by_region_combined')

Unnamed: 0,Region,Global #,Without axfr #,Without axfr %,With axfr #,With axfr %
0,Brandenburg,10,9,90.0,1,10.0
1,Rheinland-Pfalz,15,14,93.333333,1,6.666667
2,Nordrhein-Westfalen,42,40,95.238095,2,4.761905
3,Bayern,34,33,97.058824,1,2.941176
4,Baden-Württemberg,48,48,100.0,0,0.0
5,Berlin,12,12,100.0,0,0.0
6,Bremen,5,5,100.0,0,0.0
7,Hamburg,10,10,100.0,0,0.0
8,Hessen,22,22,100.0,0,0.0
9,Mecklenburg-Vorpommern,7,7,100.0,0,0.0


Unnamed: 0,Region,Global #,Without axfr #,Without axfr %,With axfr #,With axfr %
0,Bremen,2,1,50.0,1,50.0
1,Baden-Württemberg,24,23,95.833333,1,4.166667
2,Nordrhein-Westfalen,25,24,96.0,1,4.0
3,Berlin,28,27,96.428571,1,3.571429
4,Bayern,12,12,100.0,0,0.0
5,Brandenburg,9,9,100.0,0,0.0
6,Hamburg,11,11,100.0,0,0.0
7,Hessen,23,23,100.0,0,0.0
8,Niedersachsen,16,16,100.0,0,0.0
9,Rheinland-Pfalz,6,6,100.0,0,0.0
