In [1]:
from utilities.utilities import load_data, get_records_by_region, create_column, finalize_dataframe, get_extreme_values, create_directory_structure, save_table, save_report, pd, assign_quartile
# --- Notebook-wide settings ---------------------------------------------
# Category passed to save_table / save_report for every artifact below.
category = 'dnssec'
# Base names of the table and report written by the later cells.
table_name = 'dnssec_by_region'
report_name = 'dnssec_by_region'
# Labels of the two leading columns shown in every analysis table.
region_column_name = 'Region'
column_name_to_results_global = 'Global #'

# Presumably prepares the output folders used by save_table / save_report
# (helper is defined in utilities.utilities) -- must run before anything is saved.
create_directory_structure()

# Raw DNSSEC checker results; every analysis below queries this frame.
source_df = load_data('dnssec_checker')

In [2]:
# Print the column names, non-null counts and dtypes of the loaded data.
source_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 458 entries, 0 to 457
Data columns (total 10 columns):
 #   Column             Non-Null Count  Dtype 
---  ------             --------------  ----- 
 0   id                 458 non-null    int64 
 1   region             458 non-null    object
 2   name               458 non-null    object
 3   category           458 non-null    object
 4   url                458 non-null    object
 5   dnssec_domain      458 non-null    object
 6   dnssec_nameserver  457 non-null    object
 7   has_dnssec         458 non-null    bool  
 8   dnssec_is_valid    458 non-null    bool  
 9   dnssec_algorithm   50 non-null     object
dtypes: bool(2), int64(1), object(7)
memory usage: 29.6+ KB


In [3]:
# Sanitise the dataset: the DNSSEC flag columns must not contain missing
# values, so every missing entry is overwritten with False.
columns_names = ['has_dnssec', 'dnssec_is_valid']
for column in columns_names:
    is_missing = source_df[column].isna()
    source_df.loc[is_missing, column] = False

In [4]:
# Analysis of HEIs with DNSSEC enabled by region, one table per institution
# type (Public / Private), followed by a combined table ranked by region.

# settings
column_to_sort = 'Without DNSSEC %'
sort_ascending = True
config = [
    {'table_name': 'dnssec_by_region_public', 'hei_type': 'Public'},
    {'table_name': 'dnssec_by_region_private', 'hei_type': 'Private'}
]
# (column label, query criteria) pairs, listed in the order the columns are created.
dnssec_states = [
    ('Without DNSSEC', 'has_dnssec == False'),
    ('With Invalid DNSSEC', 'has_dnssec == True & dnssec_is_valid == False'),
    ('With Valid DNSSEC', 'has_dnssec == True & dnssec_is_valid == True'),
]

dfs = []
for config_item in config:
    # Deliberately NOT named `table_name`: that notebook-level setting is read
    # again by later cells, and rebinding it here would make them save their
    # output under 'dnssec_by_region_private'.
    hei_table_name = config_item['table_name']
    hei_type = config_item['hei_type']
    # Fresh list per institution type; it is handed to create_column and
    # finalize_dataframe for this table only.
    columns_to_display = [region_column_name.title(), column_name_to_results_global]
    analysis_df = get_records_by_region(source_df, hei_type=hei_type)

    # One column per DNSSEC state, restricted to the current institution type.
    for state_label, state_criteria in dnssec_states:
        create_column(source_df=source_df, analysis_dataframe=analysis_df, column_name=state_label, criteria=f'{state_criteria} & category == "{hei_type}"', columns_to_display=columns_to_display)

    # Finalize dataframe
    analysis_df = finalize_dataframe(dataframe=analysis_df, column_to_sort=column_to_sort, ascending=sort_ascending, columns_to_display=columns_to_display)
    display(analysis_df)
    dfs.append(analysis_df)
    # save to csv
    save_table(analysis_df, category=category, table_name=hei_table_name)

# --- Combined public/private table -----------------------------------------
# `dfs` follows the order of `config`: index 0 is Public, index 1 is Private.
# Suffix every column with its institution type, then restore the join key.
df_public = dfs[0].add_suffix('(pub)').rename(columns={'Region(pub)': 'Region'})
df_private = dfs[1].add_suffix('(pvt)').rename(columns={'Region(pvt)': 'Region'})
# Outer join keeps regions present on only one side; their missing values become 0.
df_combined = df_public.merge(df_private, on='Region', how='outer').fillna(0)

# Keep only the percentage columns: drop the count ('#') and 'Global' columns.
is_count_column = df_combined.columns.str.contains('#', regex=False)
is_global_column = df_combined.columns.str.contains('Global', regex=False)
# .copy() so the column assignments below act on an independent frame.
df_combined = df_combined.loc[:, ~(is_count_column | is_global_column)].copy()
# remove '%' from the column names, e.g. 'Without DNSSEC %(pub)' -> 'Without DNSSEC (pub)'
df_combined.columns = df_combined.columns.str.replace('%', '', regex=False)

# Temporary sort keys: public + private percentage for each DNSSEC state,
# listed from the primary key to the last tie-breaker.
ranks_columns = ['Rank', 'Rank1', 'Rank2']
rank_sources = ['With Valid DNSSEC', 'With Invalid DNSSEC', 'Without DNSSEC']
for rank_column, state_label in zip(ranks_columns, rank_sources):
    df_combined[rank_column] = df_combined[f'{state_label} (pub)'] + df_combined[f'{state_label} (pvt)']

# order dataframe by the rank columns (from highest to lowest)
df_combined = df_combined.sort_values(by=ranks_columns, ascending=False)
# move the 'Total' row (if any) to the end of the dataframe
is_total_row = df_combined['Region'] == 'Total'
df_combined = pd.concat([df_combined[~is_total_row], df_combined[is_total_row]])
# renumber the rows so the index equals the rank position, then drop the sort keys
df_combined = df_combined.reset_index(drop=True).drop(columns=ranks_columns)

# Quartile of each row from its position in the ranking. The row count is
# reduced by one to exclude the 'Total' row.
# NOTE(review): this assumes exactly one 'Total' row is present, and that
# assign_quartile(position, total) accepts the 0-based index -- confirm in utilities.
ranked_rows = len(df_combined) - 1
df_combined['Quartile'] = df_combined.index.map(lambda rank: assign_quartile(rank, ranked_rows))
# move column 'Quartile' (currently last) to the second position
cols = list(df_combined.columns)
cols = [cols[0]] + [cols[-1]] + cols[1:-1]
df_combined = df_combined[cols]


save_table(df_combined, category=category, table_name='dnssec_by_region_combined')



Unnamed: 0,Region,Global #,Without DNSSEC #,Without DNSSEC %,With Invalid DNSSEC #,With Invalid DNSSEC %,With Valid DNSSEC #,With Valid DNSSEC %
0,Bayern,34,14,41.176471,0,0.0,20,58.823529
1,Mecklenburg-Vorpommern,7,5,71.428571,0,0.0,2,28.571429
2,Brandenburg,10,8,80.0,0,0.0,2,20.0
3,Bremen,5,4,80.0,0,0.0,1,20.0
4,Hamburg,10,8,80.0,0,0.0,2,20.0
5,Nordrhein-Westfalen,42,36,85.714286,0,0.0,6,14.285714
6,Rheinland-Pfalz,15,13,86.666667,0,0.0,2,13.333333
7,Hessen,22,20,90.909091,0,0.0,2,9.090909
8,Berlin,12,11,91.666667,0,0.0,1,8.333333
9,Niedersachsen,21,20,95.238095,0,0.0,1,4.761905


Unnamed: 0,Region,Global #,Without DNSSEC #,Without DNSSEC %,With Invalid DNSSEC #,With Invalid DNSSEC %,With Valid DNSSEC #,With Valid DNSSEC %
0,Bayern,12,9,75.0,0,0.0,3,25.0
1,Rheinland-Pfalz,6,5,83.333333,0,0.0,1,16.666667
2,Brandenburg,9,8,88.888889,0,0.0,1,11.111111
3,Nordrhein-Westfalen,25,23,92.0,1,4.0,1,4.0
4,Niedersachsen,16,15,93.75,0,0.0,1,6.25
5,Baden-Württemberg,24,24,100.0,0,0.0,0,0.0
6,Berlin,28,28,100.0,0,0.0,0,0.0
7,Bremen,2,2,100.0,0,0.0,0,0.0
8,Hamburg,11,11,100.0,0,0.0,0,0.0
9,Hessen,23,23,100.0,0,0.0,0,0.0


In [None]:
# Analysis of HEIs with DNSSEC enabled by region, with public and private
# institutions side by side in a single table.

# settings
# Pinned here so this cell never saves under a `table_name` value left behind
# by another cell.
table_name = 'dnssec_by_region'
column_to_sort = 'Without DNSSEC (Public) %'
sort_ascending = True
columns_to_display = [region_column_name.title(), column_name_to_results_global]
analysis_df = get_records_by_region(source_df)

# create columns
only_public = 'category == "Public"'
only_private = 'category == "Private"'
# (column label, query criteria) pairs, in the order the columns are created.
combined_dnssec_states = [
    ('Without DNSSEC', 'has_dnssec == False'),
    ('With Valid DNSSEC', 'has_dnssec == True & dnssec_is_valid == True'),
    ('With Invalid DNSSEC', 'has_dnssec == True & dnssec_is_valid == False'),
]
# For every DNSSEC state create the Public column first, then the Private one.
for state_label, criteria in combined_dnssec_states:
    for sector_label, sector_filter in (('Public', only_public), ('Private', only_private)):
        create_column(source_df=source_df, analysis_dataframe=analysis_df, column_name=f'{state_label} ({sector_label})', criteria=f'{criteria} & {sector_filter}', columns_to_display=columns_to_display)

# Finalize dataframe
analysis_df = finalize_dataframe(dataframe=analysis_df, column_to_sort=column_to_sort, ascending=sort_ascending, columns_to_display=columns_to_display)
display(analysis_df)

# save to csv
save_table(analysis_df, category=category, table_name=table_name)

In [4]:
# Report in LaTeX
# Builds the DNSSEC paragraph set from the combined public/private table and
# writes it out with save_report. All LaTeX text lives in raw strings so that
# sequences such as \c, \g or \% are never treated as (invalid) Python escapes.
report_results = get_extreme_values(analysis_df)


def _pct(value):
    """Render a numeric percentage with two decimal places."""
    return format(value, ".2f")


def _total_pct(column):
    """Return the two-decimal value of `column` from the "Total" entry of report_results."""
    return _pct(report_results.get("Total").get(column))


def _ranked_regions(column, ranking):
    """Render the first three entries of a ranking as 'A (x%), B (y%), and C (z%)'.

    `column` selects the entry of report_results and `ranking` is either
    "top_regions" or "bottom_regions". Each ranked item is read as
    item[0] (region name) and item[1] (percentage). The percent signs in the
    result are LaTeX-escaped.
    """
    ranked = report_results.get(column).get(ranking)
    first, second, third = (
        rf'{ranked[i][0]} ({_pct(ranked[i][1])}\%)' for i in range(3)
    )
    return f'{first}, {second}, and {third}'


hei_public_without_dnssec = _total_pct("Without DNSSEC (Public) %")
hei_public_with_valid_dnssec = _total_pct("With Valid DNSSEC (Public) %")
hei_public_with_invalid_dnssec = _total_pct("With Invalid DNSSEC (Public) %")

hei_private_without_dnssec = _total_pct("Without DNSSEC (Private) %")
hei_private_with_valid_dnssec = _total_pct("With Valid DNSSEC (Private) %")
hei_private_with_invalid_dnssec = _total_pct("With Invalid DNSSEC (Private) %")

report_figure = r"""
\begin{figure}[htbp]
    \centering
    \includegraphics[width=0.48\textwidth]{charts/dnssec.pdf}
    \caption{Distribution of DNSSEC usage by regions.}\label{fig:dnssec}
\end{figure}
"""

# One entry per paragraph; each is followed by a blank line in the final report.
paragraphs = [
    report_figure,
    rf"The data presented in Figure~\ref{{fig:dnssec}} provides an overview of the use of \gls{{dnssec}} at \glspl{{hei}} in \countryName. According to the data, {hei_public_without_dnssec}\% of the public institutions analyzed have not implemented \gls{{dnssec}} on their domains, while {hei_private_without_dnssec}\% of the private institutions analyzed also don't use \gls{{dnssec}}.",
    rf"On a positive note, {hei_public_with_valid_dnssec}\% of the public institutions analyzed have implemented \gls{{dnssec}} with a valid configuration in their domains, and {hei_private_with_valid_dnssec}\% of the private institutions have implemented valid DNSSEC.",
    rf"However, {hei_public_with_invalid_dnssec}\% of the public institutions analyzed have implemented invalid \gls{{dnssec}} in their domains, and {hei_private_with_invalid_dnssec}\% of the private institutions also have implemented \gls{{dnssec}} with invalid configurations.",
    rf"In terms of regional differences, private institutions in {_ranked_regions('With Valid DNSSEC (Private) %', 'top_regions')}, and public institutions in {_ranked_regions('With Valid DNSSEC (Public) %', 'top_regions')} have a higher usage of \gls{{dnssec}}.",
    rf"In contrast, private institutions in {_ranked_regions('With Valid DNSSEC (Private) %', 'bottom_regions')}, and public institutions in {_ranked_regions('With Valid DNSSEC (Public) %', 'bottom_regions')} have a lower usage of \gls{{dnssec}}.",
    rf"Finally, when it comes to invalid settings, private institutions in {_ranked_regions('With Invalid DNSSEC (Private) %', 'top_regions')}, and public institutions in {_ranked_regions('With Invalid DNSSEC (Public) %', 'top_regions')} have a higher usage of \gls{{dnssec}} with invalid configurations.",
]
report = "".join(paragraph + "\n\n" for paragraph in paragraphs)

print(report)
# save report to file txt
save_report(report=report, category=category, report_name=report_name)


\begin{figure}[htbp]
    \centering
    \includegraphics[width=0.48\textwidth]{charts/dnssec.pdf}
    \caption{Distribution of DNSSEC usage by regions.}\label{fig:dnssec}
\end{figure}


The data presented in Figure~\ref{fig:dnssec} provides an overview of the use of \gls{dnssec} at \glspl{hei} in \countryName. According to the data, 27.65\% of the public institutions analyzed have not implemented \gls{dnssec} on their domains, while 69.36\% of the private institutions analyzed also dont use \gls{dnssec}.

On a positive note, 0.71\% of the public institutions analyzed have implemented \gls{dnssec} with a valid configurations in their domains, and 1.72\% of the private institutions have implemented valid DNSSEC.

However, 0.10\% of the public institutions analyzed have implemented invalid \gls{dnssec} in their domains, and 0.46\% of the private institutions also have implemented \gls{dnssec} with invalid configurations.

In terms of regional differences, private institutions in Oregon (