In [1]:
from utilities.utilities import load_data, get_records_by_region, create_column, finalize_dataframe, get_extreme_values, create_directory_structure, save_table, save_report, pd, assign_quartile
# --- Notebook-wide settings -------------------------------------------------
# Category handed to save_table when the result tables are written.
category = 'dnssec'
# Base names for this notebook's table and report outputs.
table_name = 'dnssec_by_region'
report_name = 'dnssec_by_region'
# Labels of the two leading columns shown in every per-region table.
region_column_name = 'Region'
column_name_to_results_global = 'Global #'

# --- Environment and input data ---------------------------------------------
# The directory helper runs before the data is loaded, as in the original order.
create_directory_structure()
source_df = load_data('dnssec_checker')

In [2]:
# Sanitize the dataset: a missing DNSSEC flag is treated as False so that the
# boolean criteria used by the analysis cells classify every record.
columns_names = ['has_dnssec', 'dnssec_is_valid']
for column in columns_names:
    missing_flag = source_df[column].isna()
    source_df.loc[missing_flag, column] = False

# Quick overview of the frame after the clean-up.
source_df.info()

In [3]:
# Analysis of HEIs with DNSSEC enabled, by region, one table per HEI type (Pub/Pvt)

# settings
column_to_sort = 'Without DNSSEC %'
sort_ascending = True
config = [
    {'table_name': 'dnssec_by_region_public', 'hei_type': 'Public'},
    {'table_name': 'dnssec_by_region_private', 'hei_type': 'Private'}
]
# (column label, DNSSEC criteria) pairs, in the order the columns are created.
# The HEI-type filter is appended to each criteria inside the loop.
dnssec_columns = [
    ('Without DNSSEC', 'has_dnssec == False'),
    ('With Invalid DNSSEC', 'has_dnssec == True & dnssec_is_valid == False'),
    ('With Valid DNSSEC', 'has_dnssec == True & dnssec_is_valid == True'),
]

# Per-type result tables, in the same order as `config` (public first, then private).
dfs = []
for config_item in config:
    # Use a loop-specific name: assigning to `table_name` here would overwrite the
    # notebook-level `table_name` setting and leave it pointing at the last
    # per-type table once this cell has run.
    hei_table_name = config_item['table_name']
    hei_type = config_item['hei_type']
    # A fresh list per HEI type; it is handed to create_column and finalize_dataframe.
    columns_to_display = [region_column_name.title(), column_name_to_results_global]
    analysis_df = get_records_by_region(source_df, hei_type=hei_type)
    # One column per DNSSEC state, restricted to the current HEI type.
    for dnssec_column, criteria in dnssec_columns:
        create_column(
            source_df=source_df,
            analysis_dataframe=analysis_df,
            column_name=dnssec_column,
            criteria=f'{criteria} & category == "{hei_type}"',
            columns_to_display=columns_to_display,
        )
    # Finalize dataframe
    analysis_df = finalize_dataframe(
        dataframe=analysis_df,
        column_to_sort=column_to_sort,
        ascending=sort_ascending,
        columns_to_display=columns_to_display,
    )
    display(analysis_df)
    dfs.append(analysis_df)
    # save to csv
    save_table(analysis_df, category=category, table_name=hei_table_name)

# Suffix every column with its HEI type, but keep 'Region' unsuffixed so it can
# be used as the join key. dfs[0] is the public table, dfs[1] the private one.
df_public = dfs[0].add_suffix('(pub)').rename(columns={'Region(pub)': 'Region'})
df_private = dfs[1].add_suffix('(pvt)').rename(columns={'Region(pvt)': 'Region'})

# Outer join keeps regions that appear for only one HEI type; their gaps become 0.
df_combined = df_public.merge(df_private, on='Region', how='outer').fillna(0)

# Keep only the percentage columns: discard absolute counts ('#') and any
# 'Global' column, then strip the '%' sign from the remaining column names.
column_labels = df_combined.columns
discard = column_labels.str.contains('#') | column_labels.str.contains('Global')
df_combined = df_combined.loc[:, ~discard]
df_combined = df_combined.rename(columns=lambda label: label.replace('%', ''))

# Temporary ranking key: public + private share of HEIs with valid DNSSEC.
ranks_columns = ['Rank']
rank_column = ranks_columns[0]
df_combined[rank_column] = (
    df_combined['With Valid DNSSEC (pub)'] + df_combined['With Valid DNSSEC (pvt)']
)

# Highest rank first, then push the 'Total' row to the bottom and renumber the
# rows so the index reflects the final position of each region.
df_combined = df_combined.sort_values(by=ranks_columns, ascending=False)
is_total_row = df_combined['Region'] == 'Total'
df_combined = pd.concat(
    [df_combined[~is_total_row], df_combined[is_total_row]],
    ignore_index=True,
)
# The ranking key is only needed for ordering.
df_combined = df_combined.drop(columns=ranks_columns)

# Quartile of each row from its position; the row count passed to
# assign_quartile is reduced by one to leave the 'Total' row out of the count.
ranked_row_count = len(df_combined) - 1
df_combined['Quartile'] = df_combined.index.map(
    lambda position: assign_quartile(position, ranked_row_count)
)

# Move the freshly appended 'Quartile' column to the second position.
cols = list(df_combined.columns)
cols.insert(1, cols.pop())
df_combined = df_combined[cols]


save_table(df_combined, category=category, table_name='dnssec_by_region_combined')



Unnamed: 0,Region,Global #,Without DNSSEC (Public) #,Without DNSSEC (Public) %,Without DNSSEC (Private) #,Without DNSSEC (Private) %,With Valid DNSSEC (Public) #,With Valid DNSSEC (Public) %,With Valid DNSSEC (Private) #,With Valid DNSSEC (Private) %,With Invalid DNSSEC (Public) #,With Invalid DNSSEC (Public) %,With Invalid DNSSEC (Private) #,With Invalid DNSSEC (Private) %
0,California,2,0,0.0,2,100.0,0,0.0,0,0.0,0,0.0,0,0.0
1,Florida,1,0,0.0,1,100.0,0,0.0,0,0.0,0,0.0,0,0.0
2,Georgia,1,0,0.0,1,100.0,0,0.0,0,0.0,0,0.0,0,0.0
3,Illinois,1,0,0.0,1,100.0,0,0.0,0,0.0,0,0.0,0,0.0
4,Michigan,1,0,0.0,1,100.0,0,0.0,0,0.0,0,0.0,0,0.0
5,Minnesota,1,0,0.0,1,100.0,0,0.0,0,0.0,0,0.0,0,0.0
6,New York,1,0,0.0,1,100.0,0,0.0,0,0.0,0,0.0,0,0.0
7,Texas,2,0,0.0,2,100.0,0,0.0,0,0.0,0,0.0,0,0.0
8,Colorado,1,1,100.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0
9,Total,11,1,9.090909,10,90.909091,0,0.0,0,0.0,0,0.0,0,0.0
