In [1]:
from utilities.utilities import load_data, get_records_by_region, create_column, finalize_dataframe, get_extreme_values, create_directory_structure, save_table, save_report, pd, assign_quartile, rank_key_size
# settings
# Category passed to save_table() for every table produced by this notebook.
category = 'security_layer'
# Column labels shared by all the per-region tables.
region_column_name, column_name_to_results_global = 'Region', 'Global #'

# Presumably prepares the output folders used by save_table() -- confirm in utilities.
create_directory_structure()

# Dataset analysed by every cell below.
source_df = load_data('security_layer_checker')

In [2]:
# Dataset sanity pass: records with no grade are labelled 'M'. The analysis
# cells select grade == "M" for their 'Without SSL' columns and grade != "M"
# for everything else.
grade_is_missing = source_df['grade'].isna()
source_df.loc[grade_is_missing, 'grade'] = 'M'
source_df.info()

In [None]:
# Certificate Authority (CA) analysis, Public vs Private institutions:
# the five most frequent issuers plus one aggregated 'Others' row.

# settings
sort_ascending = False
config = [
    {'table_name': 'ca_public', 'hei_type': 'Public'},
    {'table_name': 'ca_private', 'hei_type': 'Private'},
]
for config_item in config:
    table_name, hei_type = config_item['table_name'], config_item['hei_type']

    # Number of URLs per issuer among the graded records of this institution
    # type, most frequent issuer first.
    graded_df = source_df.query(f'grade != "M" & category == "{hei_type}"')
    filtered_df = graded_df.groupby('issuer').count()['url'].sort_values(ascending=False)

    # Keep the five most frequent issuers and fold all the others into one row.
    top_5_df = filtered_df.head(5).reset_index()
    other_total = filtered_df.iloc[5:].sum()
    top_5_df.loc[5] = ['Others', other_total]

    # Share of each row over the whole table (the 'Others' row included).
    url_total = top_5_df['url'].sum()
    top_5_df['percentual'] = (top_5_df['url'] / url_total) * 100

    top_5_df.columns = ['Certificate Authority', 'Total #', 'Total %']
    top_5_df = top_5_df.reset_index(drop=True)

    # save to csv
    save_table(top_5_df, category=category, table_name=table_name)

In [3]:
# Key length analysis by region (Public / Private institutions).
# A per-region table is built, displayed and saved for each institution type;
# both tables are then merged into a single ranked table with a quartile column.

# settings
column_to_sort = 'Without SSL %'
sort_ascending = False
config = [
    {'table_name': 'key_length_by_region_public', 'hei_type': 'Public'},
    {'table_name': 'key_length_by_region_private', 'hei_type': 'Private'}
]
# Key-length columns used to rank the regions, most significant first.
rank_labels = ['384 (ECC)', '4096 (RSA)', '256 (ECC)', '3072 (RSA)']
ranks_columns = ['Rank', 'Rank2', 'Rank3', 'Rank4']

dfs = []
# The key sizes are taken dynamically from the data. 0 is skipped, and so are
# missing values, which cannot be written into the query criteria used below.
key_lengths = [
    key_length for key_length in source_df['key_size'].unique()
    if pd.notna(key_length) and key_length != 0
]
key_lengths = sorted(key_lengths, key=rank_key_size)

for config_item in config:
    table_name = config_item['table_name']
    hei_type = config_item['hei_type']
    # A fresh list for every institution type (presumably extended by
    # create_column -- confirm in utilities).
    columns_to_display = [region_column_name.title(), column_name_to_results_global]
    analysis_df = get_records_by_region(source_df, hei_type=hei_type)

    # create columns
    # Column creation with distribution of records without SSL by region
    create_column(source_df=source_df, analysis_dataframe=analysis_df, column_name='Without SSL', criteria=f'grade == "M" & category == "{hei_type}"', columns_to_display=columns_to_display)
    # Creating one column per key length found in the data, with its distribution by region
    for key_length in key_lengths:
        # Sizes below 1024 are labelled ECC, the others RSA.
        algorithm = 'RSA' if key_length >= 1024 else 'ECC'
        create_column(source_df=source_df, analysis_dataframe=analysis_df, column_name=f'{key_length} ({algorithm})', criteria=f'grade != "M" & key_size == {key_length} & category == "{hei_type}"', columns_to_display=columns_to_display)

    # Finalize dataframe
    analysis_df = finalize_dataframe(dataframe=analysis_df, column_to_sort=column_to_sort, ascending=sort_ascending, columns_to_display=columns_to_display)
    display(analysis_df)
    dfs.append(analysis_df)
    # save to csv
    save_table(analysis_df, category=category, table_name=table_name)

# dfs follows the order of `config`: dfs[0] is the Public table, dfs[1] the Private one.
df_public = dfs[0].add_suffix('(pub)').rename(columns={'Region(pub)': 'Region'})
df_private = dfs[1].add_suffix('(pvt)').rename(columns={'Region(pvt)': 'Region'})
# Outer merge keeps regions present in only one table; their missing values become 0.
df_combined = df_public.merge(df_private, on='Region', how='outer').fillna(0)

# remove columns with # in the name
df_combined = df_combined.loc[:, ~df_combined.columns.str.contains('#')]
# remove columns global
df_combined = df_combined.loc[:, ~df_combined.columns.str.contains('Global')]
# remove '%' from name of columns ('%' is meant literally, not as a pattern)
df_combined.columns = df_combined.columns.str.replace('%', '', regex=False)

# Add one helper column per ranked key length with the sum of its public and
# private values. The key-length columns depend on the data, so a key length
# that does not occur counts as 0 instead of raising a KeyError.
for rank_column, rank_label in zip(ranks_columns, rank_labels):
    df_combined[rank_column] = df_combined.get(f'{rank_label} (pub)', 0) + df_combined.get(f'{rank_label} (pvt)', 0)

# order dataframe by the Rank columns (from highest to lowest)
df_combined = df_combined.sort_values(by=ranks_columns, ascending=False)
# move just row with 'Total' in column Region to the end of the dataframe. (Use pandas.concat instead of append to avoid duplicates)
is_total_row = df_combined['Region'] == 'Total'
df_combined = pd.concat([df_combined[~is_total_row], df_combined[is_total_row]])
# reset index so that it reflects the position of each row in the ranking
df_combined = df_combined.reset_index(drop=True)
# remove the helper Rank columns
df_combined = df_combined.drop(columns=ranks_columns)
# Add a column with the quartile corresponding to the position of the row, that is, considering the total of records -1 (to exclude the total row), if a row is in position 2 it should belong to the first quartile.
ranked_rows = len(df_combined) - 1
df_combined['Quartile'] = df_combined.index.map(lambda rank: assign_quartile(rank, ranked_rows))
# move column 'Quartile' to the second position
cols = list(df_combined.columns)
cols = [cols[0]] + [cols[-1]] + cols[1:-1]
df_combined = df_combined[cols]


save_table(df_combined, category=category, table_name='key_length_by_region_combined')

Unnamed: 0,Region,Global #,Without SSL (Public) #,Without SSL (Public) %,Without SSL (Private) #,Without SSL (Private) %,256 (ECC) (Public) #,256 (ECC) (Public) %,256 (ECC) (Private) #,256 (ECC) (Private) %,...,2048 (RSA) (Private) #,2048 (RSA) (Private) %,3072 (RSA) (Public) #,3072 (RSA) (Public) %,3072 (RSA) (Private) #,3072 (RSA) (Private) %,4096 (RSA) (Public) #,4096 (RSA) (Public) %,4096 (RSA) (Private) #,4096 (RSA) (Private) %
0,California,2,0,0.0,0,0.0,0,0.0,0,0.0,...,2,100.0,0,0.0,0,0.0,0,0.0,0,0.0
1,Colorado,1,0,0.0,0,0.0,0,0.0,0,0.0,...,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0
2,Florida,1,0,0.0,0,0.0,0,0.0,0,0.0,...,1,100.0,0,0.0,0,0.0,0,0.0,0,0.0
3,Georgia,1,0,0.0,0,0.0,0,0.0,0,0.0,...,1,100.0,0,0.0,0,0.0,0,0.0,0,0.0
4,Michigan,1,0,0.0,0,0.0,0,0.0,0,0.0,...,1,100.0,0,0.0,0,0.0,0,0.0,0,0.0
5,Minnesota,1,0,0.0,0,0.0,0,0.0,0,0.0,...,1,100.0,0,0.0,0,0.0,0,0.0,0,0.0
6,New York,1,0,0.0,0,0.0,0,0.0,0,0.0,...,0,0.0,0,0.0,0,0.0,0,0.0,1,100.0
7,Texas,2,0,0.0,0,0.0,0,0.0,0,0.0,...,2,100.0,0,0.0,0,0.0,0,0.0,0,0.0
8,Total,10,0,0.0,0,0.0,0,0.0,0,0.0,...,8,80.0,0,0.0,0,0.0,0,0.0,1,10.0


In [5]:
# SSL key algorithm (RSA / ECC) analysis by region (Public / Private institutions).
# A per-region table is built, displayed and saved for each institution type;
# both tables are then merged into a single ranked table with a quartile column.

# settings
column_to_sort = 'Without SSL %'
sort_ascending = False
config = [
    {'table_name': 'SSL_Algorithms_by_region_public', 'hei_type': 'Public'},
    {'table_name': 'SSL_Algorithms_by_region_private', 'hei_type': 'Private'}
]
dfs = []
for config_item in config:
    table_name = config_item['table_name']
    hei_type = config_item['hei_type']
    # A fresh list for every institution type (presumably extended by
    # create_column -- confirm in utilities).
    columns_to_display = [region_column_name.title(), column_name_to_results_global]
    analysis_df = get_records_by_region(source_df, hei_type=hei_type)
    # create columns
    # Column creation with distribution of records without SSL by region
    create_column(source_df=source_df, analysis_dataframe=analysis_df, column_name='Without SSL', criteria=f'grade == "M" & category == "{hei_type}"', columns_to_display=columns_to_display)
    # Creating column with the distribution of SSL Algorithms by region
    create_column(source_df=source_df, analysis_dataframe=analysis_df, column_name='RSA', criteria=f'grade != "M" & key_alg == "RSA" & category == "{hei_type}"', columns_to_display=columns_to_display)
    create_column(source_df=source_df, analysis_dataframe=analysis_df, column_name='ECC', criteria=f'grade != "M" & key_alg == "EC" & category == "{hei_type}"', columns_to_display=columns_to_display)
    # Finalize dataframe
    analysis_df = finalize_dataframe(dataframe=analysis_df, column_to_sort=column_to_sort, ascending=sort_ascending, columns_to_display=columns_to_display)
    display(analysis_df)
    # Append exactly once per institution type: the merge below relies on
    # dfs[0] being the Public table and dfs[1] the Private one.
    dfs.append(analysis_df)
    # save to csv
    save_table(analysis_df, category=category, table_name=table_name)

df_public = dfs[0].add_suffix('(pub)').rename(columns={'Region(pub)': 'Region'})
df_private = dfs[1].add_suffix('(pvt)').rename(columns={'Region(pvt)': 'Region'})
# Outer merge keeps regions present in only one table; their missing values become 0.
df_combined = df_public.merge(df_private, on='Region', how='outer').fillna(0)
# remove columns with # in the name
df_combined = df_combined.loc[:, ~df_combined.columns.str.contains('#')]
# remove columns global
df_combined = df_combined.loc[:, ~df_combined.columns.str.contains('Global')]
# remove '%' from name of columns ('%' is meant literally, not as a pattern)
df_combined.columns = df_combined.columns.str.replace('%', '', regex=False)

ranks_columns = ['Rank', 'Rank2', 'Rank3']
# add helper columns with the public + private sum of the ranked columns
df_combined[ranks_columns[0]] = df_combined['ECC (pub)'] + df_combined['ECC (pvt)']
df_combined[ranks_columns[1]] = df_combined['RSA (pub)'] + df_combined['RSA (pvt)']
df_combined[ranks_columns[2]] = df_combined['Without SSL (pub)'] + df_combined['Without SSL (pvt)']


# order dataframe by the Rank columns (from highest to lowest)
df_combined = df_combined.sort_values(by=ranks_columns, ascending=False)
# move just row with 'Total' in column Region to the end of the dataframe. (Use pandas.concat instead of append to avoid duplicates)
is_total_row = df_combined['Region'] == 'Total'
df_combined = pd.concat([df_combined[~is_total_row], df_combined[is_total_row]])
# reset index so that it reflects the position of each row in the ranking
df_combined = df_combined.reset_index(drop=True)
# remove the helper Rank columns
df_combined = df_combined.drop(columns=ranks_columns)
# Add a column with the quartile corresponding to the position of the row, that is, considering the total of records -1 (to exclude the total row), if a row is in position 2 it should belong to the first quartile.
ranked_rows = len(df_combined) - 1
df_combined['Quartile'] = df_combined.index.map(lambda rank: assign_quartile(rank, ranked_rows))
# move column 'Quartile' to the second position
cols = list(df_combined.columns)
cols = [cols[0]] + [cols[-1]] + cols[1:-1]
df_combined = df_combined[cols]


save_table(df_combined, category=category, table_name='SSL_Algorithms_by_region_combined')

Unnamed: 0,Region,Global #,Without SSL (Public) #,Without SSL (Public) %,Without SSL (Private) #,Without SSL (Private) %,RSA (Public) #,RSA (Public) %,RSA (Private) #,RSA (Private) %,ECC (Public) #,ECC (Public) %,ECC (Private) #,ECC (Private) %
0,California,2,0,0.0,0,0.0,0,0.0,2,100.0,0,0.0,0,0.0
1,Colorado,1,0,0.0,0,0.0,1,100.0,0,0.0,0,0.0,0,0.0
2,Florida,1,0,0.0,0,0.0,0,0.0,1,100.0,0,0.0,0,0.0
3,Georgia,1,0,0.0,0,0.0,0,0.0,1,100.0,0,0.0,0,0.0
4,Michigan,1,0,0.0,0,0.0,0,0.0,1,100.0,0,0.0,0,0.0
5,Minnesota,1,0,0.0,0,0.0,0,0.0,1,100.0,0,0.0,0,0.0
6,New York,1,0,0.0,0,0.0,0,0.0,1,100.0,0,0.0,0,0.0
7,Texas,2,0,0.0,0,0.0,0,0.0,2,100.0,0,0.0,0,0.0
8,Total,10,0,0.0,0,0.0,1,10.0,9,90.0,0,0.0,0,0.0


In [7]:
# Worst (oldest) supported SSL/TLS version by region (Public / Private institutions).
# A per-region table is built, displayed and saved for each institution type;
# both tables are then merged into a single ranked table with a quartile column.

# settings
column_to_sort = 'Without SSL %'
sort_ascending = False
config = [
    {'table_name': 'Worst_SSL_supported_by_region_public', 'hei_type': 'Public'},
    {'table_name': 'Worst_SSL_supported_by_region_private', 'hei_type': 'Private'},
]
# Protocol versions, oldest first; each one is also a boolean column of source_df.
versions = ['SSLv2.0', 'SSLv3.0', 'TLSv1.0', 'TLSv1.1', 'TLSv1.2', 'TLSv1.3']
only_https = 'grade != "M" &'

dfs = []
for config_item in config:
    table_name, hei_type = config_item['table_name'], config_item['hei_type']
    columns_to_display = [region_column_name.title(), column_name_to_results_global]
    analysis_df = get_records_by_region(source_df, hei_type=hei_type)

    # Distribution of the records without SSL by region.
    create_column(
        source_df=source_df,
        analysis_dataframe=analysis_df,
        column_name='Without SSL',
        criteria=f'grade == "M" & category == "{hei_type}"',
        columns_to_display=columns_to_display,
    )

    # One column per version: a record is counted under a version when that
    # version is enabled and every older version is disabled.
    for position, version in enumerate(versions):
        if position == 0:
            current_version = f'`{version}` == True'
        else:
            current_version = f'`{version}` == True &'
        previous_versions = ' & '.join(f'`{older}` == False' for older in versions[:position])
        criteria = f'{only_https} {current_version} {previous_versions}'
        create_column(
            source_df=source_df,
            analysis_dataframe=analysis_df,
            column_name=f'{version}',
            criteria=f'{criteria} & category == "{hei_type}"',
            columns_to_display=columns_to_display,
        )

    # Finalize, show and store the table of this institution type.
    analysis_df = finalize_dataframe(
        dataframe=analysis_df,
        column_to_sort=column_to_sort,
        ascending=sort_ascending,
        columns_to_display=columns_to_display,
    )
    display(analysis_df)
    dfs.append(analysis_df)
    save_table(analysis_df, category=category, table_name=table_name)


# dfs follows the order of `config`: Public first, Private second.
df_public = dfs[0].add_suffix('(pub)').rename(columns={'Region(pub)': 'Region'})
df_private = dfs[1].add_suffix('(pvt)').rename(columns={'Region(pvt)': 'Region'})
df_combined = df_public.merge(df_private, on='Region', how='outer').fillna(0)

# Drop the absolute-count ('#') columns and the global column, then strip the
# '%' sign from the remaining column names.
column_names = df_combined.columns
is_dropped = column_names.str.contains('#') | column_names.str.contains('Global')
df_combined = df_combined.loc[:, ~is_dropped]
df_combined.columns = df_combined.columns.str.replace('%', '')

# Helper columns used only for sorting: public + private sum per ranked version.
rank_sources = {'Rank': 'TLSv1.3', 'Rank2': 'TLSv1.2'}
ranks_columns = list(rank_sources)
for rank_column, version in rank_sources.items():
    df_combined[rank_column] = df_combined[f'{version} (pub)'] + df_combined[f'{version} (pvt)']

# Highest rank first, with the 'Total' row moved to the end.
df_combined = df_combined.sort_values(by=ranks_columns, ascending=False)
is_total_row = df_combined['Region'] == 'Total'
df_combined = pd.concat([df_combined[~is_total_row], df_combined[is_total_row]])
df_combined = df_combined.reset_index(drop=True).drop(columns=ranks_columns)

# Quartile of each row from its position in the ranking; the 'Total' row is
# left out of the number of ranked rows.
ranked_rows = len(df_combined) - 1
df_combined['Quartile'] = df_combined.index.map(lambda rank: assign_quartile(rank, ranked_rows))

# Put 'Quartile' right after the first column.
first_column, *middle_columns, quartile_column = list(df_combined.columns)
df_combined = df_combined[[first_column, quartile_column] + middle_columns]


save_table(df_combined, category=category, table_name='Worst_SSL_supported_by_region_combined')

Unnamed: 0,Region,Global #,Without SSL (Public) #,Without SSL (Public) %,Without SSL (Private) #,Without SSL (Private) %,SSLv2.0 (Public) #,SSLv2.0 (Public) %,SSLv2.0 (Private) #,SSLv2.0 (Private) %,...,TLSv1.1 (Private) #,TLSv1.1 (Private) %,TLSv1.2 (Public) #,TLSv1.2 (Public) %,TLSv1.2 (Private) #,TLSv1.2 (Private) %,TLSv1.3 (Public) #,TLSv1.3 (Public) %,TLSv1.3 (Private) #,TLSv1.3 (Private) %
0,California,2,0,0.0,0,0.0,0,0.0,0,0.0,...,0,0.0,0,0.0,2,100.0,0,0.0,0,0.0
1,Colorado,1,0,0.0,0,0.0,0,0.0,0,0.0,...,0,0.0,1,100.0,0,0.0,0,0.0,0,0.0
2,Florida,1,0,0.0,0,0.0,0,0.0,0,0.0,...,0,0.0,0,0.0,1,100.0,0,0.0,0,0.0
3,Georgia,1,0,0.0,0,0.0,0,0.0,0,0.0,...,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0
4,Michigan,1,0,0.0,0,0.0,0,0.0,0,0.0,...,0,0.0,0,0.0,1,100.0,0,0.0,0,0.0
5,Minnesota,1,0,0.0,0,0.0,0,0.0,0,0.0,...,0,0.0,0,0.0,1,100.0,0,0.0,0,0.0
6,New York,1,0,0.0,0,0.0,0,0.0,0,0.0,...,0,0.0,0,0.0,1,100.0,0,0.0,0,0.0
7,Texas,2,0,0.0,0,0.0,0,0.0,0,0.0,...,0,0.0,0,0.0,2,100.0,0,0.0,0,0.0
8,Total,10,0,0.0,0,0.0,0,0.0,0,0.0,...,0,0.0,1,10.0,8,80.0,0,0.0,0,0.0


In [9]:
# Valid vs invalid SSL/TLS configuration by region (Public / Private institutions).
# A per-region table is built, displayed and saved for each institution type;
# both tables are then merged into a single ranked table with a quartile column.

# settings
column_to_sort = 'Without SSL %'
sort_ascending = False
config = [
    {'table_name': 'valid_ssl_by_region_public', 'hei_type': 'Public'},
    {'table_name': 'valid_ssl_by_region_private', 'hei_type': 'Private'},
]
dfs = []
for config_item in config:
    table_name, hei_type = config_item['table_name'], config_item['hei_type']
    columns_to_display = [region_column_name.title(), column_name_to_results_global]
    analysis_df = get_records_by_region(source_df, hei_type=hei_type)

    # Column name -> selection criteria, in the order the columns are created:
    # records without SSL first, then valid and invalid configurations.
    column_criteria = {
        'Without SSL': f'grade == "M" & category == "{hei_type}"',
        'Valid Configuration': f'grade != "M" & is_valid == True & category == "{hei_type}"',
        'Invalid Configuration': f'grade != "M" & is_valid == False & category == "{hei_type}"',
    }
    for column_name, criteria in column_criteria.items():
        create_column(
            source_df=source_df,
            analysis_dataframe=analysis_df,
            column_name=column_name,
            criteria=criteria,
            columns_to_display=columns_to_display,
        )

    # Finalize, show and store the table of this institution type.
    analysis_df = finalize_dataframe(
        dataframe=analysis_df,
        column_to_sort=column_to_sort,
        ascending=sort_ascending,
        columns_to_display=columns_to_display,
    )
    display(analysis_df)
    dfs.append(analysis_df)
    save_table(analysis_df, category=category, table_name=table_name)


# dfs follows the order of `config`: Public first, Private second.
df_public = dfs[0].add_suffix('(pub)').rename(columns={'Region(pub)': 'Region'})
df_private = dfs[1].add_suffix('(pvt)').rename(columns={'Region(pvt)': 'Region'})
df_combined = df_public.merge(df_private, on='Region', how='outer').fillna(0)

# Drop the absolute-count ('#') columns and the global column, then strip the
# '%' sign from the remaining column names.
column_names = df_combined.columns
is_dropped = column_names.str.contains('#') | column_names.str.contains('Global')
df_combined = df_combined.loc[:, ~is_dropped]
df_combined.columns = df_combined.columns.str.replace('%', '')

# Helper columns used only for sorting: public + private sum per ranked column.
rank_sources = {
    'Rank': 'Valid Configuration',
    'Rank2': 'Invalid Configuration',
    'Rank3': 'Without SSL',
}
ranks_columns = list(rank_sources)
for rank_column, label in rank_sources.items():
    df_combined[rank_column] = df_combined[f'{label} (pub)'] + df_combined[f'{label} (pvt)']

# Highest rank first, with the 'Total' row moved to the end.
df_combined = df_combined.sort_values(by=ranks_columns, ascending=False)
is_total_row = df_combined['Region'] == 'Total'
df_combined = pd.concat([df_combined[~is_total_row], df_combined[is_total_row]])
df_combined = df_combined.reset_index(drop=True).drop(columns=ranks_columns)

# Quartile of each row from its position in the ranking; the 'Total' row is
# left out of the number of ranked rows.
ranked_rows = len(df_combined) - 1
df_combined['Quartile'] = df_combined.index.map(lambda rank: assign_quartile(rank, ranked_rows))

# Put 'Quartile' right after the first column.
first_column, *middle_columns, quartile_column = list(df_combined.columns)
df_combined = df_combined[[first_column, quartile_column] + middle_columns]


save_table(df_combined, category=category, table_name='valid_ssl_by_region_combined')

Unnamed: 0,Region,Global #,Without SSL (Public) #,Without SSL (Public) %,Without SSL (Private) #,Without SSL (Private) %,Valid Configuration (Public) #,Valid Configuration (Public) %,Invalid Configuration (Public) #,Invalid Configuration (Public) %,Valid Configuration (Private) #,Valid Configuration (Private) %,Invalid Configuration (Private) #,Invalid Configuration (Private) %
0,California,2,0,0.0,0,0.0,0,0.0,0,0.0,2,100.0,0,0.0
1,Colorado,1,0,0.0,0,0.0,1,100.0,0,0.0,0,0.0,0,0.0
2,Florida,1,0,0.0,0,0.0,0,0.0,0,0.0,1,100.0,0,0.0
3,Georgia,1,0,0.0,0,0.0,0,0.0,0,0.0,1,100.0,0,0.0
4,Michigan,1,0,0.0,0,0.0,0,0.0,0,0.0,1,100.0,0,0.0
5,Minnesota,1,0,0.0,0,0.0,0,0.0,0,0.0,1,100.0,0,0.0
6,New York,1,0,0.0,0,0.0,0,0.0,0,0.0,1,100.0,0,0.0
7,Texas,2,0,0.0,0,0.0,0,0.0,0,0.0,2,100.0,0,0.0
8,Total,10,0,0.0,0,0.0,1,10.0,0,0.0,9,90.0,0,0.0
