In [1]:
from utilities.utilities import load_data, get_records_by_region, create_column, finalize_dataframe, get_extreme_values, create_directory_structure, save_table, save_report
# --- Notebook configuration ---------------------------------------------------
# Columns that seed the list of columns shown in the per-region table.
region_column_name = "Region"
column_name_to_results_global = "Global #"

# The saved table and the saved report share one base name, filed under `category`.
category = "https"
table_name = report_name = "https_by_region"

# Presumably prepares the output folders used by save_table / save_report
# (project helper, implementation not visible here). Runs before the data load.
create_directory_structure()

# Source records of the 'https_checker' dataset.
source_df = load_data("https_checker")

In [2]:
# Sanitize the dataset: a missing value in any protocol flag is replaced by False,
# so that comparisons such as `has_http == False` in later criteria match those rows.
columns_names = [
    'has_http',
    'has_https',
    'forced_redirect_to_https',
    'https_redirect_to_same_domain',
    'only_https',
]
for flag_column in columns_names:
    # Rows where this flag is NaN; only those cells are overwritten (in place).
    missing_mask = source_df[flag_column].isna()
    source_df.loc[missing_mask, flag_column] = False

In [7]:
# Analysis of HEIs with HTTPS by region

# settings
column_to_sort = 'HTTP Only (Public) %'
sort_ascending = True
columns_to_display = [region_column_name, column_name_to_results_global]
analysis_df = get_records_by_region(source_df)

# Sector filters appended to each protocol criterion below.
only_public = 'category == "Public"'
only_private = 'category == "Private"'

# Distribution of invalid records by region (every flag False); not split by sector.
criteria = 'has_http == False & has_https == False & forced_redirect_to_https == False & https_redirect_to_same_domain == False &  only_https == False'
create_column(source_df=source_df, analysis_dataframe=analysis_df, column_name='Invalid', criteria=criteria, columns_to_display=columns_to_display)

# (column label, criteria) for each HTTP/HTTPS configuration, in the order the
# columns are created. Each one yields a "(Public)" and a "(Private)" column.
protocol_states = [
    # only HTTP
    ('HTTP Only',
     'has_http == True & has_https == False & forced_redirect_to_https == False & only_https == False'),
    # HTTPS available, but no redirect
    ('HTTP and HTTPS',
     'has_http == True & has_https == True & forced_redirect_to_https == False & only_https == False & https_redirect_to_same_domain == False'),
    # forced redirect to HTTPS (same domain)
    ('HTTP to HTTPS (Same Domain)',
     'has_http == True & has_https == True & forced_redirect_to_https == True & https_redirect_to_same_domain == True'),
    # forced redirect to HTTPS (other domain)
    ('HTTP to HTTPS (Other Domain)',
     'has_http == True & has_https == True & forced_redirect_to_https == True & https_redirect_to_same_domain == False'),
    # only HTTPS
    ('HTTPS Only',
     'has_http == False & has_https == True & only_https == True'),
]
sector_filters = (('Public', only_public), ('Private', only_private))

for state_label, criteria in protocol_states:
    for sector_label, sector_filter in sector_filters:
        create_column(
            source_df=source_df,
            analysis_dataframe=analysis_df,
            column_name=f'{state_label} ({sector_label})',
            criteria=f'{criteria} & {sector_filter}',
            columns_to_display=columns_to_display,
        )

# Finalize dataframe
analysis_df = finalize_dataframe(
    dataframe=analysis_df,
    column_to_sort=column_to_sort,
    ascending=sort_ascending,
    columns_to_display=columns_to_display,
)
display(analysis_df)

# save to csv
save_table(analysis_df, category=category, table_name=table_name)


Unnamed: 0,Region,Global #,Invalid #,Invalid %,HTTP Only (Public) #,HTTP Only (Public) %,HTTP Only (Private) #,HTTP Only (Private) %,HTTP and HTTPS (Public) #,HTTP and HTTPS (Public) %,...,HTTP to HTTPS (Same Domain) (Private) #,HTTP to HTTPS (Same Domain) (Private) %,HTTP to HTTPS (Other Domain) (Public) #,HTTP to HTTPS (Other Domain) (Public) %,HTTP to HTTPS (Other Domain) (Private) #,HTTP to HTTPS (Other Domain) (Private) %,HTTPS Only (Public) #,HTTPS Only (Public) %,HTTPS Only (Private) #,HTTPS Only (Private) %
0,Alabama,32,0,0.0,0,0.0,1,3.125,0,0.0,...,16,50.0,0,0.0,0,0.0,0,0.0,0,0.0
1,Nevada,10,0,0.0,0,0.0,0,0.0,0,0.0,...,7,70.0,0,0.0,0,0.0,0,0.0,0,0.0
2,New Hampshire,14,0,0.0,0,0.0,0,0.0,0,0.0,...,7,50.0,0,0.0,0,0.0,0,0.0,0,0.0
3,New Jersey,48,0,0.0,0,0.0,0,0.0,0,0.0,...,33,68.75,0,0.0,0,0.0,0,0.0,0,0.0
4,New Mexico,10,0,0.0,0,0.0,0,0.0,2,20.0,...,3,30.0,0,0.0,0,0.0,0,0.0,0,0.0
5,New York,175,1,0.571429,0,0.0,1,0.571429,3,1.714286,...,114,65.142857,0,0.0,1,0.571429,0,0.0,1,0.571429
6,North Carolina,63,0,0.0,0,0.0,0,0.0,1,1.587302,...,45,71.428571,0,0.0,0,0.0,1,1.587302,0,0.0
7,North Dakota,9,0,0.0,0,0.0,0,0.0,0,0.0,...,2,22.222222,0,0.0,0,0.0,0,0.0,0,0.0
8,Ohio,74,0,0.0,0,0.0,0,0.0,3,4.054054,...,54,72.972973,0,0.0,0,0.0,0,0.0,0,0.0
9,Oklahoma,26,0,0.0,0,0.0,0,0.0,1,3.846154,...,11,42.307692,0,0.0,0,0.0,0,0.0,0,0.0


In [8]:
# Report in latex
#
# Builds the LaTeX text for the "HTTPS by region" section from the aggregated
# results, prints it, and saves it through save_report.
# All LaTeX templates are raw strings so that sequences such as \centering,
# \gls or \% are not parsed as (invalid) Python escape sequences.
report_results = get_extreme_values(analysis_df)
totals = report_results.get("Total")


def _pct(value):
    """Format a percentage value with two decimal places."""
    return format(value, ".2f")


def _three_regions(column, ranking):
    r"""Render the first three entries of a ranking as 'A (x\%), B (y\%), and C (z\%)'.

    column:  key of report_results (e.g. "HTTP and HTTPS (Public) %").
    ranking: "top_regions" or "bottom_regions".
    Each entry is indexed as entry[0] (region name) and entry[1] (value).
    Raises IndexError if the ranking holds fewer than three entries.
    """
    entries = report_results.get(column).get(ranking)
    first, second, third = (
        rf"{entries[i][0]} ({_pct(entries[i][1])}\%)" for i in range(3)
    )
    return f"{first}, {second}, and {third}"


# NOTE(review): "Invalid %" comes from the 'Invalid' column, which is created
# without a sector filter, so despite its name this value is not restricted to
# public institutions. It is not used in the text below; kept in case other
# cells read it.
hei_public_invalid = _pct(totals.get("Invalid %"))

hei_public_only_http = _pct(totals.get("HTTP Only (Public) %"))
hei_public_only_https = _pct(totals.get("HTTPS Only (Public) %"))
hei_public_http_and_https = _pct(totals.get("HTTP and HTTPS (Public) %"))
hei_public_redirect_same = _pct(totals.get("HTTP to HTTPS (Same Domain) (Public) %"))
hei_public_redirect_other = _pct(totals.get("HTTP to HTTPS (Other Domain) (Public) %"))

hei_private_only_http = _pct(totals.get("HTTP Only (Private) %"))
hei_private_only_https = _pct(totals.get("HTTPS Only (Private) %"))
hei_private_http_and_https = _pct(totals.get("HTTP and HTTPS (Private) %"))
hei_private_redirect_same = _pct(totals.get("HTTP to HTTPS (Same Domain) (Private) %"))
hei_private_redirect_other = _pct(totals.get("HTTP to HTTPS (Other Domain) (Private) %"))

report_figure = r"""
\begin{figure}[htbp]
    \centering
    \includegraphics[width=0.48\textwidth]{charts/https_by_region.pdf}
    \caption{Distribution of HTTPS by regions.}\label{fig:https}
\end{figure}
"""

report = f'{report_figure}\n\n'

# Legend of the indicators. The last item describes the HTTPS-only category
# (it previously repeated the HTTP-only item).
report += rf"""
Fig.~\ref{{fig:https}} presents an overview of the use of \gls{{http}}/\gls{{https}} services at \glspl{{hei}} in \countryName. The indicators represent the following conditions:

\begin{{itemize}}
    \item Invalid: The institution website doesn't work.
    \item \gls{{http}} only: The institution website uses only \gls{{http}}.
    \item \gls{{http}} {{\&}} \gls{{https}}: The institution offers both protocols but does not have any redirection to force the use of \gls{{https}}. All institutions in this category have a valid certificate.
    \item  \gls{{http}} to \gls{{https}} (other): Institution website that redirects the user to a secure page outside the main domain. All institutions in this category have a valid certificate.
    \item \gls{{http}} to \gls{{https}} (same): Institutions website that redirects the user to a secure page within the main domain, thus ensuring data protection. All institutions in this category have a valid certificate.
    \item \gls{{https}} only: The institution website uses only \gls{{https}}.
\end{{itemize}}""" + "\n\n"

# Country-wide totals per sector.
report += rf"""
According to the data, {hei_public_only_http}\% of the public institutions analyzed have not implemented \gls{{https}} on their websites, while {hei_private_only_http}\% of the private institutions analyzed also don't use \gls{{https}}.

On a positive note, {hei_public_redirect_same}\% of the public institutions analyzed have implemented forced redirect to same domain, and {hei_public_redirect_other}\% to other domain. While {hei_private_redirect_same}\% of the private institutions have implemented forced redirect to same domain, and {hei_private_redirect_other}\% to other domain. And {hei_public_only_https}\% of the public institutions, and {hei_private_only_https}\% of the private institutions have implemented only \gls{{https}} on their websites, ensuring data protection.

However, {hei_public_http_and_https}\% of the public institutions, and {hei_private_http_and_https}\% of the private institutions keep both protocols active, allowing the user to access the website through the \gls{{http}} protocol.""" + "\n\n"

# Regional extremes: top/bottom three regions for the same-domain redirect,
# then the top three regions that keep both protocols active.
private_same = "HTTP to HTTPS (Same Domain) (Private) %"
public_same = "HTTP to HTTPS (Same Domain) (Public) %"
private_both = "HTTP and HTTPS (Private) %"
public_both = "HTTP and HTTPS (Public) %"

report += rf"""
In terms of regional differences, private institutions in {_three_regions(private_same, "top_regions")}, and public institutions in {_three_regions(public_same, "top_regions")} have a higher usage of \gls{{https}} with forced redirect to same domain.

In contrast, private institutions in {_three_regions(private_same, "bottom_regions")}, and public institutions in {_three_regions(public_same, "bottom_regions")} have a lower usage of \gls{{https}} with forced redirect to same domain.

Finally, private institutions in {_three_regions(private_both, "top_regions")}, and public institutions in {_three_regions(public_both, "top_regions")} have a higher usage of both protocols, allowing the user to access the website through the \gls{{http}} protocol."""

print(report)
# save report to file txt
save_report(report=report, category=category, report_name=report_name)


\begin{figure}[htbp]
    \centering
    \includegraphics[width=0.48\textwidth]{charts/https_by_region.pdf}
    \caption{Distribution of HTTPS by regions.}\label{fig:https}
\end{figure}



Fig.~\ref{fig:https} presents an overview of the use of \gls{http}/\gls{https} services at \glspl{hei} in \countryName. The indicators represent the following conditions:

\begin{itemize}
    \item Invalid: The institution website doesn't work.
    \item \gls{http} only: The institution website uses only \gls{http}.
    \item \gls{http} {\&} \gls{https}: The institution offers both protocols but does not have any redirection to force the use of \gls{https}. All institutions in this category have a valid certificate.
    \item  \gls{http} to \gls{https} (other): Institution website that redirects the user to a secure page outside the main domain. All institutions in this category have a valid certificate.
    \item \gls{http} to \gls{https} (same): Institutions website that redirects the user to a sec