In [1]:
# imports and setup
import pandas as pd
import numpy as np
# from weasyprint import HTML, CSS

pop_history_df = pd.read_csv("./data.csv")  # read data from csv
pop_history_df.sort_values(['State', 'Year'], inplace=True)  # sort df by State & Year
pop_history_df.reset_index(inplace=True)
# pop_history_df.head(10)  # display initial df

In [2]:
# pivot
pop_history_pivot = pop_history_df.pivot(index="State", columns="Year", values=['Population'])
# pop_history_pivot.head()

In [3]:
# get % change year over year from pivot
pop_pct_change_pivot = pop_history_pivot.pct_change(axis='columns',)
pop_pct_change_pivot.columns.set_levels(['pct_change'], level=0, inplace=True)
# pop_pct_change_pivot.head()

  pop_pct_change_pivot.columns.set_levels(['pct_change'], level=0, inplace=True)


In [4]:
# reshape/melt pivot table to match original dataframe
pop_pct_change_df =  pop_pct_change_pivot.melt(ignore_index=False).rename(columns={"value": "pct_change"})[['Year', 'pct_change']]
pop_pct_change_df = pop_pct_change_df.reset_index().sort_values(['State', 'Year']).reset_index()
# pop_pct_change_df.head(10)

In [5]:
# function to produce a set of the prime factors of a number
def calculate_prime_factors(n):
    prime_factors = set()
    if n % 2 == 0:
        prime_factors.add(2)
    while n % 2 == 0:
        n = n // 2
        if n == 1:
            return prime_factors
    for factor in range(3, n + 1, 2):
        if n % factor == 0:
            prime_factors.add(factor)
            while n % factor == 0:
                n = n // factor
                if n == 1:
                    return prime_factors

In [6]:
# update original table with % change
pop_history_df["pct_change"] = pop_pct_change_df["pct_change"]
pop_history_pivot = pop_history_df.pivot(index=["State"], 
                                         columns=["Year"], 
                                         values=['Population', 'pct_change'])


In [7]:

# generate table data array with string formatting
years = sorted(list(set(pop_history_df.Year)))
header_table_row = ['State Name'] + years + [f'{years[-1]} Factors']
data_table_array = []
# data_table_array.append(header_table_row)
for state, row in pop_history_pivot.iterrows():
    # print(data_table_array)
    new_table_row = [state]
    for year in years:
        # print(year)
        if year == years[0]:
            new_table_row += [f"{int(row['Population', year])}"]
        else:
            new_table_row += [f"{int(row['Population', year])}<br/>({row['pct_change', year]:>0.2%})"]
    prime_factors = calculate_prime_factors(int(row['Population', year]))  # capture the prime factors of the last year of the previous state
    prime_factors = list(map(str, sorted(prime_factors))) # sort & convert to strings
    new_table_row += [ ';'.join(prime_factors) ]  # format and add to new row
    # print(prime_factors)
    # print(new_table_row)
    data_table_array.append(new_table_row)
    pass
# print(header_table_row)
# data_table_array[0:2]


In [8]:
# new dataframe from formatted data
formatted_df = pd.DataFrame(data=data_table_array, columns=header_table_row)
# formatted_df.head(10)

In [9]:

# write data to html
page_title='UCLA Solution'
report_title = 'Population Change Report'
report_subtitle = f'from {years[0]} to {years[-1]}'

html = f'''
    <html>
        <head>
            <title>{page_title}</title>
            <link rel="stylesheet" href="./styles.css">
        </head>
        <body>
            <h1>{report_title}</h1>
            <p><i>{report_subtitle}</i></p>
            <br/>
            {formatted_df.to_html(index=False, escape=False)}
        </body>
    </html>
    '''
# 3. Write the html string as an HTML file
with open('html_report.html', 'w') as f:
    f.write(html)