In [1]:
!pip install dataframe_image
!pip install playwright
!playwright install chromium

Collecting dataframe_image
  Downloading dataframe_image-0.2.7-py3-none-any.whl (6.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.7/6.7 MB[0m [31m12.1 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hCollecting nbconvert>=5
  Using cached nbconvert-7.16.6-py3-none-any.whl (258 kB)
Collecting pandas>=0.24
  Using cached pandas-2.2.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (13.1 MB)
Collecting aiohttp>=3.10.2
  Downloading aiohttp-3.11.18-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.6/1.6 MB[0m [31m12.9 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hCollecting playwright
  Downloading playwright-1.52.0-py3-none-manylinux1_x86_64.whl (45.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m45.1/45.1 MB[0m [31m10.5 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hCollecting cssselect
  Downloading cssselect-1.3.0-py3-no

In [2]:
import pandas as pd
import re
import dataframe_image as dfi
import asyncio


In [3]:
async def export_df_head_async(df, fname):
    """Export the first rows of ``df`` as an image and report completion.

    Args:
        df: DataFrame to preview; only ``df.head()`` is exported.
        fname: Destination path handed to ``dfi.export``.

    The export call is run through ``asyncio.to_thread`` so it executes in a
    worker thread while this coroutine awaits it.
    NOTE(review): presumably needed because the exporter cannot run inside the
    notebook's already-running event loop - confirm.
    """
    preview = df.head()
    await asyncio.to_thread(dfi.export, preview, fname)
    print("done")

In [4]:
df = pd.read_csv("./data/in/zillow_home_value_index.csv")
await export_df_head_async(df[["RegionID", "RegionName", "StateName", *df.columns[6:9]]].head(), "./images/zhvi.png")

done


In [5]:
df = pd.read_csv("./data/in/zillow_affordability.csv")
await export_df_head_async(df[["RegionID", "RegionName", "StateName", *df.columns[6:9]]].head(), "./images/zaff.png")

done


In [6]:
df = pd.read_csv("./data/in/zillow_market_heat.csv")
await export_df_head_async(df[["RegionID", "RegionName", "StateName", *df.columns[6:9]]].head(), "./images/zmah.png")

done


In [7]:
df = pd.read_csv("./data/out/zillow_market_2018+.csv")
await export_df_head_async(df.head(), "./images/zall.png")

done


In [8]:
df = pd.read_csv("./data/out/data.gov_personal_income.csv")
await export_df_head_async(df.head(), "./images/demo.png")

done


In [26]:
import numpy as np
def reorder_column(df, col, order):
    """Return a copy of ``df`` sorted by ``col`` according to an explicit order.

    Args:
        df: Input DataFrame. It is left unmodified.
        col: Label of the column to convert and sort by.
        order: Category labels, listed from first to last in the sort order.

    Returns:
        A new DataFrame in which ``col`` is an ordered categorical with
        ``order`` as its categories, rows are sorted by that column, and the
        index is reset to 0..n-1. Values of ``col`` that do not appear in
        ``order`` become NaN and are placed at the end.
    """
    # Work on a copy: assigning the categorical straight into the argument
    # would silently change the caller's frame (new dtype for `col`, and NaN
    # for any label missing from `order`).
    result = df.copy()
    result[col] = pd.Categorical(result[col], categories=order, ordered=True)
    return result.sort_values(by=col).reset_index(drop=True)

# Explicit category orders for the demographic columns, listed from first to
# last. They are suitable as the `order` argument of `reorder_column` and are
# joined into the column/value summary table printed further down.

# Personal income brackets, lowest to highest.
# NOTE(review): there is no bracket between "No Income" and "$5,000 to $9,999";
# confirm against the dataset that none is expected.
income_order = [
    "No Income",
    "$5,000 to $9,999",
    "$10,000 to $14,999",
    "$15,000 to $24,999",
    "$25,000 to $34,999",
    "$35,000 to $49,999",
    "$50,000 to $74,999",
    "$75,000 and over",
]

# Educational attainment levels, lowest to highest.
edu_order = [
    "No high school diploma",
    "High school or equivalent",
    "Some college, less than 4-yr degree",
    "Bachelor's degree or higher",
]

# Display orders for the remaining demographic columns.
race_order = ["White", "African American", "Asian", "Hispanic", "Other"]
age_order = ["00 to 17", "18 to 64", "65 to 80+"]
gender_order = ["Male", "Female"]

In [34]:

# Keyword arguments forwarded to DataFrame.to_latex for the table below.
latex_args = dict(
    index=False,
    escape=True,
    float_format="{:.3f}".format,
    multirow=True,
    multicolumn=True,
    position="!htb",
    caption="Values for each column in demographics dataset",
    label="tab:demo-cols",
)

# One row per demographic column: its name and its values joined in display
# order. The LaTeX source is printed so it can be copied out of the notebook.
print(
    pd.DataFrame(
        data=[
            [column, ", ".join(values)]
            for column, values in (
                ("Age", age_order),
                ("Gender", gender_order),
                ("Educational Attainment", edu_order),
                ("Race", race_order),
                ("Personal Income", income_order),
            )
        ],
        columns=["Column", "Values"],
    ).to_latex(**latex_args)
)

\begin{table}[!htb]
\caption{Values for each column in demographics dataset}
\label{tab:demo-cols}
\begin{tabular}{ll}
\toprule
Column & Values \\
\midrule
Age & 00 to 17, 18 to 64, 65 to 80+ \\
Gender & Male, Female \\
Educational Attainment & No high school diploma, High school or equivalent, Some college, less than 4-yr degree, Bachelor's degree or higher \\
Race & White, African American, Asian, Hispanic, Other \\
Personal Income & No Income, \$5,000 to \$9,999, \$10,000 to \$14,999, \$15,000 to \$24,999, \$25,000 to \$34,999, \$35,000 to \$49,999, \$50,000 to \$74,999, \$75,000 and over \\
\bottomrule
\end{tabular}
\end{table}

