In [1]:
import csv

In [11]:
# Preview the header line plus the first data rows of the raw CSV.
# newline="" lets the csv module do its own newline handling, as its
# documentation asks for when a file object is passed to a reader.
with open("pop_estimates.csv", newline="") as file:
    data = csv.reader(file)
    # zip() against range(10) caps the preview at 10 rows and simply stops
    # early on a shorter file; calling next(data) ten times would raise
    # StopIteration there.
    for _, row in zip(range(10), data):
        print(row)

['Geographic Area', 'July 1, 2001 Estimate', 'July 1, 2000 Estimate', 'April 1, 2000 Population Estimates Base']
['United States', ' 284,796,887 ', ' 282,124,631 ', ' 281,421,906 ']
['Alabama', ' 4,464,356 ', ' 4,451,493 ', ' 4,447,100 ']
['Alaska', ' 634,892 ', ' 627,601 ', ' 626,932 ']
['Arizona', ' 5,307,331 ', ' 5,165,274 ', ' 5,130,632 ']
['Arkansas', ' 2,692,090 ', ' 2,678,030 ', ' 2,673,400 ']
['California', ' 34,501,130 ', ' 34,000,446 ', ' 33,871,648 ']
['Colorado', ' 4,417,714 ', ' 4,323,410 ', ' 4,301,261 ']
['Connecticut', ' 3,425,074 ', ' 3,410,079 ', ' 3,405,565 ']
['Delaware', ' 796,165 ', ' 786,234 ', ' 783,600 ']


In [12]:
def name_int(value: str) -> int:
    """Parse a comma-grouped number string such as ``" 4,464,356 "`` into an int.

    Surrounding whitespace is stripped and every comma is removed before the
    remaining text is handed to ``int``.

    Args:
        value: Raw text of the number.

    Returns:
        The parsed integer.

    Raises:
        ValueError: If cleaning or converting ``value`` fails for any reason
            (this includes inputs that have no ``strip`` method, e.g. ``None``).
            The underlying exception is attached as ``__cause__``.
    """
    try:
        return int(value.strip().replace(",", ""))
    except Exception as exc:
        # Chain explicitly so the traceback shows the real failure as the
        # direct cause instead of "during handling ... another exception".
        raise ValueError("data couldn't be parsed into a valid integer") from exc

In [13]:
from typing import Annotated
from pydantic import BeforeValidator

In [15]:
# Integer type whose raw input is first passed through name_int, so
# comma-grouped strings are cleaned up before the int validation runs.
FunkyInt = Annotated[int, BeforeValidator(name_int)]

In [16]:
from pydantic import BaseModel

In [17]:
class Estimate(BaseModel):
    """One row of the population-estimates CSV.

    The three numeric fields are declared as ``FunkyInt``, so each raw value
    is run through ``name_int`` before integer validation.
    """

    # Text of the first CSV column ("Geographic Area" in the header row).
    # NOTE(review): "ares" looks like a typo for "area" — the same spelling is
    # used in the DictReader fieldnames, so any rename must change them together.
    ares: str
    # Second CSV column ("July 1, 2001 Estimate").
    july_1_2001: FunkyInt
    # Third CSV column ("July 1, 2000 Estimate").
    july_1_2000: FunkyInt
    # Fourth CSV column ("April 1, 2000 Population Estimates Base").
    april_1_2000: FunkyInt

In [23]:
# Parse every data row of the CSV into a validated Estimate.
# newline="" lets the csv module do its own newline handling.
with open("pop_estimates.csv", newline="") as file:
    data = csv.DictReader(
        file,
        fieldnames=["ares", "july_1_2001", "july_1_2000", "april_1_2000"],
    )
    # With explicit fieldnames DictReader treats the header line as data, so
    # drop it here; the None default keeps an empty file from raising
    # StopIteration.
    next(data, None)
    # model_validate matches how estimates() builds its models.
    list_ = [Estimate.model_validate(row) for row in data]

In [24]:
# Bare last expression: the notebook displays the repr of the whole list.
list_

[Estimate(ares='United States', july_1_2001=284796887, july_1_2000=282124631, april_1_2000=281421906),
 Estimate(ares='Alabama', july_1_2001=4464356, july_1_2000=4451493, april_1_2000=4447100),
 Estimate(ares='Alaska', july_1_2001=634892, july_1_2000=627601, april_1_2000=626932),
 Estimate(ares='Arizona', july_1_2001=5307331, july_1_2000=5165274, april_1_2000=5130632),
 Estimate(ares='Arkansas', july_1_2001=2692090, july_1_2000=2678030, april_1_2000=2673400),
 Estimate(ares='California', july_1_2001=34501130, july_1_2000=34000446, april_1_2000=33871648),
 Estimate(ares='Colorado', july_1_2001=4417714, july_1_2000=4323410, april_1_2000=4301261),
 Estimate(ares='Connecticut', july_1_2001=3425074, july_1_2000=3410079, april_1_2000=3405565),
 Estimate(ares='Delaware', july_1_2001=796165, july_1_2000=786234, april_1_2000=783600),
 Estimate(ares='District of Columbia', july_1_2001=571822, july_1_2000=571066, april_1_2000=572059),
 Estimate(ares='Florida', july_1_2001=16396515, july_1_2000=16

In [37]:
def estimates():
    """Lazily yield one validated ``Estimate`` per data row of ``pop_estimates.csv``.

    The header line is skipped. Each model's repr is printed immediately
    before it is yielded, which makes the row-by-row evaluation visible when
    the generator is consumed.

    Yields:
        Estimate: The validated model for the next CSV row.

    Raises:
        OSError: If ``pop_estimates.csv`` cannot be opened.
        pydantic.ValidationError: If a row fails validation.
    """
    # newline="" lets the csv module do its own newline handling.
    with open("pop_estimates.csv", newline="") as file:
        data = csv.DictReader(
            file,
            fieldnames=["ares", "july_1_2001", "july_1_2000", "april_1_2000"],
        )
        # Skip the header line. The None default matters inside a generator:
        # a bare next() on an empty file would raise StopIteration, which
        # Python turns into RuntimeError (PEP 479) instead of ending cleanly.
        next(data, None)
        for row in data:
            estimate = Estimate.model_validate(row)
            print(repr(estimate))
            yield estimate

In [38]:
# Drain the generator. The loop body is intentionally a no-op: estimates()
# itself prints each row as it is produced.
for estimate in estimates():
    ...

Estimate(ares='United States', july_1_2001=284796887, july_1_2000=282124631, april_1_2000=281421906)
Estimate(ares='Alabama', july_1_2001=4464356, july_1_2000=4451493, april_1_2000=4447100)
Estimate(ares='Alaska', july_1_2001=634892, july_1_2000=627601, april_1_2000=626932)
Estimate(ares='Arizona', july_1_2001=5307331, july_1_2000=5165274, april_1_2000=5130632)
Estimate(ares='Arkansas', july_1_2001=2692090, july_1_2000=2678030, april_1_2000=2673400)
Estimate(ares='California', july_1_2001=34501130, july_1_2000=34000446, april_1_2000=33871648)
Estimate(ares='Colorado', july_1_2001=4417714, july_1_2000=4323410, april_1_2000=4301261)
Estimate(ares='Connecticut', july_1_2001=3425074, july_1_2000=3410079, april_1_2000=3405565)
Estimate(ares='Delaware', july_1_2001=796165, july_1_2000=786234, april_1_2000=783600)
Estimate(ares='District of Columbia', july_1_2001=571822, july_1_2000=571066, april_1_2000=572059)
Estimate(ares='Florida', july_1_2001=16396515, july_1_2000=16054328, april_1_2000=

In [39]:
class Model(BaseModel):
    """Minimal model with a single defaulted field, used to compare repr and print output."""

    # Defaults to "USA" when no value is supplied.
    country: str = "USA"

In [43]:
# Bare expression: the notebook shows the repr, which includes the class name.
Model()

Model(country='USA')

In [44]:
# print() uses str(), which lists only the field values without the class name.
print(Model())

country='USA'
