In [1]:
from pathlib import Path

from mndot_bid_etl.transform.abstract import read_abstract_csv
from mndot_bid_etl.transform.recipies import bid_transformer
from mndot_bid_etl.transform.transform_bid import transform_bid_df
from mndot_bid_etl.transform.transform_bidder import transform_bidder_df
from mndot_bid_etl.transform.transform_contract import transform_contract_df

In [2]:
# Build the path to the sample abstract CSV, make it absolute (relative paths
# resolve against the current working directory), then parse it into the
# `abstract` object that every later cell reads from.
csv_file = (Path("..") / "data" / "csv" / "2022" / "220002.csv").resolve()
abstract = read_abstract_csv(csv_file)

#### Bid Transformer (Object)

In [3]:
# Object-style transformer: apply `bid_transformer` (imported with the other
# transform helpers in the first cell) to the abstract's bid frame, then
# preview the first rows of the result.
transformed_bid_df = bid_transformer.apply(abstract.bid_df)
transformed_bid_df.head()

Unnamed: 0,contract_id,item_id,long_description,quantity,bidder_id,unit_price
0,220002,2011.601/01000,AS BUILT,1.0,engineers,1500000
1,220002,2021.501/00010,MOBILIZATION,1.0,engineers,25000000
2,220002,2051.501/00010,MAINT AND RESTORATION OF HAUL ROADS,1.0,engineers,100000
3,220002,2101.505/00020,CLEARING,1.4,engineers,800000
4,220002,2101.505/00030,GRUBBING,1.4,engineers,400000


In [4]:
# Print the column names, non-null counts, and dtypes of the transformed frame.
transformed_bid_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 568 entries, 0 to 567
Data columns (total 6 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   contract_id       568 non-null    string 
 1   item_id           568 non-null    string 
 2   long_description  568 non-null    string 
 3   quantity          568 non-null    float64
 4   bidder_id         568 non-null    string 
 5   unit_price        568 non-null    int64  
dtypes: float64(1), int64(1), string(4)
memory usage: 26.8 KB


#### Bid Transformer (Function)

In [3]:
# Function-style transform of the same bid frame; the contract id is passed in
# explicitly from the parsed abstract.
bid_df = transform_bid_df(
    df=abstract.bid_df,
    contract_id=abstract.contract_id,
)

In [4]:
# Preview the first rows produced by transform_bid_df.
bid_df.head()

Unnamed: 0,item_id,long_description,quantity,bidder_id,unit_price,contract_id
0,2011.601/01000,AS BUILT,1.0,engineers,1500000,220002
1,2021.501/00010,MOBILIZATION,1.0,engineers,25000000,220002
2,2051.501/00010,MAINT AND RESTORATION OF HAUL ROADS,1.0,engineers,100000,220002
3,2101.505/00020,CLEARING,1.4,engineers,800000,220002
4,2101.505/00030,GRUBBING,1.4,engineers,400000,220002


In [5]:
# Print the column names, non-null counts, and dtypes of the function-based result.
bid_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 568 entries, 0 to 567
Data columns (total 6 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   item_id           568 non-null    object 
 1   long_description  568 non-null    object 
 2   quantity          568 non-null    float64
 3   bidder_id         568 non-null    object 
 4   unit_price        568 non-null    int64  
 5   contract_id       568 non-null    object 
dtypes: float64(1), int64(1), object(4)
memory usage: 26.8+ KB


#### Bidder Transformer

In [6]:
# Transform the abstract's bidder frame with the function-style bidder transformer.
bidder_df = transform_bidder_df(df=abstract.bidder_df)

In [7]:
# Preview the first rows of the transformed bidder frame.
bidder_df.head()

Unnamed: 0,id,name
0,198793,"Duininck, Inc."
1,210000,Knife River Corporation - North Central
2,207897,"Central Specialties, Inc."


In [8]:
# Print the column names, non-null counts, and dtypes of the bidder frame.
bidder_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3 entries, 0 to 2
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   id      3 non-null      object
 1   name    3 non-null      object
dtypes: object(2)
memory usage: 176.0+ bytes


#### Contract Transformer

In [9]:
# Transform the abstract's contract frame. The winning bidder id and spec year
# are not read from the frame itself; they are passed in from the parsed
# abstract as separate arguments.
contract_df = transform_contract_df(
    df=abstract.contract_df,
    winning_bidder_id=abstract.winning_bidder_id,
    spec_year=abstract.spec_year,
)


In [10]:
# Preview the transformed contract frame.
contract_df.head()

Unnamed: 0,letting_date,description,id,sp_number,district,county,winning_bidder_id,spec_year
0,2022-01-28,LOCATED ON T.H. 14 FROM 490' WEST OF T.H. 71 T...,220002,0803-44,Mankato,BROWN,198793,2001


In [11]:
# Print the column names, non-null counts, and dtypes of the contract frame.
contract_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1 entries, 0 to 0
Data columns (total 8 columns):
 #   Column             Non-Null Count  Dtype         
---  ------             --------------  -----         
 0   letting_date       1 non-null      datetime64[ns]
 1   description        1 non-null      object        
 2   id                 1 non-null      object        
 3   sp_number          1 non-null      object        
 4   district           1 non-null      object        
 5   county             1 non-null      object        
 6   winning_bidder_id  1 non-null      object        
 7   spec_year          1 non-null      object        
dtypes: datetime64[ns](1), object(7)
memory usage: 192.0+ bytes
