In [1]:
import sys
sys.path.append("..")

from pathlib import Path

from mndot_bid_etl.reader.abstract import create_abstract_data_from_csv
from mndot_bid_etl.reader.item import create_item_data_from_csv
from mndot_bid_etl.transform.functional.transform_bid import transform_bid_df
from mndot_bid_etl.transform.functional.transform_bidder import transform_bidder_df
from mndot_bid_etl.transform.functional.transform_contract import transform_contract_df

In [2]:
# Read one bid-abstract CSV (file 220002) and the 2020 item-list CSV from the
# repository's data folder. Paths are resolved to absolute form before being
# handed to the readers.
abstract_csv = Path("../data/csv/2022/220002.csv").resolve()
abstract_data = create_abstract_data_from_csv(abstract_csv)

item_list_csv = Path("../data/csv/item_list_2020.csv").resolve()
item_data_2020 = create_item_data_from_csv(item_list_csv)

# New transformation logic to match API

In [3]:
# Raw contract frame from the loaded abstract; every later cell reads from `df`.
df = abstract_data.contract_df
df.head()

Unnamed: 0,Letting Date,Job Description,Contract Id,SP Number,District,County
0,01/28/2022,LOCATED ON T.H. 14 FROM 490' WEST OF T.H. 71 T...,220002,0803-44,Mankato,BROWN


In [4]:
# Summarise the raw contract frame: column names, non-null counts and dtypes.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1 entries, 0 to 0
Data columns (total 6 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   Letting Date     1 non-null      string
 1   Job Description  1 non-null      string
 2   Contract Id      1 non-null      string
 3   SP Number        1 non-null      string
 4   District         1 non-null      string
 5   County           1 non-null      string
dtypes: string(6)
memory usage: 176.0 bytes


In [5]:
import pandas as pd
from mndot_bid_etl.dtype import DType

In [6]:
# Start from an empty frame; the cells below add the output columns one step
# at a time so each intermediate result can be inspected.
out_df = pd.DataFrame()
out_df.head()

In [7]:
# The contract id arrives as text (see df.info()); cast it to DType.INT64 and
# store it as the output frame's `id` column.
contract_ids = df["Contract Id"]
out_df["id"] = contract_ids.astype(DType.INT64)
out_df.head()

Unnamed: 0,id
0,220002


In [8]:
# Parse the letting date with an explicit month/day/year format instead of
# relying on pandas' format inference: the sample row reads "01/28/2022", and a
# fixed format keeps day/month from ever being swapped and makes malformed
# values raise instead of being guessed at.
# The text is stripped first, matching how the other string columns are cleaned.
out_df["letting_date"] = pd.to_datetime(
    df["Letting Date"].str.strip(),
    format="%m/%d/%Y",
)
out_df.head()

Unnamed: 0,id,letting_date
0,220002,2022-01-28


In [9]:
# Copy the free-text columns across with surrounding whitespace removed; district
# and county are additionally title-cased (the sample row shows "BROWN" -> "Brown").
# NOTE(review): str.title() also lowercases interior capitals (e.g. "MCLEOD" ->
# "Mcleod") — confirm that matches the spellings the API expects.
for target, source, title_case in (
    ("sp_number", "SP Number", False),
    ("district", "District", True),
    ("county", "County", True),
    ("description", "Job Description", False),
):
    cleaned = df[source].str.strip()
    out_df[target] = cleaned.str.title() if title_case else cleaned
out_df.head()

Unnamed: 0,id,letting_date,sp_number,district,county,description
0,220002,2022-01-28,0803-44,Mankato,Brown,LOCATED ON T.H. 14 FROM 490' WEST OF T.H. 71 T...


In [10]:
# Fixed placeholder values for this walkthrough; the same two literals are passed
# to contract.transform_contract in the comparison cell further down.
bidder_id, spec_year = 2, "2020"

out_df["winning_bidder_id"] = bidder_id
# Assign the scalar first, then cast the resulting column to DType.STRING so it
# matches the dtype of the other text columns.
out_df["spec_year"] = spec_year
out_df["spec_year"] = out_df["spec_year"].astype(DType.STRING)
out_df.head()

Unnamed: 0,id,letting_date,sp_number,district,county,description,winning_bidder_id,spec_year
0,220002,2022-01-28,0803-44,Mankato,Brown,LOCATED ON T.H. 14 FROM 490' WEST OF T.H. 71 T...,2,2020


In [11]:
# Check the column dtypes of the hand-built output frame.
out_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1 entries, 0 to 0
Data columns (total 8 columns):
 #   Column             Non-Null Count  Dtype         
---  ------             --------------  -----         
 0   id                 1 non-null      int64         
 1   letting_date       1 non-null      datetime64[ns]
 2   sp_number          1 non-null      string        
 3   district           1 non-null      string        
 4   county             1 non-null      string        
 5   description        1 non-null      string        
 6   winning_bidder_id  1 non-null      int64         
 7   spec_year          1 non-null      string        
dtypes: datetime64[ns](1), int64(2), string(5)
memory usage: 192.0 bytes


In [12]:
from mndot_bid_etl.transform import contract

# Run the packaged transform on the same raw contract frame, passing the same
# bidder id (2) and spec year ("2020") used in the manual steps above, so the
# result can be compared with out_df.
transformed_contract_df = contract.transform_contract(
    abstract_data.contract_df,
    2,
    "2020",
)
transformed_contract_df.head()

Unnamed: 0,id,letting_date,sp_number,district,county,description,winning_bidder_id,spec_year
0,220002,2022-01-28,0803-44,Mankato,Brown,LOCATED ON T.H. 14 FROM 490' WEST OF T.H. 71 T...,2,2020


In [13]:
# Dtype summary of the frame returned by contract.transform_contract, for
# comparison with the out_df.info() output above.
transformed_contract_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1 entries, 0 to 0
Data columns (total 8 columns):
 #   Column             Non-Null Count  Dtype         
---  ------             --------------  -----         
 0   id                 1 non-null      int64         
 1   letting_date       1 non-null      datetime64[ns]
 2   sp_number          1 non-null      string        
 3   district           1 non-null      string        
 4   county             1 non-null      string        
 5   description        1 non-null      string        
 6   winning_bidder_id  1 non-null      int64         
 7   spec_year          1 non-null      string        
dtypes: datetime64[ns](1), int64(2), string(5)
memory usage: 192.0 bytes
